From 79e1c96cf42cd67cdffbeb0a1a46a6908c1f95be Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 17 Nov 2024 16:02:48 +0000 Subject: [PATCH 01/10] Functions to fill an array with an interpolation matrix --- moment_kinetics/src/coordinates.jl | 16 ++-- moment_kinetics/src/gauss_legendre.jl | 19 ++++- moment_kinetics/src/interpolation.jl | 89 ++++++++++++++++++++- moment_kinetics/test/interpolation_tests.jl | 14 +++- 4 files changed, 130 insertions(+), 8 deletions(-) diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl index 373d7c8f8..3cb68bf4a 100644 --- a/moment_kinetics/src/coordinates.jl +++ b/moment_kinetics/src/coordinates.jl @@ -95,6 +95,9 @@ struct coordinate{T <: AbstractVector{mk_float},Tbparams} scratch8::Array{mk_float,1} # scratch9 is an array used for intermediate calculations requiring n entries scratch9::Array{mk_float,1} + # scratch_int_nelement_plus_1 is an integer array used for intermediate calculations + # requiring nelement+1 entries + scratch_int_nelement_plus_1::Array{mk_int,1} # scratch_shared is a shared-memory array used for intermediate calculations requiring # n entries scratch_shared::T @@ -307,6 +310,9 @@ function define_coordinate(coord_input::NamedTuple; parallel_io::Bool=false, duniform_dgrid = allocate_float(coord_input.ngrid, coord_input.nelement_local) # scratch is an array used for intermediate calculations requiring n entries scratch = allocate_float(n_local) + # scratch_int_nelement_plus_1 is an array used for intermediate calculations requiring + # nelement+1 entries + scratch_int_nelement_plus_1 = allocate_int(coord_input.nelement_local + 1) if ignore_MPI scratch_shared = allocate_float(n_local) scratch_shared2 = allocate_float(n_local) @@ -387,11 +393,11 @@ function define_coordinate(coord_input::NamedTuple; parallel_io::Bool=false, coord_input.cheb_option, coord_input.bc, coord_input.boundary_parameters, wgts, uniform_grid, duniform_dgrid, scratch, copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), - copy(scratch), scratch_shared, scratch_shared2, scratch_shared3, scratch_2d, - copy(scratch_2d), advection, send_buffer, receive_buffer, comm, local_io_range, - global_io_range, element_scale, element_shift, - coord_input.element_spacing_option, element_boundaries, radau_first_element, - other_nodes, one_over_denominator) + copy(scratch), scratch_int_nelement_plus_1, scratch_shared, scratch_shared2, + scratch_shared3, scratch_2d, copy(scratch_2d), advection, send_buffer, + receive_buffer, comm, local_io_range, global_io_range, element_scale, + element_shift, coord_input.element_spacing_option, element_boundaries, + radau_first_element, other_nodes, one_over_denominator) if coord.n == 1 && occursin("v", coord.name) spectral = null_velocity_dimension_info() diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl index ee9b706e4..de2f1a9fe 100644 --- a/moment_kinetics/src/gauss_legendre.jl +++ b/moment_kinetics/src/gauss_legendre.jl @@ -29,7 +29,8 @@ using SparseMatricesCSR using ..type_definitions: mk_float, mk_int using ..array_allocation: allocate_float import ..calculus: elementwise_derivative!, mass_matrix_solve! -import ..interpolation: single_element_interpolate! +import ..interpolation: single_element_interpolate!, + fill_single_element_interpolation_matrix! using ..lagrange_polynomials: lagrange_poly_optimised using ..moment_kinetics_structs: weak_discretization_info @@ -373,6 +374,22 @@ function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, c return nothing end +function fill_single_element_interpolation_matrix!( + matrix_slice, newgrid, jelement, coord, + gausslegendre::gausslegendre_base_info) + n_new = length(newgrid) + + for j ∈ 1:coord.ngrid + other_nodes = @view coord.other_nodes[:,j,jelement] + one_over_denominator = coord.one_over_denominator[j,jelement] + for i ∈ 1:n_new + matrix_slice[i,j] = lagrange_poly_optimised(other_nodes, one_over_denominator, newgrid[i]) + end + end + + return nothing +end + function mass_matrix_solve!(f, b, spectral::gausslegendre_info) # invert mass matrix system y = spectral.mass_matrix_lu \ b diff --git a/moment_kinetics/src/interpolation.jl b/moment_kinetics/src/interpolation.jl index 69041a463..366b8291f 100644 --- a/moment_kinetics/src/interpolation.jl +++ b/moment_kinetics/src/interpolation.jl @@ -59,7 +59,7 @@ function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral) # Assumes points in newgrid are sorted. # May not be the most efficient algorithm. # Find start/end points for each element, storing their indices in kstart - kstart = Vector{mk_int}(undef, nelement+1) + kstart = coord.scratch_int_nelement_plus_1 # First element includes both boundary points, while all others have only one (to # avoid duplication), so calculate the first element outside the loop. @@ -121,6 +121,93 @@ function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral) return nothing end +""" + fill_single_element_interpolation_matrix!( + matrix_slice, newgrid, jelement, coord, spectral) + +Set `matrix_slice` equal to the interpolation matrix that interpolates values from the +element `jelement` of the vector being multiplied onto the grid points given by `newgrid` +(which must be contained within the physical space covered by element `jelement`). + +`coord` is the `coordinate` object for the dimension in which the interpolation is done, +and `spectral` the discretization object corresponding to `jelement`. +""" +function fill_single_element_interpolation_matrix! end + +function fill_1d_interpolation_matrix!(matrix, newgrid, coord, spectral) + # define local variable nelement for convenience + nelement = coord.nelement_local + + n_new = size(newgrid)[1] + # Find which points belong to which element. + # istart[j] contains the index of the first point in newgrid that is within element + # j, and istart[nelement+1] is n_new if the last point is within coord.grid, or the + # index of the first element outside coord.grid otherwise. + # Assumes points in newgrid are sorted. + # May not be the most efficient algorithm. + # Find start/end points for each element, storing their indices in istart + istart = coord.scratch_int_nelement_plus_1 + + # First element includes both boundary points, while all others have only one (to + # avoid duplication), so calculate the first element outside the loop. + if coord.radau_first_element && coord.irank == 0 + first_element_spectral = spectral.radau + # For a grid with a Radau first element, the lower boundary of the first element + # is at coord=0, and the coordinate range is 0 Date: Sun, 17 Nov 2024 19:44:18 +0000 Subject: [PATCH 02/10] Function to fill matrix corresponding to interpolate_symmetric!() --- moment_kinetics/src/interpolation.jl | 24 ++++++++++++++++++- moment_kinetics/test/interpolation_tests.jl | 26 ++++++++++++++++++++- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/moment_kinetics/src/interpolation.jl b/moment_kinetics/src/interpolation.jl index 366b8291f..9e4f06ff2 100644 --- a/moment_kinetics/src/interpolation.jl +++ b/moment_kinetics/src/interpolation.jl @@ -5,7 +5,8 @@ Note these are not guaranteed to be highly optimized! """ module interpolation -export interpolate_to_grid_z, interpolate_to_grid_1d!, interpolate_symmetric! +export interpolate_to_grid_z, interpolate_to_grid_1d!, interpolate_symmetric!, + fill_interpolate_symmetric_matrix! using ..array_allocation: allocate_float using ..moment_kinetics_structs: null_spatial_dimension_info, null_velocity_dimension_info @@ -401,4 +402,25 @@ function interpolate_symmetric!(result, newgrid, f, oldgrid) return nothing end +function fill_interpolate_symmetric_matrix!(matrix_slice, newgrid, oldgrid) + nnew = length(newgrid) + nold = length(oldgrid) + + @boundscheck size(matrix_slice) == (nnew, nold) || error("Dimensions of matrix_slice ($(size(matrix_slice))) are not the same as (nnew($nnew),nold($nold)).") + + if nold == 1 + # Interpolating 'polynomial' is just a constant + matrix_slice .= 1.0 + else + for j ∈ 1:nold + one_over_denominator = 1.0 / prod((oldgrid[j]^2 - oldgrid[k]^2) for k ∈ 1:nold if k ≠ j) + for i ∈ 1:nnew + matrix_slice[i,j] = prod((newgrid[i]^2 - oldgrid[k]^2) for k ∈ 1:nold if k ≠ j) * one_over_denominator + end + end + end + + return nothing +end + end # interpolation diff --git a/moment_kinetics/test/interpolation_tests.jl b/moment_kinetics/test/interpolation_tests.jl index e18c5171d..f848ffefb 100644 --- a/moment_kinetics/test/interpolation_tests.jl +++ b/moment_kinetics/test/interpolation_tests.jl @@ -6,7 +6,7 @@ using moment_kinetics.array_allocation: allocate_float using moment_kinetics.coordinates: define_test_coordinate using moment_kinetics.interpolation: interpolate_to_grid_1d, fill_1d_interpolation_matrix!, interpolate_to_grid_z, - interpolate_to_grid_vpa, interpolate_symmetric! + interpolate_to_grid_vpa, interpolate_symmetric!, fill_interpolate_symmetric_matrix! using MPI @@ -123,6 +123,18 @@ function runtests() x[1:first_positive_ind-1]) @test isapprox(result, expected; rtol=rtol, atol=1.0e-14) + + @testset "matrix" begin + interp_matrix = allocate_float(nx - first_positive_ind + 1, + first_positive_ind - 1) + interp_matrix .= 0.0 + fill_interpolate_symmetric_matrix!(interp_matrix, + x[first_positive_ind:end], + x[1:first_positive_ind-1]) + + @test isapprox(interp_matrix * f[1:first_positive_ind-1], expected, + rtol=rtol, atol=1.0e-14) + end end @testset "upper to lower $nx" for nx ∈ 4:10 @@ -141,6 +153,18 @@ function runtests() x[first_positive_ind:end]) @test isapprox(result, expected; rtol=rtol, atol=1.0e-14) + + @testset "matrix" begin + interp_matrix = allocate_float(first_positive_ind - 1, + nx - first_positive_ind + 1) + interp_matrix .= 0.0 + fill_interpolate_symmetric_matrix!(interp_matrix, + x[1:first_positive_ind-1], + x[first_positive_ind:end]) + + @test isapprox(interp_matrix * f[first_positive_ind:end], expected, + rtol=rtol, atol=1.0e-14) + end end end end From c9df9a669b1dcbbdbe3d56bc56ddf06dc0af91c3 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 25 Nov 2024 19:27:37 +0000 Subject: [PATCH 03/10] Functions to calculate the derivative of an interpolating function --- moment_kinetics/src/chebyshev.jl | 67 +++++++++++- moment_kinetics/src/gauss_legendre.jl | 30 ++++- .../src/hermite_spline_interpolation.jl | 14 ++- moment_kinetics/src/interpolation.jl | 103 +++++++++++++++--- moment_kinetics/src/lagrange_polynomials.jl | 29 ++++- moment_kinetics/test/interpolation_tests.jl | 33 ++++++ 6 files changed, 258 insertions(+), 18 deletions(-) diff --git a/moment_kinetics/src/chebyshev.jl b/moment_kinetics/src/chebyshev.jl index ad8139120..716ca6599 100644 --- a/moment_kinetics/src/chebyshev.jl +++ b/moment_kinetics/src/chebyshev.jl @@ -466,7 +466,7 @@ function chebyshev_spectral_derivative!(df,f) end function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, coord, - chebyshev::chebyshev_base_info) + chebyshev::chebyshev_base_info, derivative::Val{0}) # Temporary buffer to store Chebyshev coefficients cheby_f = chebyshev.df @@ -495,6 +495,71 @@ function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, c return nothing end +# Evaluate first derivative of the interpolating function +function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, coord, + chebyshev::chebyshev_base_info, derivative::Val{1}) + # Temporary buffer to store Chebyshev coefficients + cheby_f = chebyshev.df + + if length(cheby_f) == 1 + # Chebyshev 'polynomial' is only a constant, so derivative must be zero? + result .= 0.0 + return nothing + end + + # Need to transform newgrid values to a scaled z-coordinate associated with the + # Chebyshev coefficients to get the interpolated function values. Transform is a + # shift and scale so that the element coordinate goes from -1 to 1 + shift = 0.5 * (coord.grid[imin] + coord.grid[imax]) + scale = 2.0 / (coord.grid[imax] - coord.grid[imin]) + + # Get Chebyshev coefficients + chebyshev_forward_transform!(cheby_f, chebyshev.fext, f, chebyshev.forward, coord.ngrid) + + if length(cheby_f) == 2 + # Handle this case specially because the generic algorithm below uses cheby_f[3] + # explicitly. + for i ∈ 1:length(newgrid) + x = newgrid[i] + z = scale * (x - shift) + + # cheby_f[2] is multiplied by U_0, but U_0 = 1 + result[i] = cheby_f[2] * scale + end + return nothing + end + + # Need to evaluate first derivatives of the Chebyshev polynomials. Can do this using + # the differentiation formula + # https://en.wikipedia.org/wiki/Chebyshev_polynomials#Differentiation_and_integration + # d T_n / dz = n * U_(n-1) + # where U_n are the Chebyshev polynomials of the second kind, which in turn can be + # evaluated using the recurrence relation + # https://en.wikipedia.org/wiki/Chebyshev_polynomials#Recurrence_definition + # U_0 = 1 + # U_1 = 2*z + # U_(n+1) = 2*z*U_n - U_(n-1) + for i ∈ 1:length(newgrid) + x = newgrid[i] + z = scale * (x - shift) + # Evaluate sum of Chebyshev polynomials at z using recurrence relation + U_nminus1 = 1.0 + U_n = 2.0*z + + # Note that the derivative of the zero'th Chebyshev polynomial (a constant) does + # not contribute, so cheby_f[1] is not used. + nplus1 = 2.0 + result[i] = cheby_f[2] * U_nminus1 * scale + cheby_f[3] * nplus1 * U_n * scale + for coef ∈ @view(cheby_f[4:end]) + nplus1 += 1.0 + U_nminus1, U_n = U_n, 2.0 * z * U_n - U_nminus1 + result[i] += coef * nplus1 * U_n * scale + end + end + + return nothing +end + """ returns wgts array containing the integration weights associated with all grid points for Clenshaw-Curtis quadrature diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl index de2f1a9fe..c59dbb8e8 100644 --- a/moment_kinetics/src/gauss_legendre.jl +++ b/moment_kinetics/src/gauss_legendre.jl @@ -31,7 +31,7 @@ using ..array_allocation: allocate_float import ..calculus: elementwise_derivative!, mass_matrix_solve! import ..interpolation: single_element_interpolate!, fill_single_element_interpolation_matrix! -using ..lagrange_polynomials: lagrange_poly_optimised +using ..lagrange_polynomials: lagrange_poly_optimised, lagrange_poly_derivative_optimised using ..moment_kinetics_structs: weak_discretization_info @@ -352,7 +352,8 @@ function elementwise_derivative!(coord, ff, adv_fac, spectral::gausslegendre_inf end function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, coord, - gausslegendre::gausslegendre_base_info) + gausslegendre::gausslegendre_base_info, + derivative::Val{0}) n_new = length(newgrid) i = 1 @@ -374,6 +375,31 @@ function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, c return nothing end +# Evaluate first derivative of the interpolating function +function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, coord, + gausslegendre::gausslegendre_base_info, + derivative::Val{1}) + n_new = length(newgrid) + + i = 1 + other_nodes = @view coord.other_nodes[:,i,ielement] + one_over_denominator = coord.one_over_denominator[i,ielement] + this_f = f[i] + for j ∈ 1:n_new + result[j] = this_f * lagrange_poly_derivative_optimised(other_nodes, one_over_denominator, newgrid[j]) + end + for i ∈ 2:coord.ngrid + other_nodes = @view coord.other_nodes[:,i,ielement] + one_over_denominator = coord.one_over_denominator[i,ielement] + this_f = f[i] + for j ∈ 1:n_new + result[j] += this_f * lagrange_poly_derivative_optimised(other_nodes, one_over_denominator, newgrid[j]) + end + end + + return nothing +end + function fill_single_element_interpolation_matrix!( matrix_slice, newgrid, jelement, coord, gausslegendre::gausslegendre_base_info) diff --git a/moment_kinetics/src/hermite_spline_interpolation.jl b/moment_kinetics/src/hermite_spline_interpolation.jl index 255c5da91..03453fbfc 100644 --- a/moment_kinetics/src/hermite_spline_interpolation.jl +++ b/moment_kinetics/src/hermite_spline_interpolation.jl @@ -21,9 +21,16 @@ coord : coordinate not_spectral : finite_difference_info A finite_difference_info argument here indicates that the coordinate is not spectral-element discretized, i.e. it is on a uniform ('finite difference') grid. +derivative : Val(n) + The value of `n` the integer in the `Val{n}` indicates the order of the derivative to + be calculated of the interpolating function (only a few values of `n` are supported). + Defaults to Val(0), which means just calculating the interpolating function itself. """ +function interpolate_to_grid_1d! end + function interpolate_to_grid_1d!(result, new_grid, f, coord, - not_spectral::finite_difference_info) + not_spectral::finite_difference_info, + derivative::Val{0}=Val(0)) x = coord.grid n_new = length(new_grid) @@ -101,5 +108,10 @@ function interpolate_to_grid_1d!(result, new_grid, f, coord, return nothing end +function interpolate_to_grid_1d!(result, new_grid, f, coord, + not_spectral::finite_difference_info, + derivative::Val{1}) + error("First derivative interpolation not implemented for finite-difference yet.") +end end # hermite_spline_interpolation diff --git a/moment_kinetics/src/interpolation.jl b/moment_kinetics/src/interpolation.jl index 9e4f06ff2..c87d6b104 100644 --- a/moment_kinetics/src/interpolation.jl +++ b/moment_kinetics/src/interpolation.jl @@ -45,10 +45,15 @@ coord : coordinate spectral : discretization_info struct containing information for discretization, whose type determines which method is used. +derivative : Val(n) + The value of `n` the integer in the `Val{n}` indicates the order of the derivative to + be calculated of the interpolating function (only a few values of `n` are supported). + Defaults to Val(0), which means just calculating the interpolating function itself. """ function interpolate_to_grid_1d! end -function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral) +function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral, + derivative::Val=Val(0)) # define local variable nelement for convenience nelement = coord.nelement_local @@ -83,10 +88,21 @@ function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral) end # check to see if any of the newgrid points are to the left of the first grid point - for j ∈ 1:kstart[1]-1 - # if the new grid location is outside the bounds of the original grid, - # extrapolate f with Gaussian-like decay beyond the domain - result[j] = f[1] * exp(-(coord.grid[1] - newgrid[j])^2) + if isa(derivative, Val{0}) + for j ∈ 1:kstart[1]-1 + # if the new grid location is outside the bounds of the original grid, + # extrapolate f with Gaussian-like decay beyond the domain + result[j] = f[1] * exp(-(coord.grid[1] - newgrid[j])^2) + end + elseif isa(derivative, Val{1}) + for j ∈ 1:kstart[1]-1 + # if the new grid location is outside the bounds of the original grid, + # extrapolate f with Gaussian-like decay beyond the domain, then take + # derivative. + result[j] = 2.0 * (coord.grid[1] - newgrid[j]) * f[1] * exp(-(coord.grid[1] - newgrid[j])^2) + end + else + error("Unrecognised derivative=$derivative") end @inbounds for j ∈ 1:nelement # Search from kstart[j] to try to speed up the sort, but means result of @@ -101,7 +117,7 @@ function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral) kmax = kstart[2] - 1 @views single_element_interpolate!(result[kmin:kmax], newgrid[kmin:kmax], f[imin:imax], imin, imax, 1, coord, - first_element_spectral) + first_element_spectral, derivative) end @inbounds for j ∈ 2:nelement kmin = kstart[j] @@ -111,12 +127,20 @@ function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral) imax = coord.imax[j] @views single_element_interpolate!(result[kmin:kmax], newgrid[kmin:kmax], f[imin:imax], imin, imax, j, coord, - spectral.lobatto) + spectral.lobatto, derivative) end end - for k ∈ kstart[nelement+1]:n_new - result[k] = f[end] * exp(-(newgrid[k] - coord.grid[end])^2) + if isa(derivative, Val{0}) + for k ∈ kstart[nelement+1]:n_new + result[k] = f[end] * exp(-(newgrid[k] - coord.grid[end])^2) + end + elseif isa(derivative, Val{1}) + for k ∈ kstart[nelement+1]:n_new + result[k] = -2.0 * (newgrid[k] - coord.grid[end]) * f[end] * exp(-(newgrid[k] - coord.grid[end])^2) + end + else + error("Unrecognised derivative=$derivative") end return nothing @@ -210,7 +234,8 @@ function fill_1d_interpolation_matrix!(matrix, newgrid, coord, spectral) end function interpolate_to_grid_1d!(result, new_grid, f, coord, - spectral::null_spatial_dimension_info) + spectral::null_spatial_dimension_info, + derivative::Val{0}) # There is only one point in the 'old grid' represented by coord (as indicated by the # type of the `spectral` argument), and we are interpolating in a spatial dimension. # Assume that the variable should be taken to be constant in this dimension to @@ -221,7 +246,8 @@ function interpolate_to_grid_1d!(result, new_grid, f, coord, end function interpolate_to_grid_1d!(result, new_grid, f, coord, - spectral::null_velocity_dimension_info) + spectral::null_velocity_dimension_info, + derivative::Val{0}) # There is only one point in the 'old grid' represented by coord (as indicated by the # type of the `spectral` argument), and we are interpolating in a velocity space # dimension. Assume that the profile 'should be' a Maxwellian over the new grid, with @@ -247,6 +273,10 @@ coord : coordinate spectral : Bool or chebyshev_info struct containing information for discretization, whose type determines which method is used. +derivative : Val(n) + The value of `n` the integer in the `Val{n}` indicates the order of the derivative to + be calculated of the interpolating function (only a few values of `n` are supported). + Defaults to Val(0), which means just calculating the interpolating function itself. Returns ------- @@ -364,14 +394,19 @@ function interpolate_to_grid_vpa(newgrid, f::AbstractVector{mk_float}, vpa, spec end """ - interpolate_symmetric!(result, newgrid, f, oldgrid) + interpolate_symmetric!(result, newgrid, f, oldgrid, derivative=Val(0)) Interpolate f from oldgrid to newgrid, imposing that `f(x)` is symmetric around `x=0`, so the interpolation is done by fitting a polynomial in `x^2` to the values of `f` given on `oldgrid`, and evaluating on `newgrid`. Since interpolation is done in a polynomial of `x^2`, the signs of the points on `newgrid` and `oldgrid` do not matter, and are ignored. + +`Val(n)` can be passed as `derivative` to calculate the derivative of order `n` of the +interpolating function (only a few values of `n` are supported). """ -function interpolate_symmetric!(result, newgrid, f, oldgrid) +function interpolate_symmetric! end + +function interpolate_symmetric!(result, newgrid, f, oldgrid, derivative::Val{0}=Val(0)) nnew = length(newgrid) nold = length(oldgrid) @@ -402,6 +437,48 @@ function interpolate_symmetric!(result, newgrid, f, oldgrid) return nothing end +function interpolate_symmetric!(result, newgrid, f, oldgrid, derivative::Val{1}) + nnew = length(newgrid) + nold = length(oldgrid) + + if nnew == 0 + return nothing + end + + # Check all points in newgrid are covered by oldgrid (i.e. between zero and the + # maximum of oldgrid) + @boundscheck maximum(abs.(newgrid)) ≤ maximum(abs.(oldgrid)) || error("newgrid bigger ($(maximum(abs.(newgrid)))) than oldgrid ($(maximum(abs.(oldgrid)))).") + @boundscheck size(result) == size(newgrid) || error("Size of result ($(size(result))) is not the same as size of newgrid ($(size(newgrid))).") + @boundscheck size(f) == size(oldgrid) || error("Size of f ($(size(f))) is not the same as size of oldgrid ($(size(oldgrid))).") + + if nold == 1 + # Interpolating 'polynomial' is just a constant + result .= 0.0 + elseif nold == 2 + # Interpolating polynomial is linear, so result is a constant + # df/dx = f[1] / (oldgrid[1] - oldgrid[2]) + f[2] / (oldgrid[2] - oldgrid[1]) + return (f[1] - f[2]) / (oldgrid[1] - oldgrid[2]) + else + result .= 0.0 + for j ∈ 1:nold + one_over_denominator = 1.0 / prod((oldgrid[j]^2 - oldgrid[k]^2) for k ∈ 1:nold if k ≠ j) + this_f = f[j] + for i ∈ 1:nnew + temp = 0.0 + for k ∈ 1:nold + if k == j + continue + end + temp += prod((newgrid[i]^2 - oldgrid[l]^2) for l ∈ 1:nold if l ∉ (j,k)) + end + result[i] += this_f * temp * 2.0 * newgrid[i] * one_over_denominator + end + end + end + + return nothing +end + function fill_interpolate_symmetric_matrix!(matrix_slice, newgrid, oldgrid) nnew = length(newgrid) nold = length(oldgrid) diff --git a/moment_kinetics/src/lagrange_polynomials.jl b/moment_kinetics/src/lagrange_polynomials.jl index d82a895f0..7dc8ab13d 100644 --- a/moment_kinetics/src/lagrange_polynomials.jl +++ b/moment_kinetics/src/lagrange_polynomials.jl @@ -8,7 +8,7 @@ their being scattered (and possibly duplicated) in other modules. """ module lagrange_polynomials -export lagrange_poly, lagrange_poly_optimised +export lagrange_poly, lagrange_poly_optimised, lagrange_poly_derivative_optimised """ Lagrange polynomial @@ -50,4 +50,31 @@ function lagrange_poly_optimised(other_nodes, one_over_denominator, x) return prod(x - n for n ∈ other_nodes) * one_over_denominator end +""" + lagrange_poly_derivative_optimised(other_nodes, one_over_denominator, x) + +Optimised calculation of the first derivative of a Lagrange polynomial, making use of +pre-calculated quantities. + +`other_nodes` is a vector of the grid points in this element where this Lagrange +polynomial is zero (the other nodes than the one where it is 1). + +`one_over_denominator` is `1/prod(x0 - n for n ∈ other_nodes)` where `x0` is the grid +point where this Lagrange polynomial is 1. + +`x` is the point to evaluate the Lagrange polynomial at. +""" +function lagrange_poly_derivative_optimised(other_nodes, one_over_denominator, x) + result = 0.0 + k = length(other_nodes) + # Is there a more efficient way of doing this? Not a big deal for now because this + # function will only be used to calculate a preconditioner matrix, which is done + # rarely. + for i ∈ 1:k + result += prod(x - other_nodes[j] for j ∈ 1:k if j ≠ i) + end + result *= one_over_denominator + return result +end + end diff --git a/moment_kinetics/test/interpolation_tests.jl b/moment_kinetics/test/interpolation_tests.jl index f848ffefb..12ddc5eff 100644 --- a/moment_kinetics/test/interpolation_tests.jl +++ b/moment_kinetics/test/interpolation_tests.jl @@ -15,6 +15,9 @@ using MPI test_function(L, coords...) = [cospi(2.0*sum(x)/L)*exp(-sinpi(2.0*sum(x)/L)) for x in Iterators.product(coords...)] +test_function_first_derivative(L, coord) = + [-2.0*π/L*sinpi(2.0*x/L)*exp(-sinpi(2.0*x/L)) - 2.0*π/L*cospi(2.0*x/L)^2*exp(-sinpi(2.0*x/L)) for x in coord] + println("interpolation tests") # define inputs needed for the test @@ -58,6 +61,16 @@ function runtests() @test isapprox(interpolate_to_grid_1d(test_grid, f, z, spectral), expected, rtol=rtol, atol=1.e-14) + if discretization != "finite_difference" + # Last element of test_grid is on the last grid point. + # interpolate_to_grid_1d() will interpret this as being just outside + # the grid and give the derivative of the extrapolation function + # there, which is not what we want to test, so skip that point. + @test isapprox(interpolate_to_grid_1d(test_grid[1:end-1], f, z, spectral, Val(1)), + test_function_first_derivative(z.L, test_grid[1:end-1]), + rtol=rtol, atol=1.e-14) + end + if discretization == "gausslegendre_pseudospectral" @testset "matrix" begin interp_matrix = allocate_float(length(test_grid), z.n) @@ -114,6 +127,7 @@ function runtests() x = @. 1.8 * (ix - 1) / (nx - 1) - 1.23 first_positive_ind = searchsortedlast(x, 0.0) + 1 f = cos.(x) + dfdx = -sin.(x) expected = f[first_positive_ind:end] @@ -124,6 +138,15 @@ function runtests() @test isapprox(result, expected; rtol=rtol, atol=1.0e-14) + expected_deriv = dfdx[first_positive_ind:end] + + result_deriv = zeros(nx - first_positive_ind + 1) + @views interpolate_symmetric!(result_deriv, x[first_positive_ind:end], + f[1:first_positive_ind-1], + x[1:first_positive_ind-1], Val(1)) + + @test isapprox(result_deriv, expected_deriv; rtol=2.0*rtol, atol=1.0e-14) + @testset "matrix" begin interp_matrix = allocate_float(nx - first_positive_ind + 1, first_positive_ind - 1) @@ -144,6 +167,7 @@ function runtests() x = @. 1.8 * (ix - 1) / (nx - 1) - 0.57 first_positive_ind = searchsortedlast(x, 0.0) + 1 f = cos.(x) + dfdx = -sin.(x) expected = f[1:first_positive_ind-1] @@ -154,6 +178,15 @@ function runtests() @test isapprox(result, expected; rtol=rtol, atol=1.0e-14) + expected_deriv = dfdx[1:first_positive_ind-1] + + result_deriv = zeros(first_positive_ind-1) + @views interpolate_symmetric!(result_deriv, x[1:first_positive_ind-1], + f[first_positive_ind:end], + x[first_positive_ind:end], Val(1)) + + @test isapprox(result_deriv, expected_deriv; rtol=2.0*rtol, atol=1.0e-14) + @testset "matrix" begin interp_matrix = allocate_float(first_positive_ind - 1, nx - first_positive_ind + 1) From ef088b12297f8d3769c23914faab82684fac3cb2 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 28 Nov 2024 18:09:53 +0000 Subject: [PATCH 04/10] Refactor calculation of components of wall-bc integral corrections ...for electrons. Separate out the calculations into separate functions, which will be reused when calculating the contribution of the wall boundary condition to the Jacobian matrix. --- .../src/electron_kinetic_equation.jl | 656 ++++++++++-------- 1 file changed, 383 insertions(+), 273 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 49b5e81fb..664c59f8a 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -69,6 +69,8 @@ using ..velocity_moments: integrate_over_vspace, calculate_electron_moment_deriv # Only needed so we can reference it in a docstring import ..runge_kutta +const integral_correction_sharpness = 4.0 + """ update_electron_pdf is a function that uses the electron kinetic equation to solve for the updated electron pdf @@ -2305,6 +2307,309 @@ function get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) end +function fill_integral_pieces!( + pdf_part, vthe, vpa, vpa_unnorm, density_integral_pieces, flow_integral_pieces, + energy_integral_pieces, cubic_integral_pieces, quartic_integral_pieces) + + @. density_integral_pieces = pdf_part * vpa.wgts / sqrt(pi) + @. flow_integral_pieces = density_integral_pieces * vpa_unnorm / vthe + @. energy_integral_pieces = flow_integral_pieces * vpa_unnorm / vthe + @. cubic_integral_pieces = energy_integral_pieces * vpa_unnorm / vthe + @. quartic_integral_pieces = cubic_integral_pieces * vpa_unnorm / vthe +end + +function get_residual_and_coefficients_for_bc(a1, a1prime, a2, a2prime, b1, b1prime, + c1, c1prime, c2, c2prime, d1, d1prime, + e1, e1prime, e2, e2prime, u_over_vt, + bc_constraints) + if bc_constraints + alpha = a1 + 2.0 * a2 + alphaprime = a1prime + 2.0 * a2prime + beta = c1 + 2.0 * c2 + betaprime = c1prime + 2.0 * c2prime + gamma = u_over_vt^2 * alpha - 2.0 * u_over_vt * b1 + beta + gammaprime = u_over_vt^2 * alphaprime - 2.0 * u_over_vt * b1prime + betaprime + delta = u_over_vt^2 * beta - 2.0 * u_over_vt * d1 + e1 + 2.0 * e2 + deltaprime = u_over_vt^2 * betaprime - 2.0 * u_over_vt * d1prime + e1prime + 2.0 * e2prime + + A = (0.5 * beta - delta) / (beta * gamma - alpha * delta) + Aprime = (0.5 * betaprime - deltaprime + - (0.5 * beta - delta) * (gamma * betaprime + beta * gammaprime - delta * alphaprime - alpha * deltaprime) + / (beta * gamma - alpha * delta) + ) / (beta * gamma - alpha * delta) + C = (1.0 - alpha * A) / beta + Cprime = -(A * alphaprime + alpha * Aprime) / beta - (1.0 - alpha * A) * betaprime / beta^2 + else + A = 1.0 + Aprime = 0.0 + C = 0.0 + Cprime = 0.0 + end + + epsilon = A * b1 + C * d1 - u_over_vt + epsilonprime = b1 * Aprime + A * b1prime + d1 * Cprime + C * d1prime + + return epsilon, epsilonprime, A, C +end + +function get_integrals_and_derivatives_lowerz( + vcut, minus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, u_over_vt, + density_integral_pieces_lowerz, flow_integral_pieces_lowerz, + energy_integral_pieces_lowerz, cubic_integral_pieces_lowerz, + quartic_integral_pieces_lowerz, bc_constraints) + # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and + # minus_vcut_ind where -vcut is. + vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) + + function get_for_one_moment(integral_pieces) + # Integral contributions from the cell containing vcut. + # Define these as follows to be consistent with the way the cutoff is + # applied around plus_vcut_ind below. + # Note that `integral_vcut_cell_part1` and `integral_vcut_cell_part2` + # include all the contributions from the grid points + # `minus_vcut_ind-1` and `minus_vcut_ind`, not just those from + # 'inside' the grid cell. + if vcut_fraction < 0.5 + integral_vcut_cell_part2 = integral_pieces[minus_vcut_ind-1] * (0.5 - vcut_fraction) + + integral_pieces[minus_vcut_ind] + integral_vcut_cell_part1 = integral_pieces[minus_vcut_ind-1] * (0.5 + vcut_fraction) + + # part1prime is d(part1)/d(vcut) + part1prime = -integral_pieces[minus_vcut_ind-1] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) + else + integral_vcut_cell_part2 = integral_pieces[minus_vcut_ind] * (1.5 - vcut_fraction) + integral_vcut_cell_part1 = integral_pieces[minus_vcut_ind-1] + + integral_pieces[minus_vcut_ind] * (vcut_fraction - 0.5) + + # part1prime is d(part1)/d(vcut) + part1prime = -integral_pieces[minus_vcut_ind] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) + end + + part1 = sum(@view integral_pieces[1:minus_vcut_ind-2]) + integral_vcut_cell_part1 + + # Integral contribution from the cell containing sigma + integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind]) + + part2 = sum(@view integral_pieces[minus_vcut_ind+1:sigma_ind-2]) + part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind-1] + sigma_fraction * integral_sigma_cell + # part2prime is d(part2)/d(vcut) + part2prime = -part1prime + + return part1, part1prime, part2, part2prime + end + this_a1, this_a1prime, this_a2, this_a2prime = get_for_one_moment(density_integral_pieces_lowerz) + this_b1, this_b1prime, this_b2, _ = get_for_one_moment(flow_integral_pieces_lowerz) + this_c1, this_c1prime, this_c2, this_c2prime = get_for_one_moment(energy_integral_pieces_lowerz) + this_d1, this_d1prime, this_d2, _ = get_for_one_moment(cubic_integral_pieces_lowerz) + this_e1, this_e1prime, this_e2, this_e2prime = get_for_one_moment(quartic_integral_pieces_lowerz) + + return get_residual_and_coefficients_for_bc( + this_a1, this_a1prime, this_a2, this_a2prime, this_b1, + this_b1prime, this_c1, this_c1prime, this_c2, this_c2prime, + this_d1, this_d1prime, this_e1, this_e1prime, this_e2, + this_e2prime, u_over_vt, bc_constraints)..., + this_a2, this_b2, this_c2, this_d2 +end + +function get_lowerz_integral_correction_components( + pdf, vthe, vpa, vpa_unnorm, u_over_vt, sigma_ind, sigma_fraction, + vcut, minus_vcut_ind, plus_vcut_ind, ir, bc_constraints) + + # Need to recalculate integral pieces with the updated distribution function + density_integral_pieces_lowerz = vpa.scratch3 + flow_integral_pieces_lowerz = vpa.scratch4 + energy_integral_pieces_lowerz = vpa.scratch5 + cubic_integral_pieces_lowerz = vpa.scratch6 + quartic_integral_pieces_lowerz = vpa.scratch7 + fill_integral_pieces!( + @view(pdf[:,1,1,ir]), vthe[1,ir], vpa, vpa_unnorm, density_integral_pieces_lowerz, + flow_integral_pieces_lowerz, energy_integral_pieces_lowerz, + cubic_integral_pieces_lowerz, quartic_integral_pieces_lowerz) + + # Update the part2 integrals since we've applied the A and C factors + _, _, _, _, a2, b2, c2, d2 = + get_integrals_and_derivatives_lowerz( + vcut, minus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, u_over_vt, + density_integral_pieces_lowerz, flow_integral_pieces_lowerz, + energy_integral_pieces_lowerz, cubic_integral_pieces_lowerz, + quartic_integral_pieces_lowerz, bc_constraints) + + function get_part3_for_one_moment_lower(integral_pieces) + # Integral contribution from the cell containing sigma + integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind]) + + part3 = sum(@view integral_pieces[sigma_ind+1:plus_vcut_ind+1]) + part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell + + return part3 + end + a3 = get_part3_for_one_moment_lower(density_integral_pieces_lowerz) + b3 = get_part3_for_one_moment_lower(flow_integral_pieces_lowerz) + c3 = get_part3_for_one_moment_lower(energy_integral_pieces_lowerz) + d3 = get_part3_for_one_moment_lower(cubic_integral_pieces_lowerz) + + # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is. + correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi) * integral_correction_sharpness * vpa_unnorm^2 / vthe[1,ir]^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe[1,ir]^2) + for ivpa ∈ 1:sigma_ind + # We only add the corrections to 'part3', so zero them out for negative v_∥. + # I think this is only actually significant for `sigma_ind-1` and + # `sigma_ind`. Even though `sigma_ind` is part of the distribution + # function that we are correcting, for v_∥>0, it affects the integral in + # the 'sigma_cell' between `sigma_ind-1` and `sigma_ind`, which would + # affect the numerically calculated integrals for f(v_∥<0), so if we + # 'corrected' its value, those integrals would change and the constraints + # would not be exactly satisfied. The difference should be small, as the + # correction at that point is multiplied by + # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0. + correction0_integral_pieces[ivpa] = 0.0 + end + correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[1,ir] + correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[1,ir] + correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[1,ir] + correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[1,ir] + correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[1,ir] + correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[1,ir] + + alpha = get_part3_for_one_moment_lower(correction0_integral_pieces) + beta = get_part3_for_one_moment_lower(correction1_integral_pieces) + gamma = get_part3_for_one_moment_lower(correction2_integral_pieces) + delta = get_part3_for_one_moment_lower(correction3_integral_pieces) + epsilon = get_part3_for_one_moment_lower(correction4_integral_pieces) + zeta = get_part3_for_one_moment_lower(correction5_integral_pieces) + eta = get_part3_for_one_moment_lower(correction6_integral_pieces) + + return a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta +end + +function get_integrals_and_derivatives_upperz( + vcut, plus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, u_over_vt, + density_integral_pieces_upperz, flow_integral_pieces_upperz, + energy_integral_pieces_upperz, cubic_integral_pieces_upperz, + quartic_integral_pieces_upperz, bc_constraints) + # vcut_fraction is the fraction of the distance between plus_vcut_ind and + # plus_vcut_ind+1 where vcut is. + vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) + + function get_for_one_moment(integral_pieces) + # Integral contribution from the cell containing vcut + # Define these as follows to be consistent with the way the cutoff is + # applied around plus_vcut_ind below. + # Note that `integral_vcut_cell_part1` and `integral_vcut_cell_part2` + # include all the contributions from the grid points `plus_vcut_ind` + # and `plus_vcut_ind+1`, not just those from 'inside' the grid cell. + if vcut_fraction > 0.5 + integral_vcut_cell_part2 = integral_pieces[plus_vcut_ind] + + integral_pieces[plus_vcut_ind+1] * (vcut_fraction - 0.5) + integral_vcut_cell_part1 = integral_pieces[plus_vcut_ind+1] * (1.5 - vcut_fraction) + + # part1prime is d(part1)/d(vcut) + part1prime = -integral_pieces[plus_vcut_ind+1] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) + else + integral_vcut_cell_part2 = integral_pieces[plus_vcut_ind] * (0.5 + vcut_fraction) + integral_vcut_cell_part1 = integral_pieces[plus_vcut_ind] * (0.5 - vcut_fraction) + + integral_pieces[plus_vcut_ind+1] + + # part1prime is d(part1)/d(vcut) + part1prime = -integral_pieces[plus_vcut_ind] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) + end + + part1 = sum(@view integral_pieces[plus_vcut_ind+2:end]) + integral_vcut_cell_part1 + + # Integral contribution from the cell containing sigma + integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1]) + + part2 = sum(@view integral_pieces[sigma_ind+2:plus_vcut_ind-1]) + part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind+1] + sigma_fraction * integral_sigma_cell + # part2prime is d(part2)/d(vcut) + part2prime = -part1prime + + return part1, part1prime, part2, part2prime + end + this_a1, this_a1prime, this_a2, this_a2prime = get_for_one_moment(density_integral_pieces_upperz) + this_b1, this_b1prime, this_b2, _ = get_for_one_moment(flow_integral_pieces_upperz) + this_c1, this_c1prime, this_c2, this_c2prime = get_for_one_moment(energy_integral_pieces_upperz) + this_d1, this_d1prime, this_d2, _ = get_for_one_moment(cubic_integral_pieces_upperz) + this_e1, this_e1prime, this_e2, this_e2prime = get_for_one_moment(quartic_integral_pieces_upperz) + + return get_residual_and_coefficients_for_bc( + this_a1, this_a1prime, this_a2, this_a2prime, this_b1, + this_b1prime, this_c1, this_c1prime, this_c2, this_c2prime, + this_d1, this_d1prime, this_e1, this_e1prime, this_e2, + this_e2prime, u_over_vt, bc_constraints)..., + this_a2, this_b2, this_c2, this_d2 +end + +function get_upperz_integral_correction_components( + pdf, vthe, vpa, vpa_unnorm, u_over_vt, sigma_ind, sigma_fraction, + vcut, minus_vcut_ind, plus_vcut_ind, ir, bc_constraints) + + # Need to recalculate integral pieces with the updated distribution function + density_integral_pieces_upperz = vpa.scratch3 + flow_integral_pieces_upperz = vpa.scratch4 + energy_integral_pieces_upperz = vpa.scratch5 + cubic_integral_pieces_upperz = vpa.scratch6 + quartic_integral_pieces_upperz = vpa.scratch7 + fill_integral_pieces!( + @view(pdf[:,1,end,ir]), vthe[end,ir], vpa, vpa_unnorm, + density_integral_pieces_upperz, flow_integral_pieces_upperz, + energy_integral_pieces_upperz, cubic_integral_pieces_upperz, + quartic_integral_pieces_upperz) + + # Update the part2 integrals since we've applied the A and C factors + _, _, _, _, a2, b2, c2, d2 = + get_integrals_and_derivatives_upperz( + vcut, plus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, u_over_vt, + density_integral_pieces_upperz, flow_integral_pieces_upperz, + energy_integral_pieces_upperz, cubic_integral_pieces_upperz, + quartic_integral_pieces_upperz, bc_constraints) + + function get_part3_for_one_moment_upper(integral_pieces) + # Integral contribution from the cell containing sigma + integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1]) + + part3 = sum(@view integral_pieces[minus_vcut_ind-1:sigma_ind-1]) + part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell + + return part3 + end + a3 = get_part3_for_one_moment_upper(density_integral_pieces_upperz) + b3 = get_part3_for_one_moment_upper(flow_integral_pieces_upperz) + c3 = get_part3_for_one_moment_upper(energy_integral_pieces_upperz) + d3 = get_part3_for_one_moment_upper(cubic_integral_pieces_upperz) + + # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is. + correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi) * integral_correction_sharpness * vpa_unnorm^2 / vthe[end,ir]^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe[end,ir]^2) + for ivpa ∈ sigma_ind:vpa.n + # We only add the corrections to 'part3', so zero them out for positive v_∥. + # I think this is only actually significant for `sigma_ind` and + # `sigma_ind+1`. Even though `sigma_ind` is part of the distribution + # function that we are correcting, for v_∥<0, it affects the integral in + # the 'sigma_cell' between `sigma_ind` and `sigma_ind+1`, which would + # affect the numerically calculated integrals for f(v_∥>0), so if we + # 'corrected' its value, those integrals would change and the constraints + # would not be exactly satisfied. The difference should be small, as the + # correction at that point is multiplied by + # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0. + correction0_integral_pieces[ivpa] = 0.0 + end + correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[end,ir] + correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[end,ir] + correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[end,ir] + correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[end,ir] + correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[end,ir] + correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[end,ir] + + alpha = get_part3_for_one_moment_upper(correction0_integral_pieces) + beta = get_part3_for_one_moment_upper(correction1_integral_pieces) + gamma = get_part3_for_one_moment_upper(correction2_integral_pieces) + delta = get_part3_for_one_moment_upper(correction3_integral_pieces) + epsilon = get_part3_for_one_moment_upper(correction4_integral_pieces) + zeta = get_part3_for_one_moment_upper(correction5_integral_pieces) + eta = get_part3_for_one_moment_upper(correction6_integral_pieces) + + return a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta +end + @timeit global_timer enforce_boundary_condition_on_electron_pdf!( pdf, phi, vthe, upar, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_adv, moments, vpa_diffusion, me_over_mi, ir; @@ -2373,39 +2678,6 @@ end newton_max_its = 100 - function get_residual_and_coefficients_for_bc(a1, a1prime, a2, a2prime, b1, b1prime, - c1, c1prime, c2, c2prime, d1, d1prime, - e1, e1prime, e2, e2prime, u_over_vt) - if bc_constraints - alpha = a1 + 2.0 * a2 - alphaprime = a1prime + 2.0 * a2prime - beta = c1 + 2.0 * c2 - betaprime = c1prime + 2.0 * c2prime - gamma = u_over_vt^2 * alpha - 2.0 * u_over_vt * b1 + beta - gammaprime = u_over_vt^2 * alphaprime - 2.0 * u_over_vt * b1prime + betaprime - delta = u_over_vt^2 * beta - 2.0 * u_over_vt * d1 + e1 + 2.0 * e2 - deltaprime = u_over_vt^2 * betaprime - 2.0 * u_over_vt * d1prime + e1prime + 2.0 * e2prime - - A = (0.5 * beta - delta) / (beta * gamma - alpha * delta) - Aprime = (0.5 * betaprime - deltaprime - - (0.5 * beta - delta) * (gamma * betaprime + beta * gammaprime - delta * alphaprime - alpha * deltaprime) - / (beta * gamma - alpha * delta) - ) / (beta * gamma - alpha * delta) - C = (1.0 - alpha * A) / beta - Cprime = -(A * alphaprime + alpha * Aprime) / beta - (1.0 - alpha * A) * betaprime / beta^2 - else - A = 1.0 - Aprime = 0.0 - C = 0.0 - Cprime = 0.0 - end - - epsilon = A * b1 + C * d1 - u_over_vt - epsilonprime = b1 * Aprime + A * b1prime + d1 * Cprime + C * d1prime - - return epsilon, epsilonprime, A, C - end - @serial_region begin if z.irank == 0 if z.bc != "wall" @@ -2444,72 +2716,28 @@ end # Note that we need to include the normalisation factor of 1/sqrt(pi) that # would be factored in by integrate_over_vspace(). This will need to # change/adapt when we support 2V as well as 1V. - density_integral_pieces_lowerz = @views @. vpa.scratch3 = pdf[:,1,1] * vpa.wgts / sqrt(pi) - flow_integral_pieces_lowerz = @. vpa.scratch4 = density_integral_pieces_lowerz * vpa_unnorm / vthe[1] - energy_integral_pieces_lowerz = @. vpa.scratch5 = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1] - cubic_integral_pieces_lowerz = @. vpa.scratch6 = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1] - quartic_integral_pieces_lowerz = @. vpa.scratch7 = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1] - - function get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) - # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and - # minus_vcut_ind where -vcut is. - local vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) - - function get_for_one_moment(integral_pieces) - # Integral contributions from the cell containing vcut. - # Define these as follows to be consistent with the way the cutoff is - # applied around plus_vcut_ind below. - # Note that `integral_vcut_cell_part1` and `integral_vcut_cell_part2` - # include all the contributions from the grid points - # `minus_vcut_ind-1` and `minus_vcut_ind`, not just those from - # 'inside' the grid cell. - if vcut_fraction < 0.5 - integral_vcut_cell_part2 = integral_pieces[minus_vcut_ind-1] * (0.5 - vcut_fraction) + - integral_pieces[minus_vcut_ind] - integral_vcut_cell_part1 = integral_pieces[minus_vcut_ind-1] * (0.5 + vcut_fraction) - - # part1prime is d(part1)/d(vcut) - part1prime = -integral_pieces[minus_vcut_ind-1] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) - else - integral_vcut_cell_part2 = integral_pieces[minus_vcut_ind] * (1.5 - vcut_fraction) - integral_vcut_cell_part1 = integral_pieces[minus_vcut_ind-1] + - integral_pieces[minus_vcut_ind] * (vcut_fraction - 0.5) - - # part1prime is d(part1)/d(vcut) - part1prime = -integral_pieces[minus_vcut_ind] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) - end - - part1 = sum(@view integral_pieces[1:minus_vcut_ind-2]) + integral_vcut_cell_part1 - - # Integral contribution from the cell containing sigma - integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind]) - - part2 = sum(@view integral_pieces[minus_vcut_ind+1:sigma_ind-2]) - part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind-1] + sigma_fraction * integral_sigma_cell - # part2prime is d(part2)/d(vcut) - part2prime = -part1prime - - return part1, part1prime, part2, part2prime - end - this_a1, this_a1prime, this_a2, this_a2prime = get_for_one_moment(density_integral_pieces_lowerz) - this_b1, this_b1prime, this_b2, _ = get_for_one_moment(flow_integral_pieces_lowerz) - this_c1, this_c1prime, this_c2, this_c2prime = get_for_one_moment(energy_integral_pieces_lowerz) - this_d1, this_d1prime, this_d2, _ = get_for_one_moment(cubic_integral_pieces_lowerz) - this_e1, this_e1prime, this_e2, this_e2prime = get_for_one_moment(quartic_integral_pieces_lowerz) - - return get_residual_and_coefficients_for_bc( - this_a1, this_a1prime, this_a2, this_a2prime, this_b1, - this_b1prime, this_c1, this_c1prime, this_c2, this_c2prime, - this_d1, this_d1prime, this_e1, this_e1prime, this_e2, - this_e2prime, u_over_vt)..., - this_a2, this_b2, this_c2, this_d2 - end + density_integral_pieces_lowerz = vpa.scratch3 + flow_integral_pieces_lowerz = vpa.scratch4 + energy_integral_pieces_lowerz = vpa.scratch5 + cubic_integral_pieces_lowerz = vpa.scratch6 + quartic_integral_pieces_lowerz = vpa.scratch7 + fill_integral_pieces!( + @view(pdf[:,1,1]), vthe[1], vpa, vpa_unnorm, + density_integral_pieces_lowerz, flow_integral_pieces_lowerz, + energy_integral_pieces_lowerz, cubic_integral_pieces_lowerz, + quartic_integral_pieces_lowerz) counter = 1 A = 1.0 C = 0.0 # Always do at least one update of vcut - epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) + epsilon, epsilonprime, A, C, a2, b2, c2, d2 = + get_integrals_and_derivatives_lowerz( + vcut, minus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, + u_over_vt, density_integral_pieces_lowerz, + flow_integral_pieces_lowerz, energy_integral_pieces_lowerz, + cubic_integral_pieces_lowerz, quartic_integral_pieces_lowerz, + bc_constraints) if bc_constraints while true # Newton iteration update. Note that primes denote derivatives with @@ -2531,7 +2759,13 @@ end vcut = vcut + delta_v minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut) - epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) + epsilon, epsilonprime, A, C, a2, b2, c2, d2 = + get_integrals_and_derivatives_lowerz( + vcut, minus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, + u_over_vt, density_integral_pieces_lowerz, + flow_integral_pieces_lowerz, energy_integral_pieces_lowerz, + cubic_integral_pieces_lowerz, quartic_integral_pieces_lowerz, + bc_constraints) if abs(epsilon) < newton_tol break @@ -2558,7 +2792,13 @@ end break end - epsilon, epsilonprime, _, _, _, _, _, _ = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) + epsilon, epsilonprime, _, _, _, _, _, _ = + get_integrals_and_derivatives_lowerz( + vcut, minus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, + u_over_vt, density_integral_pieces_lowerz, + flow_integral_pieces_lowerz, energy_integral_pieces_lowerz, + cubic_integral_pieces_lowerz, quartic_integral_pieces_lowerz, + bc_constraints) end end @@ -2591,60 +2831,11 @@ end # boundary condition, but would not be numerically true because of the # interpolation. - # Need to recalculate these with the updated distribution function - @views @. density_integral_pieces_lowerz = pdf[:,1,1] * vpa.wgts / sqrt(pi) - @. flow_integral_pieces_lowerz = density_integral_pieces_lowerz * vpa_unnorm / vthe[1] - @. energy_integral_pieces_lowerz = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1] - @. cubic_integral_pieces_lowerz = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1] - @. quartic_integral_pieces_lowerz = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1] - - # Update the part2 integrals since we've applied the A and C factors - _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) - - function get_part3_for_one_moment_lower(integral_pieces) - # Integral contribution from the cell containing sigma - integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind]) - - part3 = sum(@view integral_pieces[sigma_ind+1:plus_vcut_ind+1]) - part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell - - return part3 - end - a3 = get_part3_for_one_moment_lower(density_integral_pieces_lowerz) - b3 = get_part3_for_one_moment_lower(flow_integral_pieces_lowerz) - c3 = get_part3_for_one_moment_lower(energy_integral_pieces_lowerz) - d3 = get_part3_for_one_moment_lower(cubic_integral_pieces_lowerz) - - # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is. - sharpness = 4.0 - correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[1]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[1]^2) - for ivpa ∈ 1:sigma_ind - # We only add the corrections to 'part3', so zero them out for negative v_∥. - # I think this is only actually significant for `sigma_ind-1` and - # `sigma_ind`. Even though `sigma_ind` is part of the distribution - # function that we are correcting, for v_∥>0, it affects the integral in - # the 'sigma_cell' between `sigma_ind-1` and `sigma_ind`, which would - # affect the numerically calculated integrals for f(v_∥<0), so if we - # 'corrected' its value, those integrals would change and the constraints - # would not be exactly satisfied. The difference should be small, as the - # correction at that point is multiplied by - # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0. - correction0_integral_pieces[ivpa] = 0.0 - end - correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[1] - correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[1] - correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[1] - correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[1] - correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[1] - correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[1] - - alpha = get_part3_for_one_moment_lower(correction0_integral_pieces) - beta = get_part3_for_one_moment_lower(correction1_integral_pieces) - gamma = get_part3_for_one_moment_lower(correction2_integral_pieces) - delta = get_part3_for_one_moment_lower(correction3_integral_pieces) - epsilon = get_part3_for_one_moment_lower(correction4_integral_pieces) - zeta = get_part3_for_one_moment_lower(correction5_integral_pieces) - eta = get_part3_for_one_moment_lower(correction6_integral_pieces) + a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta = + get_lowerz_integral_correction_components( + @view(pdf[:,1,1]), vthe[1], vpa, vpa_unnorm, u_over_vt, + sigma_ind, sigma_fraction, vcut, minus_vcut_ind, plus_vcut_ind, + bc_constraints) # Update the v_∥>0 part of f to correct the moments as # f(0 0.5 - integral_vcut_cell_part2 = integral_pieces[plus_vcut_ind] + - integral_pieces[plus_vcut_ind+1] * (vcut_fraction - 0.5) - integral_vcut_cell_part1 = integral_pieces[plus_vcut_ind+1] * (1.5 - vcut_fraction) - - # part1prime is d(part1)/d(vcut) - part1prime = -integral_pieces[plus_vcut_ind+1] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) - else - integral_vcut_cell_part2 = integral_pieces[plus_vcut_ind] * (0.5 + vcut_fraction) - integral_vcut_cell_part1 = integral_pieces[plus_vcut_ind] * (0.5 - vcut_fraction) + - integral_pieces[plus_vcut_ind+1] - - # part1prime is d(part1)/d(vcut) - part1prime = -integral_pieces[plus_vcut_ind] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) - end - - part1 = sum(@view integral_pieces[plus_vcut_ind+2:end]) + integral_vcut_cell_part1 - - # Integral contribution from the cell containing sigma - integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1]) - - part2 = sum(@view integral_pieces[sigma_ind+2:plus_vcut_ind-1]) - part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind+1] + sigma_fraction * integral_sigma_cell - # part2prime is d(part2)/d(vcut) - part2prime = -part1prime - - return part1, part1prime, part2, part2prime - end - this_a1, this_a1prime, this_a2, this_a2prime = get_for_one_moment(density_integral_pieces_upperz) - this_b1, this_b1prime, this_b2, _ = get_for_one_moment(flow_integral_pieces_upperz) - this_c1, this_c1prime, this_c2, this_c2prime = get_for_one_moment(energy_integral_pieces_upperz) - this_d1, this_d1prime, this_d2, _ = get_for_one_moment(cubic_integral_pieces_upperz) - this_e1, this_e1prime, this_e2, this_e2prime = get_for_one_moment(quartic_integral_pieces_upperz) - - return get_residual_and_coefficients_for_bc( - this_a1, this_a1prime, this_a2, this_a2prime, this_b1, - this_b1prime, this_c1, this_c1prime, this_c2, this_c2prime, - this_d1, this_d1prime, this_e1, this_e1prime, this_e2, - this_e2prime, u_over_vt)..., - this_a2, this_b2, this_c2, this_d2 - end + density_integral_pieces_upperz = vpa.scratch3 + flow_integral_pieces_upperz = vpa.scratch4 + energy_integral_pieces_upperz = vpa.scratch5 + cubic_integral_pieces_upperz = vpa.scratch6 + quartic_integral_pieces_upperz = vpa.scratch7 + fill_integral_pieces!( + @view(pdf[:,1,end]), vthe[end], vpa, vpa_unnorm, + density_integral_pieces_upperz, flow_integral_pieces_upperz, + energy_integral_pieces_upperz, cubic_integral_pieces_upperz, + quartic_integral_pieces_upperz) counter = 1 # Always do at least one update of vcut - epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) + epsilon, epsilonprime, A, C, a2, b2, c2, d2 = + get_integrals_and_derivatives_upperz( + vcut, plus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, u_over_vt, + density_integral_pieces_upperz, flow_integral_pieces_upperz, + energy_integral_pieces_upperz, cubic_integral_pieces_upperz, + quartic_integral_pieces_upperz, bc_constraints) if bc_constraints while true # Newton iteration update. Note that primes denote derivatives with @@ -2801,7 +2948,13 @@ end vcut = vcut + delta_v plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut) - epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) + epsilon, epsilonprime, A, C, a2, b2, c2, d2 = + get_integrals_and_derivatives_upperz( + vcut, plus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, + u_over_vt, density_integral_pieces_upperz, + flow_integral_pieces_upperz, energy_integral_pieces_upperz, + cubic_integral_pieces_upperz, quartic_integral_pieces_upperz, + bc_constraints) if abs(epsilon) < newton_tol break @@ -2828,7 +2981,13 @@ end break end - epsilon, epsilonprime, _, _, _, _, _, _ = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) + epsilon, epsilonprime, _, _, _, _, _, _ = + get_integrals_and_derivatives_upperz( + vcut, plus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, + u_over_vt, density_integral_pieces_upperz, + flow_integral_pieces_upperz, energy_integral_pieces_upperz, + cubic_integral_pieces_upperz, quartic_integral_pieces_upperz, + bc_constraints) end end @@ -2861,60 +3020,11 @@ end # boundary condition, but would not be numerically true because of the # interpolation. - # Need to recalculate these with the updated distribution function - @views @. density_integral_pieces_upperz = pdf[:,1,end] * vpa.wgts / sqrt(pi) - @. flow_integral_pieces_upperz = density_integral_pieces_upperz * vpa_unnorm / vthe[end] - @. energy_integral_pieces_upperz = flow_integral_pieces_upperz * vpa_unnorm / vthe[end] - @. cubic_integral_pieces_upperz = energy_integral_pieces_upperz * vpa_unnorm / vthe[end] - @. quartic_integral_pieces_upperz = cubic_integral_pieces_upperz * vpa_unnorm / vthe[end] - - # Update the part2 integrals since we've applied the A and C factors - _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) - - function get_part3_for_one_moment_upper(integral_pieces) - # Integral contribution from the cell containing sigma - integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1]) - - part3 = sum(@view integral_pieces[minus_vcut_ind-1:sigma_ind-1]) - part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell - - return part3 - end - a3 = get_part3_for_one_moment_upper(density_integral_pieces_upperz) - b3 = get_part3_for_one_moment_upper(flow_integral_pieces_upperz) - c3 = get_part3_for_one_moment_upper(energy_integral_pieces_upperz) - d3 = get_part3_for_one_moment_upper(cubic_integral_pieces_upperz) - - # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is. - sharpness = 4.0 - correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[end]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[end]^2) - for ivpa ∈ sigma_ind:vpa.n - # We only add the corrections to 'part3', so zero them out for positive v_∥. - # I think this is only actually significant for `sigma_ind` and - # `sigma_ind+1`. Even though `sigma_ind` is part of the distribution - # function that we are correcting, for v_∥<0, it affects the integral in - # the 'sigma_cell' between `sigma_ind` and `sigma_ind+1`, which would - # affect the numerically calculated integrals for f(v_∥>0), so if we - # 'corrected' its value, those integrals would change and the constraints - # would not be exactly satisfied. The difference should be small, as the - # correction at that point is multiplied by - # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0. - correction0_integral_pieces[ivpa] = 0.0 - end - correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[end] - correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[end] - correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[end] - correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[end] - correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[end] - correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[end] - - alpha = get_part3_for_one_moment_upper(correction0_integral_pieces) - beta = get_part3_for_one_moment_upper(correction1_integral_pieces) - gamma = get_part3_for_one_moment_upper(correction2_integral_pieces) - delta = get_part3_for_one_moment_upper(correction3_integral_pieces) - epsilon = get_part3_for_one_moment_upper(correction4_integral_pieces) - zeta = get_part3_for_one_moment_upper(correction5_integral_pieces) - eta = get_part3_for_one_moment_upper(correction6_integral_pieces) + a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta = + get_upperz_integral_correction_components( + @view(pdf[:,1,end]), vthe[end], vpa, vpa_unnorm, u_over_vt, + sigma_ind, sigma_fraction, vcut, minus_vcut_ind, plus_vcut_ind, + bc_constraints) # Update the v_∥>0 part of f to correct the moments as # f(0 Date: Wed, 13 Nov 2024 22:20:04 +0000 Subject: [PATCH 05/10] Add wall bc to kinetic electron Jacobian The boundary condition matrix should be added to the Jacobian so that it acts as a constraint setting the boundary points equal to the values they would have due to the boundary condition. This can be arranged by setting the RHS equal to zero, and the rows of the Jacobian corresponding to the boundary points to (I - M_wall), where I is the identity and M_wall is the matrix that would generate the perturbations of the boundary points given the perturbations of the state vector. --- moment_kinetics/src/coordinates.jl | 12 +- .../src/electron_kinetic_equation.jl | 1288 +++++++++++++++-- moment_kinetics/test/jacobian_matrix_tests.jl | 561 ++++++- .../test/nonlinear_solver_tests.jl | 6 +- 4 files changed, 1661 insertions(+), 206 deletions(-) diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl index 3cb68bf4a..33c8b9299 100644 --- a/moment_kinetics/src/coordinates.jl +++ b/moment_kinetics/src/coordinates.jl @@ -95,6 +95,8 @@ struct coordinate{T <: AbstractVector{mk_float},Tbparams} scratch8::Array{mk_float,1} # scratch9 is an array used for intermediate calculations requiring n entries scratch9::Array{mk_float,1} + # scratch10 is an array used for intermediate calculations requiring n entries + scratch10::Array{mk_float,1} # scratch_int_nelement_plus_1 is an integer array used for intermediate calculations # requiring nelement+1 entries scratch_int_nelement_plus_1::Array{mk_int,1} @@ -393,11 +395,11 @@ function define_coordinate(coord_input::NamedTuple; parallel_io::Bool=false, coord_input.cheb_option, coord_input.bc, coord_input.boundary_parameters, wgts, uniform_grid, duniform_dgrid, scratch, copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), - copy(scratch), scratch_int_nelement_plus_1, scratch_shared, scratch_shared2, - scratch_shared3, scratch_2d, copy(scratch_2d), advection, send_buffer, - receive_buffer, comm, local_io_range, global_io_range, element_scale, - element_shift, coord_input.element_spacing_option, element_boundaries, - radau_first_element, other_nodes, one_over_denominator) + copy(scratch), copy(scratch), scratch_int_nelement_plus_1, scratch_shared, + scratch_shared2, scratch_shared3, scratch_2d, copy(scratch_2d), advection, + send_buffer, receive_buffer, comm, local_io_range, global_io_range, + element_scale, element_shift, coord_input.element_spacing_option, + element_boundaries, radau_first_element, other_nodes, one_over_denominator) if coord.n == 1 && occursin("v", coord.name) spectral = null_velocity_dimension_info() diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 664c59f8a..0b9e9bc09 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -18,8 +18,8 @@ using ..calculus: derivative!, second_derivative!, integral, using ..communication using ..gauss_legendre: gausslegendre_info using ..input_structs -using ..interpolation: interpolate_to_grid_1d!, - interpolate_symmetric! +using ..interpolation: interpolate_to_grid_1d!, fill_1d_interpolation_matrix!, + interpolate_symmetric!, fill_interpolate_symmetric_matrix! using ..type_definitions: mk_float, mk_int using ..array_allocation: allocate_float using ..electron_fluid_equations: calculate_electron_moments!, @@ -897,7 +897,7 @@ global_rank[] == 0 && println("recalculating precon") nl_solver_params.preconditioners[ir] fill_electron_kinetic_equation_Jacobian!( - precon_matrix, f_electron_new, electron_ppar_new, moments, + precon_matrix, f_electron_new, electron_ppar_new, moments, phi, collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, @@ -1099,10 +1099,10 @@ global_rank[] == 0 && println("recalculating precon") # guess is zero, fill_electron_kinetic_equation_Jacobian!( explicit_J, f_electron_new, electron_ppar_new, moments, - collisions, composition, z, vperp, vpa, z_spectral, - vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, - external_source_settings, num_diss_params, t_params, ion_dt, ir, - evolve_ppar, :explicit_z, false) + phi, collisions, composition, z, vperp, vpa, z_spectral, + vperp_spectral, vpa_spectral, z_advect, vpa_advect, + scratch_dummy, external_source_settings, num_diss_params, + t_params, ion_dt, ir, evolve_ppar, :explicit_z, false) end begin_z_region() @loop_z iz begin @@ -1112,7 +1112,7 @@ global_rank[] == 0 && println("recalculating precon") A, f_electron_new[:,:,iz], electron_ppar_new[iz], dpdf_dz[:,:,iz], dpdf_dvpa[:,:,iz], z_speed, moments, zeroth_moment[iz], first_moment[iz], second_moment[iz], - third_moment[iz], dthird_moment_dz[iz], collisions, + third_moment[iz], dthird_moment_dz[iz], phi[iz], collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, @@ -1152,7 +1152,7 @@ global_rank[] == 0 && println("recalculating precon") # Get sparse matrix for explicit, right-hand-side part of the # solve. fill_electron_kinetic_equation_Jacobian!( - explicit_J, f_electron_new, electron_ppar_new, moments, + explicit_J, f_electron_new, electron_ppar_new, moments, phi, collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, ir, @@ -1522,36 +1522,7 @@ global_rank[] == 0 && println("recalculating precon") vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if z.bc ∈ ("wall", "constant") && (z.irank == 0 || z.irank == z.nrank - 1) - # Boundary conditions on incoming part of distribution function. Note - # that as density, upar, ppar do not change in this implicit step, - # f_electron_newvar, f_old, and residual should all be zero at exactly - # the same set of grid points, so it is reasonable to zero-out - # `residual` to impose the boundary condition. We impose this after - # subtracting f_old in case rounding errors, etc. mean that at some - # point f_old had a different boundary condition cut-off index. - begin_vperp_vpa_region() - v_unnorm = vpa.scratch - zero = 1.0e-14 - if z.irank == 0 - v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[1,ir], - moments.electron.upar[1,ir], true, true) - @loop_vperp_vpa ivperp ivpa begin - if v_unnorm[ivpa] > -zero - f_electron_residual[ivpa,ivperp,1] = 0.0 - end - end - end - if z.irank == z.nrank - 1 - v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[end,ir], - moments.electron.upar[end,ir], true, true) - @loop_vperp_vpa ivperp ivpa begin - if v_unnorm[ivpa] < zero - f_electron_residual[ivpa,ivperp,end] = 0.0 - end - end - end - end + zero_z_boundary_condition_points(f_electron_residual, z, vpa, moments, ir) return nothing end @@ -1940,47 +1911,7 @@ to allow the outer r-loop to be parallelised. enforce_vperp_boundary_condition!(f_electron_residual, vperp.bc, vperp, vperp_spectral, vperp_adv, vperp_diffusion) end - if z.bc ∈ ("wall", "constant") && (z.irank == 0 || z.irank == z.nrank - 1) - # Boundary conditions on incoming part of distribution function. Note that - # as density, upar, ppar do not change in this implicit step, f_new, - # f_old, and residual should all be zero at exactly the same set of grid - # points, so it is reasonable to zero-out `residual` to impose the - # boundary condition. We impose this after subtracting f_old in case - # rounding errors, etc. mean that at some point f_old had a different - # boundary condition cut-off index. - begin_vperp_vpa_region() - v_unnorm = vpa.scratch - zero = 1.0e-14 - if z.irank == 0 - iz = 1 - v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], - fvec_in.electron_upar[iz,ir], true, true) - @loop_vperp_vpa ivperp ivpa begin - if v_unnorm[ivpa] > -zero - f_electron_residual[ivpa,ivperp,iz,ir] = 0.0 - end - end - end - if z.irank == z.nrank - 1 - iz = z.n - v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], - fvec_in.electron_upar[iz,ir], true, true) - @loop_vperp_vpa ivperp ivpa begin - if v_unnorm[ivpa] < zero - f_electron_residual[ivpa,ivperp,iz,ir] = 0.0 - end - end - end - end - begin_z_region() - @loop_z iz begin - @views moment_constraints_on_residual!(f_electron_residual[:,:,iz], - f_electron_new[:,:,iz], - (evolve_density=true, - evolve_upar=true, - evolve_ppar=true), - vpa) - end + zero_z_boundary_condition_points(f_electron_residual, z, vpa, moments, ir) return nothing end @@ -2150,19 +2081,19 @@ function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vp end end -function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir) - u_over_vt = upar[1,ir] / vthe[1,ir] +function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa) + u_over_vt = upar / vthe # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar # shift' sigma = -u_over_vt - vpa_unnorm = @. vpa.scratch2 = vthe[1,ir] * (vpa.grid - sigma) + vpa_unnorm = @. vpa.scratch2 = vthe * (vpa.grid - sigma) # Initial guess for cut-off velocity is result from previous RK stage (which # might be the previous timestep if this is the first stage). Recalculate this # value from phi. - vcut = sqrt(phi[1,ir] / me_over_mi) + vcut = sqrt(phi / me_over_mi) # -vcut is between minus_vcut_ind-1 and minus_vcut_ind minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut) @@ -2224,20 +2155,20 @@ function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir) reversed_wpa_of_minus_vpa end -function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir) - u_over_vt = upar[end,ir] / vthe[end,ir] +function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa) + u_over_vt = upar / vthe # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar # shift' sigma = -u_over_vt # Delete the upar contribution here if ignoring the 'upar shift' - vpa_unnorm = @. vpa.scratch2 = vthe[end,ir] * (vpa.grid - sigma) + vpa_unnorm = @. vpa.scratch2 = vthe * (vpa.grid - sigma) # Initial guess for cut-off velocity is result from previous RK stage (which # might be the previous timestep if this is the first stage). Recalculate this # value from phi. - vcut = sqrt(phi[end,ir] / me_over_mi) + vcut = sqrt(phi / me_over_mi) # vcut is between plus_vcut_ind and plus_vcut_ind+1 plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut) @@ -2311,7 +2242,7 @@ function fill_integral_pieces!( pdf_part, vthe, vpa, vpa_unnorm, density_integral_pieces, flow_integral_pieces, energy_integral_pieces, cubic_integral_pieces, quartic_integral_pieces) - @. density_integral_pieces = pdf_part * vpa.wgts / sqrt(pi) + @. density_integral_pieces = pdf_part * vpa.wgts / sqrt(π) @. flow_integral_pieces = density_integral_pieces * vpa_unnorm / vthe @. energy_integral_pieces = flow_integral_pieces * vpa_unnorm / vthe @. cubic_integral_pieces = energy_integral_pieces * vpa_unnorm / vthe @@ -2412,8 +2343,8 @@ function get_integrals_and_derivatives_lowerz( end function get_lowerz_integral_correction_components( - pdf, vthe, vpa, vpa_unnorm, u_over_vt, sigma_ind, sigma_fraction, - vcut, minus_vcut_ind, plus_vcut_ind, ir, bc_constraints) + pdf_slice, vthe, vpa, vpa_unnorm, u_over_vt, sigma_ind, sigma_fraction, vcut, + minus_vcut_ind, plus_vcut_ind, bc_constraints) # Need to recalculate integral pieces with the updated distribution function density_integral_pieces_lowerz = vpa.scratch3 @@ -2422,7 +2353,7 @@ function get_lowerz_integral_correction_components( cubic_integral_pieces_lowerz = vpa.scratch6 quartic_integral_pieces_lowerz = vpa.scratch7 fill_integral_pieces!( - @view(pdf[:,1,1,ir]), vthe[1,ir], vpa, vpa_unnorm, density_integral_pieces_lowerz, + pdf_slice, vthe, vpa, vpa_unnorm, density_integral_pieces_lowerz, flow_integral_pieces_lowerz, energy_integral_pieces_lowerz, cubic_integral_pieces_lowerz, quartic_integral_pieces_lowerz) @@ -2449,7 +2380,7 @@ function get_lowerz_integral_correction_components( d3 = get_part3_for_one_moment_lower(cubic_integral_pieces_lowerz) # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is. - correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi) * integral_correction_sharpness * vpa_unnorm^2 / vthe[1,ir]^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe[1,ir]^2) + correction0_integral_pieces = @. vpa.scratch3 = pdf_slice * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe^2) for ivpa ∈ 1:sigma_ind # We only add the corrections to 'part3', so zero them out for negative v_∥. # I think this is only actually significant for `sigma_ind-1` and @@ -2463,12 +2394,12 @@ function get_lowerz_integral_correction_components( # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0. correction0_integral_pieces[ivpa] = 0.0 end - correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[1,ir] - correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[1,ir] - correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[1,ir] - correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[1,ir] - correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[1,ir] - correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[1,ir] + correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe + correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe + correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe + correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe + correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe + correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe alpha = get_part3_for_one_moment_lower(correction0_integral_pieces) beta = get_part3_for_one_moment_lower(correction1_integral_pieces) @@ -2540,8 +2471,8 @@ function get_integrals_and_derivatives_upperz( end function get_upperz_integral_correction_components( - pdf, vthe, vpa, vpa_unnorm, u_over_vt, sigma_ind, sigma_fraction, - vcut, minus_vcut_ind, plus_vcut_ind, ir, bc_constraints) + pdf_slice, vthe, vpa, vpa_unnorm, u_over_vt, sigma_ind, sigma_fraction, vcut, + minus_vcut_ind, plus_vcut_ind, bc_constraints) # Need to recalculate integral pieces with the updated distribution function density_integral_pieces_upperz = vpa.scratch3 @@ -2550,7 +2481,7 @@ function get_upperz_integral_correction_components( cubic_integral_pieces_upperz = vpa.scratch6 quartic_integral_pieces_upperz = vpa.scratch7 fill_integral_pieces!( - @view(pdf[:,1,end,ir]), vthe[end,ir], vpa, vpa_unnorm, + pdf_slice, vthe, vpa, vpa_unnorm, density_integral_pieces_upperz, flow_integral_pieces_upperz, energy_integral_pieces_upperz, cubic_integral_pieces_upperz, quartic_integral_pieces_upperz) @@ -2578,7 +2509,7 @@ function get_upperz_integral_correction_components( d3 = get_part3_for_one_moment_upper(cubic_integral_pieces_upperz) # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is. - correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi) * integral_correction_sharpness * vpa_unnorm^2 / vthe[end,ir]^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe[end,ir]^2) + correction0_integral_pieces = @. vpa.scratch3 = pdf_slice * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe^2) for ivpa ∈ sigma_ind:vpa.n # We only add the corrections to 'part3', so zero them out for positive v_∥. # I think this is only actually significant for `sigma_ind` and @@ -2592,12 +2523,12 @@ function get_upperz_integral_correction_components( # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0. correction0_integral_pieces[ivpa] = 0.0 end - correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[end,ir] - correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[end,ir] - correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[end,ir] - correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[end,ir] - correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[end,ir] - correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[end,ir] + correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe + correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe + correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe + correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe + correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe + correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe alpha = get_part3_for_one_moment_upper(correction0_integral_pieces) beta = get_part3_for_one_moment_upper(correction1_integral_pieces) @@ -2690,8 +2621,9 @@ end vpa_unnorm, u_over_vt, vcut, minus_vcut_ind, sigma, sigma_ind, sigma_fraction, element_with_zero, element_with_zero_boundary, last_point_near_zero, - reversed_wpa_of_minus_vpa = get_cutoff_params_lower(upar, vthe, phi, - me_over_mi, vpa, ir) + reversed_wpa_of_minus_vpa = + get_cutoff_params_lower(upar[1,ir], vthe[1,ir], phi[1,ir], me_over_mi, + vpa) # interpolate the pdf onto this grid # 'near zero' means in the range where @@ -2713,7 +2645,7 @@ end pdf[last_point_near_zero+1:end,1,1] .= reversed_pdf_far_from_zero # Per-grid-point contributions to moment integrals - # Note that we need to include the normalisation factor of 1/sqrt(pi) that + # Note that we need to include the normalisation factor of 1/sqrt(π) that # would be factored in by integrate_over_vspace(). This will need to # change/adapt when we support 2V as well as 1V. density_integral_pieces_lowerz = vpa.scratch3 @@ -2882,8 +2814,9 @@ end vpa_unnorm, u_over_vt, vcut, plus_vcut_ind, sigma, sigma_ind, sigma_fraction, element_with_zero, element_with_zero_boundary, first_point_near_zero, - reversed_wpa_of_minus_vpa = get_cutoff_params_upper(upar, vthe, phi, - me_over_mi, vpa, ir) + reversed_wpa_of_minus_vpa = + get_cutoff_params_upper(upar[end,ir], vthe[end,ir], phi[end,ir], + me_over_mi, vpa) # interpolate the pdf onto this grid # 'near zero' means in the range where @@ -2905,7 +2838,7 @@ end pdf[1:first_point_near_zero-1,1,end] .= reversed_pdf # Per-grid-point contributions to moment integrals - # Note that we need to include the normalisation factor of 1/sqrt(pi) that + # Note that we need to include the normalisation factor of 1/sqrt(π) that # would be factored in by integrate_over_vspace(). This will need to # change/adapt when we support 2V as well as 1V. density_integral_pieces_upperz = vpa.scratch3 @@ -3056,6 +2989,1089 @@ end return nothing end +# In several places it is useful to zero out (in residuals, etc.) the points that would be +# set by the z boundary condition. +function zero_z_boundary_condition_points(residual, z, vpa, moments, ir) + if z.bc ∈ ("wall", "constant",) && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index. + begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[1,ir], + moments.electron.upar[1,ir], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] > -zero + residual[ivpa,ivperp,1] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[end,ir], + moments.electron.upar[end,ir], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + residual[ivpa,ivperp,end] = 0.0 + end + end + end + end + return nothing +end + +""" + add_wall_boundary_condition_to_Jacobian!(jacobian, phi, pdf, ppar, vthe, upar, z, + vperp, vpa, vperp_spectral, vpa_spectral, + vpa_adv, moments, vpa_diffusion, me_over_mi, + ir) + +All the contributions that we add in this function have to be added with a -'ve sign so +that they combine with the 1 on the diagonal of the preconditioner matrix to make rows +corresponding to the boundary points which define constraint equations imposing the +boundary condition on those entries of δg (when the right-hand-side is set to zero). +""" +@timeit global_timer add_wall_boundary_condition_to_Jacobian!( + jacobian, phi, pdf, ppar, vthe, upar, z, vperp, vpa, + vperp_spectral, vpa_spectral, vpa_adv, moments, vpa_diffusion, + me_over_mi, ir, include, iz=nothing; + pdf_offset=0, ppar_offset=0) = begin + if z.bc != "wall" + return nothing + end + + if include ∈ (:all, :explicit_v) + include_lower = (z.irank == 0) + include_upper = (z.irank == z.nrank - 1) + zend = z.n + phi_lower = phi[1] + phi_upper = phi[end] + pdf_lower = @view pdf[:,:,1] + pdf_upper = @view pdf[:,:,end] + ppar_lower = ppar[1] + ppar_upper = ppar[end] + vthe_lower = vthe[1] + vthe_upper = vthe[end] + upar_lower = upar[1] + upar_upper = upar[end] + shared_mem_parallel = true + elseif include === :implicit_v + include_lower = (z.irank == 0) && iz == 1 + include_upper = (z.irank == z.nrank - 1) && iz == z.n + zend = 1 + phi_lower = phi_upper = phi + pdf_lower = pdf_upper = pdf + ppar_lower = ppar_upper = ppar + vthe_lower = vthe_upper = vthe + upar_lower = upar_upper = upar + + # When using :implicit_v, this function is called inside a loop that is already + # parallelised over z, so we cannot change the parallel region type. + shared_mem_parallel = false + else + return nothing + end + + if include_lower + shared_mem_parallel && begin_vperp_region() + @loop_vperp ivperp begin + # Skip velocity space boundary points. + if vperp.n > 1 && ivperp == vperp.n + continue + end + + # Get matrix entries for the response of the sheath-edge boundary condition. + # Ignore constraints, as these are non-linear and also should be small + # corrections which should not matter much for a preconditioner. + + jac_range = pdf_offset+(ivperp-1)*vpa.n+1 : pdf_offset+ivperp*vpa.n + jacobian_zbegin = @view jacobian[jac_range,jac_range] + jacobian_zbegin_ppar = @view jacobian[jac_range,ppar_offset+1] + + vpa_unnorm, u_over_vt, vcut, minus_vcut_ind, sigma, sigma_ind, sigma_fraction, + element_with_zero, element_with_zero_boundary, last_point_near_zero, + reversed_wpa_of_minus_vpa = + get_cutoff_params_lower(upar_lower, vthe_lower, phi_lower, me_over_mi, + vpa) + + plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut) + # plus_vcut_fraction is the fraction of the distance between plus_vcut_ind and + # plus_vcut_ind+1 where vcut is. + plus_vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) + if plus_vcut_fraction > 0.5 + last_nonzero_ind = plus_vcut_ind + 1 + else + last_nonzero_ind = plus_vcut_ind + end + + # Interpolate to the 'near zero' points + @views fill_interpolate_symmetric_matrix!( + jacobian_zbegin[sigma_ind:last_point_near_zero,element_with_zero_boundary:sigma_ind-1], + vpa_unnorm[sigma_ind:last_point_near_zero], + vpa_unnorm[element_with_zero_boundary:sigma_ind-1]) + + # Interpolate to the 'far from zero' points + @views fill_1d_interpolation_matrix!( + jacobian_zbegin[last_nonzero_ind:-1:last_point_near_zero+1,:], + reversed_wpa_of_minus_vpa[vpa.n-last_nonzero_ind+1:vpa.n-last_point_near_zero], + vpa, vpa_spectral) + + # Reverse the sign of the elements we just filled + jacobian_zbegin[sigma_ind:last_nonzero_ind,1:sigma_ind-1] .*= -1.0 + + if plus_vcut_fraction > 0.5 + jacobian_zbegin[last_nonzero_ind,1:sigma_ind-1] .*= plus_vcut_fraction - 0.5 + else + jacobian_zbegin[last_nonzero_ind,1:sigma_ind-1] .*= plus_vcut_fraction + 0.5 + end + + # Fill in elements giving response to changes in electron_ppar + # A change to ppar results in a shift in the location of w_∥(v_∥=0). + # The interpolated values of g_e(w_∥) that are filled by the boundary + # condition are (dropping _∥ subscripts for the remaining comments in this + # if-clause) + # g(w|v>0) = g(2*u/vth - w) + # So + # δg(w|v>0) = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δvth + # = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δ(sqrt(2*ppar/n) + # = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δppar * vth / (2*ppar) + # = -u/vth/ppar * dg/dw(2*u/vth - w) * δppar + # + # As jacobian_zbegin_ppar only depends on variations of a single value + # `ppar[1]`, it might be more maintainable and computationally cheaper to + # calculate jacobian_zbegin_ppar with a finite difference method (applying the + # boundary condition function with a perturbed pressure `ppar[1] + ϵ` for some + # small ϵ) rather than calculating it using the method below. If we used the + # finite difference approach, we would have to be careful that the ϵ step does + # not change the cutoff index (otherwise we would pick up non-smooth changes) + # - one possibility might to be to switch to `-ϵ` instead of `+ϵ` if this + # happens. + + dpdfdv_near_zero = @view vpa.scratch[sigma_ind:last_point_near_zero] + @views interpolate_symmetric!(dpdfdv_near_zero, + vpa_unnorm[sigma_ind:last_point_near_zero], + pdf_lower[element_with_zero_boundary:sigma_ind-1,ivperp], + vpa_unnorm[element_with_zero_boundary:sigma_ind-1], + Val(1)) + # The above call to interpolate_symmetric calculates dg/dv rather than dg/dw, + # so need to multiply by an extra factor of vthe. + # δg(w|v>0) = -u/ppar * dg/dv(2*u/vth - w) * δppar + @. jacobian_zbegin_ppar[sigma_ind:last_point_near_zero] -= + -upar_lower / ppar_lower * dpdfdv_near_zero + + dpdfdw_far_from_zero = @view vpa.scratch[last_point_near_zero+1:last_nonzero_ind] + @views interpolate_to_grid_1d!(dpdfdw_far_from_zero, + reversed_wpa_of_minus_vpa[vpa.n-last_nonzero_ind+1:vpa.n-last_point_near_zero], + pdf_lower[:,ivperp], vpa, vpa_spectral, Val(1)) + reverse!(dpdfdw_far_from_zero) + # Note that because we calculated the derivative of the interpolating + # function, and then reversed the results, we need to multiply the derivative + # by -1. + @. jacobian_zbegin_ppar[last_point_near_zero+1:last_nonzero_ind] -= + upar_lower / vthe_lower / ppar_lower * dpdfdw_far_from_zero + + # Whatever the variation due to interpolation is at the last nonzero grid + # point, it will be reduced by the cutoff. + if plus_vcut_fraction > 0.5 + jacobian_zbegin_ppar[last_nonzero_ind] *= plus_vcut_fraction - 0.5 + else + jacobian_zbegin_ppar[last_nonzero_ind] *= plus_vcut_fraction + 0.5 + end + + # The change in electron_ppar also changes the position of + # wcut = (vcut - upar)/vthe + # as vcut does not change (within the Krylov iteration where this + # preconditioner matrix is used), but vthe does because of the change in + # electron_ppar. We actually use plus_vcut_fraction calculated from + # vpa_unnorm, so it is most convenient to consider: + # v = vthe * w + upar + # δv = δvthe * w + # = δvthe * (v - upar)/vthe + # = δppar * vthe / (2*ppar) * (v - upar)/vthe + # = δppar * (v - upar) / 2 / ppar + # with vl and vu the values of v at the grid points below and above vcut + # plus_vcut_fraction = (vcut - vl) / (vu - vl) + # δplus_vcut_fraction = -(vcut - vl) / (vu - vl)^2 * (δvu - δvl) - δvl / (vu - vl) + # δplus_vcut_fraction = [-(vcut - vl) / (vu - vl)^2 * (vu - vl) - (vl - upar) / (vu - vl)] * δppar / 2 / ppar + # δplus_vcut_fraction = [-(vcut - vl) / (vu - vl) - (vl - upar) / (vu - vl)] * δppar / 2 / ppar + # δplus_vcut_fraction = -(vcut - upar) / (vu - vl) / 2 / ppar * δppar + interpolated_pdf_at_last_nonzero_ind = @view vpa.scratch[1:1] + reversed_last_nonzero_ind = vpa.n-last_nonzero_ind+1 + @views interpolate_to_grid_1d!(interpolated_pdf_at_last_nonzero_ind, + reversed_wpa_of_minus_vpa[reversed_last_nonzero_ind:reversed_last_nonzero_ind], + pdf_lower[:,ivperp], vpa, vpa_spectral) + + dplus_vcut_fraction_dp = -(vcut - upar_lower) / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) / 2.0 / ppar_lower + jacobian_zbegin_ppar[last_nonzero_ind] -= interpolated_pdf_at_last_nonzero_ind[] * dplus_vcut_fraction_dp + + # Calculate some numerical integrals of dpdfdw that we will need later + function get_part3_for_one_moment_lower(integral_pieces) + # Integral contribution from the cell containing sigma + # The contribution from integral_pieces[sigma_ind-1] should be dropped, + # because that point is on the input grid, and this correction is due to + # the changes in interpolated values on the output grid due to δp_e∥. The + # value of g_e at sigma_ind-1 isn't interpolated so does not change in + # this way. + integral_sigma_cell = 0.5 * integral_pieces[sigma_ind] + + part3 = sum(@view integral_pieces[sigma_ind+1:plus_vcut_ind+1]) + part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell + + return part3 + end + # The contents of jacobian_zbegin_ppar at this point are the coefficients + # needed to get the new distribution function due to a change δp, which we can + # now use to calculate the response of various integrals to the same change. + # Note that jacobian_zbegin_ppar already contains `2.0*dsigma_dp`. + @. vpa.scratch = -jacobian_zbegin_ppar * vpa.wgts / sqrt(π) + da3_dp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + db3_dp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + dc3_dp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + dd3_dp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch = -jacobian_zbegin_ppar * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe_lower^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_lower^2) + vpa.scratch[sigma_ind-1:sigma_ind] .= 0.0 + dalpha_dp_interp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + dbeta_dp_interp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + dgamma_dp_interp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + ddelta_dp_interp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + depsilon_dp_interp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + dzeta_dp_interp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + deta_dp_interp = get_part3_for_one_moment_lower(vpa.scratch) + + pdf_lowerz = vpa.scratch10 + @views @. pdf_lowerz[1:sigma_ind-1] = pdf_lower[1:sigma_ind-1,ivperp] + + # interpolate the pdf_lowerz onto this grid + # 'near zero' means in the range where + # abs(v_∥)≤abs(lower boundary of element including v_∥=0) + # 'far from zero' means larger values of v_∥. + + # Interpolate to the 'near zero' points + @views interpolate_symmetric!(pdf_lowerz[sigma_ind:last_point_near_zero], + vpa_unnorm[sigma_ind:last_point_near_zero], + pdf_lowerz[element_with_zero_boundary:sigma_ind-1], + vpa_unnorm[element_with_zero_boundary:sigma_ind-1]) + + # Interpolate to the 'far from zero' points + reversed_pdf_far_from_zero = @view pdf_lowerz[last_point_near_zero+1:end] + interpolate_to_grid_1d!(reversed_pdf_far_from_zero, + reversed_wpa_of_minus_vpa[1:vpa.n-last_point_near_zero], + pdf_lowerz, vpa, vpa_spectral) + reverse!(reversed_pdf_far_from_zero) + + minus_vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) + if minus_vcut_fraction < 0.5 + lower_cutoff_ind = minus_vcut_ind-1 + else + lower_cutoff_ind = minus_vcut_ind + end + if plus_vcut_fraction > 0.5 + upper_cutoff_ind = plus_vcut_ind+1 + upper_cutoff_factor = plus_vcut_fraction - 0.5 + else + upper_cutoff_ind = plus_vcut_ind + upper_cutoff_factor = plus_vcut_fraction + 0.5 + end + pdf_lowerz[upper_cutoff_ind] *= upper_cutoff_factor + pdf_lowerz[upper_cutoff_ind+1:end] .= 0.0 + + a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta = + get_lowerz_integral_correction_components( + pdf_lowerz, vthe_lower, vpa, vpa_unnorm, u_over_vt, sigma_ind, + sigma_fraction, vcut, minus_vcut_ind, plus_vcut_ind, false) + + output_range = sigma_ind+1:upper_cutoff_ind + + v_over_vth = @views @. vpa.scratch[output_range] = vpa_unnorm[output_range] / vthe_lower + + correction_matrix = [alpha beta gamma delta ; + beta gamma delta epsilon ; + gamma delta epsilon zeta ; + delta epsilon zeta eta + ] + + # jacobian_zbegin_ppar state at this point would generate (when multiplied by + # some δp) a δf due to the effectively-shifted grid. The unperturbed f + # required corrections to make its integrals correct. Need to apply the same + # corrections to δf. + # This term seems to have a very small contribution, and could probably be + # skipped. + A, B, C, D = correction_matrix \ [a2-a3, -b2-b3, c2-c3, -d2-d3] + @. jacobian_zbegin_ppar[output_range] *= + 1.0 + + (A + + B * v_over_vth + + C * v_over_vth^2 + + D * v_over_vth^3) * + integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) + + # Calculate the changes in the integrals a2, b2, c2, d2, a3, b3, c3, and d3 in + # response to changes in electron_ppar. The changes are calculated for the + # combinations (a2-a3), (-b2-b3), (c2-c3), and (-d2-d3) to take advantage of + # some cancellations. + + # Need to calculate the variation of various intermediate quantities with + # δp. + # + # vth = sqrt(2*p/n) + # δvth = vth / (2 * p) * δp + # + # sigma = -u / vth + # δsigma = u / vth^2 δvth + # = u / (2 * vth * p) * δp + # + # We could write sigma_fraction as + # sigma_fraction = (sigma - vpa[sigma_ind-1]) / (vpa[sigma_ind] - vpa[sigma_ind-1]) + # so that + # δsigma_fraction = δsigma / (vpa[sigma_ind] - vpa[sigma_ind-1]) + # = u / (2 * vth * p) / (vpa[sigma_ind] - vpa[sigma_ind-1]) * δp + # + # minus_vcut_fraction = ((-vcut - u)/vth - vpa[minus_vcut_ind-1]) / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1]) + # δminus_vcut_fraction = (vcut + u) / vth^2 / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1]) * δvth + # = (vcut + u) / (2 * vth * p) / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1]) * δp + # + # plus_vcut_fraction = ((vcut - u)/vth - vpa[plus_vcut_ind-1]) / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind]) + # δplus_vcut_fraction = -(vcut - u) / vth^2 / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind]) * δvth + # = -(vcut - u) / (2 * vth * p) / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind]) * δp + density_integral_pieces_lowerz = vpa.scratch3 + flow_integral_pieces_lowerz = vpa.scratch4 + energy_integral_pieces_lowerz = vpa.scratch5 + cubic_integral_pieces_lowerz = vpa.scratch6 + quartic_integral_pieces_lowerz = vpa.scratch7 + fill_integral_pieces!( + pdf_lowerz, vthe_lower, vpa, vpa_unnorm, density_integral_pieces_lowerz, + flow_integral_pieces_lowerz, energy_integral_pieces_lowerz, + cubic_integral_pieces_lowerz, quartic_integral_pieces_lowerz) + vpa_grid = vpa.grid + + dsigma_dp = upar_lower / (2.0 * vthe_lower * ppar_lower) + + dsigma_fraction_dp = dsigma_dp / (vpa_grid[sigma_ind] - vpa_grid[sigma_ind-1]) + + dminus_vcut_fraction_dp = (vcut + upar_lower) / (2.0 * vthe_lower * ppar_lower) / (vpa_grid[minus_vcut_ind] - vpa_grid[minus_vcut_ind-1]) + + dplus_vcut_fraction_dp = -(vcut - upar_lower) / (2.0 * vthe_lower * ppar_lower) / (vpa_grid[plus_vcut_ind+1] - vpa_grid[plus_vcut_ind]) + + density_integral_sigma_cell = 0.5 * (density_integral_pieces_lowerz[sigma_ind-1] + + density_integral_pieces_lowerz[sigma_ind]) + da2_minus_a3_dp = ( + # Contribution from integral limits at sigma + 2.0 * density_integral_sigma_cell * dsigma_fraction_dp + # Contribution from integral limits at -wcut-. The contribution from wcut+ + # is included in da3_dp + - density_integral_pieces_lowerz[lower_cutoff_ind] * dminus_vcut_fraction_dp + # No contribution from explicit σ factors in w-integral for a2 or a3 as + # integrand does not contain σ. + # Change in a3 integral due to different interpolated ̂g values + - da3_dp + ) + + dminus_b2_minus_b3_dp = ( + # Contribution from integral limits at sigma cancel exactly + # Contribution from integral limits at -wcut-. The contribution from wcut+ + # is included in db3_dp + + flow_integral_pieces_lowerz[lower_cutoff_ind] * dminus_vcut_fraction_dp + # Contribution from w-integral due to variation of integrand with ppar. + + (a2 + a3) * dsigma_dp + # Change in b3 integral due to different interpolated ̂g values + - db3_dp + ) + + energy_integral_sigma_cell = 0.5 * (energy_integral_pieces_lowerz[sigma_ind-1] + + energy_integral_pieces_lowerz[sigma_ind]) + dc2_minus_c3_dp = ( + # Contribution from integral limits at sigma + 2.0 * energy_integral_sigma_cell * dsigma_fraction_dp + # Contribution from integral limits at -wcut-. The contribution from wcut+ + # is included in dc3_dp + - energy_integral_pieces_lowerz[lower_cutoff_ind] * dminus_vcut_fraction_dp + # Contribution from w-integral due to variation of integrand with ppar. + + 2.0 * (-b2 + b3) * dsigma_dp + # Change in c3 integral due to different interpolated ̂g values + - dc3_dp + ) + + dminus_d2_minus_d3_dp = ( + # Contribution from integral limits at sigma cancel exactly + # Contribution from integral limits at -wcut-. The contribution from wcut+ + # is included in dd3_dp + + cubic_integral_pieces_lowerz[lower_cutoff_ind] * dminus_vcut_fraction_dp + # Contribution from w-integral due to variation of integrand with ppar. + + 3.0 * (c2 + c3) * dsigma_dp + # Change in d3 integral due to different interpolated ̂g values + - dd3_dp + ) + + correction0_integral_pieces = @. vpa.scratch3 = pdf_lowerz * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe_lower^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_lower^2) + for ivpa ∈ 1:sigma_ind + correction0_integral_pieces[ivpa] = 0.0 + end + + correctionminus1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces / vpa_unnorm * vthe_lower + integralminus1 = sum(@view(correctionminus1_integral_pieces[sigma_ind+1:upper_cutoff_ind])) + + correction0_integral_type2_pieces = @. vpa.scratch4 = pdf_lowerz * vpa.wgts / sqrt(π) * 2.0 * integral_correction_sharpness^2 * vpa_unnorm^3 / vthe_lower^3 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_lower^2)^2 + integral_type2 = sum(@view(correction0_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind])) + dalpha_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at wcut+ is included in dalpha_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * integralminus1 + integral_type2) * dsigma_dp + # Change in alpha integral due to different interpolated ̂g values + + dalpha_dp_interp + ) + + correction1_integral_pieces = @. vpa.scratch5 = correction0_integral_pieces * vpa_unnorm / vthe_lower + correction1_integral_type2_pieces = @. vpa.scratch6 = correction0_integral_type2_pieces * vpa_unnorm / vthe_lower + integral_type2 = sum(@view(correction1_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind])) + dbeta_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at wcut+ is included in dbeta_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * alpha + integral_type2) * dsigma_dp + # Change in beta integral due to different interpolated ̂g values + + dbeta_dp_interp + ) + + # Here we overwrite the buffers that were used for correction1_integral_pieces + # and correction1_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe_lower + correction2_integral_type2_pieces = @. vpa.scratch6 = correction1_integral_type2_pieces * vpa_unnorm / vthe_lower + integral_type2 = sum(@view(correction2_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind])) + dgamma_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at wcut+ is included in dgamma_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * beta + integral_type2) * dsigma_dp + # Change in gamma integral due to different interpolated ̂g values + + dgamma_dp_interp + ) + + # Here we overwrite the buffers that were used for correction2_integral_pieces + # and correction2_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction3_integral_pieces = @. vpa.scratch5 = correction2_integral_pieces * vpa_unnorm / vthe_lower + correction3_integral_type2_pieces = @. vpa.scratch6 = correction2_integral_type2_pieces * vpa_unnorm / vthe_lower + integral_type2 = sum(@view(correction3_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind])) + ddelta_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at wcut+ is included in ddelta_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * gamma + integral_type2) * dsigma_dp + # Change in delta integral due to different interpolated ̂g values + + ddelta_dp_interp + ) + + # Here we overwrite the buffers that were used for correction3_integral_pieces + # and correction3_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction4_integral_pieces = @. vpa.scratch5 = correction3_integral_pieces * vpa_unnorm / vthe_lower + correction4_integral_type2_pieces = @. vpa.scratch6 = correction3_integral_type2_pieces * vpa_unnorm / vthe_lower + integral_type2 = sum(@view(correction4_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind])) + depsilon_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at wcut+ is included in depsilon_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * delta + integral_type2) * dsigma_dp + # Change in epsilon integral due to different interpolated ̂g values + + depsilon_dp_interp + ) + + # Here we overwrite the buffers that were used for correction4_integral_pieces + # and correction4_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction5_integral_pieces = @. vpa.scratch5 = correction4_integral_pieces * vpa_unnorm / vthe_lower + correction5_integral_type2_pieces = @. vpa.scratch6 = correction4_integral_type2_pieces * vpa_unnorm / vthe_lower + integral_type2 = sum(@view(correction5_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind])) + dzeta_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at wcut+ is included in dzeta_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * epsilon + integral_type2) * dsigma_dp + # Change in zeta integral due to different interpolated ̂g values + + dzeta_dp_interp + ) + + # Here we overwrite the buffers that were used for correction4_integral_pieces + # and correction4_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction6_integral_pieces = @. vpa.scratch5 = correction5_integral_pieces * vpa_unnorm / vthe_lower + correction6_integral_type2_pieces = @. vpa.scratch6 = correction5_integral_type2_pieces * vpa_unnorm / vthe_lower + integral_type2 = sum(@view(correction6_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind])) + deta_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at wcut+ is included in deta_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * zeta + integral_type2) * dsigma_dp + # Change in eta integral due to different interpolated ̂g values + + deta_dp_interp + ) + + dA_dp, dB_dp, dC_dp, dD_dp = correction_matrix \ ( + [da2_minus_a3_dp, dminus_b2_minus_b3_dp, dc2_minus_c3_dp, dminus_d2_minus_d3_dp] + - [dalpha_dp dbeta_dp dgamma_dp ddelta_dp ; + dbeta_dp dgamma_dp ddelta_dp depsilon_dp ; + dgamma_dp ddelta_dp depsilon_dp dzeta_dp ; + ddelta_dp depsilon_dp dzeta_dp deta_dp + ] + * [A, B, C, D] + ) + + @views @. jacobian_zbegin_ppar[output_range] -= + (dA_dp + + dB_dp * v_over_vth + + dC_dp * v_over_vth^2 + + dD_dp * v_over_vth^3) * + integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) * + pdf_lowerz[output_range] + + # Add variation due to variation of ̃v coordinate with δp. + # These contributions seem to make almost no difference, and could probably be + # skipped. + dv_over_vth_dp = -dsigma_dp + @views @. jacobian_zbegin_ppar[output_range] -= ( + (B * dv_over_vth_dp + + 2.0 * C * v_over_vth * dv_over_vth_dp + + 3.0 * D * v_over_vth^2 * dv_over_vth_dp) * + integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) * + pdf_lowerz[output_range] + + + (A + + B * v_over_vth + + C * v_over_vth^2 + + D * v_over_vth^3) * + (2.0 * integral_correction_sharpness * v_over_vth * dv_over_vth_dp / (1.0 + integral_correction_sharpness * v_over_vth^2) + - 2.0 * integral_correction_sharpness^2 * v_over_vth^3 * dv_over_vth_dp / (1.0 + integral_correction_sharpness * v_over_vth^2)^2) * + pdf_lowerz[output_range] + ) + end + end + + if include_upper + shared_mem_parallel && begin_vperp_region() + @loop_vperp ivperp begin + # Skip vperp boundary points. + if vperp.n > 1 && ivperp == vperp.n + continue + end + + # Get matrix entries for the response of the sheath-edge boundary condition. + # Ignore constraints, as these are non-linear and also should be small + # corrections which should not matter much for a preconditioner. + + jac_range = pdf_offset+(zend-1)*vperp.n*vpa.n+(ivperp-1)*vpa.n+1 : pdf_offset+(zend-1)*vperp.n*vpa.n+ivperp*vpa.n + jacobian_zend = @view jacobian[jac_range,jac_range] + jacobian_zend_ppar = @view jacobian[jac_range,ppar_offset+zend] + + vpa_unnorm, u_over_vt, vcut, plus_vcut_ind, sigma, sigma_ind, sigma_fraction, + element_with_zero, element_with_zero_boundary, first_point_near_zero, + reversed_wpa_of_minus_vpa = + get_cutoff_params_upper(upar_upper, vthe_upper, phi_upper, me_over_mi, + vpa) + + minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut) + # minus_vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and + # minus_vcut_ind where -vcut is. + minus_vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) + + if minus_vcut_fraction < 0.5 + first_nonzero_ind = minus_vcut_ind - 1 + else + first_nonzero_ind = minus_vcut_ind + end + + # Interpolate to the 'near zero' points + @views fill_interpolate_symmetric_matrix!( + jacobian_zend[first_point_near_zero:sigma_ind,sigma_ind+1:element_with_zero_boundary], + vpa_unnorm[first_point_near_zero:sigma_ind], + vpa_unnorm[sigma_ind+1:element_with_zero_boundary]) + + # Interpolate to the 'far from zero' points + @views fill_1d_interpolation_matrix!( + jacobian_zend[first_point_near_zero-1:-1:first_nonzero_ind,:], + reversed_wpa_of_minus_vpa[vpa.n-first_point_near_zero+2:vpa.n-first_nonzero_ind+1], + vpa, vpa_spectral) + + # Reverse the sign of the elements we just filled + jacobian_zend[first_nonzero_ind:sigma_ind,sigma_ind+1:end] .*= -1.0 + + if minus_vcut_fraction < 0.5 + jacobian_zend[first_nonzero_ind,sigma_ind+1:end] .*= 0.5 - minus_vcut_fraction + else + jacobian_zend[first_nonzero_ind,sigma_ind+1:end] .*= 1.5 - minus_vcut_fraction + end + + # Fill in elements giving response to changes in electron_ppar + # A change to ppar results in a shift in the location of w_∥(v_∥=0). + # The interpolated values of g_e(w_∥) that are filled by the boundary + # condition are (dropping _∥ subscripts for the remaining comments in this + # if-clause) + # g(w|v<0) = g(2*u/vth - w) + # So + # δg(w|v<0) = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δvth + # = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δ(sqrt(2*ppar/n) + # = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δppar * vth / (2*ppar) + # = -u/vth/ppar * dg/dw(2*u/vth - w) * δppar + # + # As jacobian_zend_ppar only depends on variations of a single value + # `ppar[end]` it might be more maintainable and computationally cheaper to + # calculate jacobian_zend_ppar with a finite difference method (applying the + # boundary condition function with a perturbed pressure `ppar[end] + ϵ` for + # some small ϵ) rather than calculating it using the method below. If we used + # the finite difference approach, we would have to be careful that the ϵ step + # does not change the cutoff index (otherwise we would pick up non-smooth + # changes) - one possibility might to be to switch to `-ϵ` instead of `+ϵ` if + # this happens. + + dpdfdv_near_zero = @view vpa.scratch[first_point_near_zero:sigma_ind] + @views interpolate_symmetric!(dpdfdv_near_zero, + vpa_unnorm[first_point_near_zero:sigma_ind], + pdf_upper[sigma_ind+1:element_with_zero_boundary,ivperp], + vpa_unnorm[sigma_ind+1:element_with_zero_boundary], + Val(1)) + # The above call to interpolate_symmetric calculates dg/dv rather than dg/dw, + # so need to multiply by an extra factor of vthe. + # δg(w|v<0) = -u/ppar * dg/dv(2*u/vth - w) * δppar + @. jacobian_zend_ppar[first_point_near_zero:sigma_ind] -= + -upar_upper / ppar_upper * dpdfdv_near_zero + + dpdfdw_far_from_zero = @view vpa.scratch[first_nonzero_ind:first_point_near_zero-1] + @views interpolate_to_grid_1d!(dpdfdw_far_from_zero, + reversed_wpa_of_minus_vpa[vpa.n-first_point_near_zero+2:vpa.n-first_nonzero_ind+1], + pdf_upper[:,ivperp], vpa, vpa_spectral, Val(1)) + reverse!(dpdfdw_far_from_zero) + # Note that because we calculated the derivative of the interpolating + # function, and then reversed the results, we need to multiply the derivative + # by -1. + @. jacobian_zend_ppar[first_nonzero_ind:first_point_near_zero-1] -= + upar_upper / vthe_upper / ppar_upper * dpdfdw_far_from_zero + + # Whatever the variation due to interpolation is at the last nonzero grid + # point, it will be reduced by the cutoff. + if minus_vcut_fraction < 0.5 + jacobian_zend_ppar[first_nonzero_ind] *= 0.5 - minus_vcut_fraction + else + jacobian_zend_ppar[first_nonzero_ind] *= 1.5 - minus_vcut_fraction + end + + # The change in electron_ppar also changes the position of + # wcut = (-vcut - upar)/vthe + # as vcut does not change (within the Krylov iteration where this + # preconditioner matrix is used), but vthe does because of the change in + # electron_ppar. We actually use minus_vcut_fraction calculated from + # vpa_unnorm, so it is most convenient to consider: + # v = vthe * w + upar + # δv = δvthe * w + # = δvthe * (v - upar)/vthe + # = δppar * vthe / (2*ppar) * (v - upar)/vthe + # = δppar * (v - upar) / 2 / ppar + # with vl and vu the values of v at the grid points below and above vcut + # minus_vcut_fraction = (-vcut - vl) / (vu - vl) + # δminus_vcut_fraction = -(-vcut - vl) / (vu - vl)^2 * (δvu - δvl) - δvl / (vu - vl) + # δminus_vcut_fraction = [-(-vcut - vl) / (vu - vl)^2 * (vu - vl) - (vl - upar) / (vu - vl)] * δppar / 2 / ppar + # δminus_vcut_fraction = [-(-vcut - vl) / (vu - vl) - (vl - upar) / (vu - vl)] * δppar / 2 / ppar + # δminus_vcut_fraction = -(-vcut - upar) / (vu - vl) / 2 / ppar * δppar + # δminus_vcut_fraction = (vcut + upar) / (vu - vl) / 2 / ppar * δppar + interpolated_pdf_at_first_nonzero_ind = @view vpa.scratch[1:1] + reversed_first_nonzero_ind = vpa.n-first_nonzero_ind+1 + @views interpolate_to_grid_1d!(interpolated_pdf_at_first_nonzero_ind, + reversed_wpa_of_minus_vpa[reversed_first_nonzero_ind:reversed_first_nonzero_ind], + pdf_upper[:,ivperp], vpa, vpa_spectral) + + dminus_vcut_fraction_dp = (vcut + upar_upper) / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) / 2.0 / ppar_upper + # Note that pdf_upper[first_nonzero_ind,ivperp] depends on -minus_vcut_fraction, so + # need a -'ve sign in the following line. + jacobian_zend_ppar[first_nonzero_ind] -= -interpolated_pdf_at_first_nonzero_ind[] * dminus_vcut_fraction_dp + + # Calculate some numerical integrals of dpdfdw that we will need later + function get_part3_for_one_moment_upper(integral_pieces) + # Integral contribution from the cell containing sigma + # The contribution from integral_pieces[sigma_ind+1] should be dropped, + # because that point is on the input grid, and this correction is due to + # the changes in interpolated values on the output grid due to δp_e∥. The + # value of g_e at sigma_ind+1 isn't interpolated so does not change in + # this way. + integral_sigma_cell = 0.5 * integral_pieces[sigma_ind] + + part3 = sum(@view integral_pieces[minus_vcut_ind-1:sigma_ind-1]) + part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell + + return part3 + end + # The contents of jacobian_zend_ppar at this point are the coefficients needed + # to get the new distribution function due to a change δp, which we can now + # use to calculate the response of various integrals to the same change. Note + # that jacobian_zend_ppar already contains `2.0*dsigma_dp`. + @. vpa.scratch = -jacobian_zend_ppar * vpa.wgts / sqrt(π) + da3_dp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + db3_dp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + dc3_dp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + dd3_dp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch = -jacobian_zend_ppar * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe_upper^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_upper^2) + vpa.scratch[sigma_ind:sigma_ind+1] .= 0.0 + dalpha_dp_interp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + dbeta_dp_interp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + dgamma_dp_interp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + ddelta_dp_interp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + depsilon_dp_interp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + dzeta_dp_interp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + deta_dp_interp = get_part3_for_one_moment_upper(vpa.scratch) + + pdf_upperz = vpa.scratch10 + @views @. pdf_upperz[sigma_ind:end] = pdf_upper[sigma_ind:end,ivperp] + + # interpolate the pdf_upperz onto this grid + # 'near zero' means in the range where + # abs(v_∥)≤abs(upper boundary of element including v_∥=0) + # 'far from zero' means more negative values of v_∥. + + # Interpolate to the 'near zero' points + @views interpolate_symmetric!(pdf_upperz[first_point_near_zero:sigma_ind], + vpa_unnorm[first_point_near_zero:sigma_ind], + pdf_upperz[sigma_ind+1:element_with_zero_boundary], + vpa_unnorm[sigma_ind+1:element_with_zero_boundary]) + + # Interpolate to the 'far from zero' points + reversed_pdf_far_from_zero = @view pdf_upperz[1:first_point_near_zero-1] + interpolate_to_grid_1d!(reversed_pdf_far_from_zero, + reversed_wpa_of_minus_vpa[vpa.n-first_point_near_zero+2:end], + pdf_upperz, vpa, vpa_spectral) + reverse!(reversed_pdf_far_from_zero) + + plus_vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) + if plus_vcut_fraction > 0.5 + upper_cutoff_ind = plus_vcut_ind+1 + else + upper_cutoff_ind = plus_vcut_ind + end + if minus_vcut_fraction < 0.5 + lower_cutoff_ind = minus_vcut_ind-1 + lower_cutoff_factor = 0.5 - minus_vcut_fraction + else + lower_cutoff_ind = minus_vcut_ind + lower_cutoff_factor = 1.5 - minus_vcut_fraction + end + pdf_upperz[lower_cutoff_ind] *= lower_cutoff_factor + pdf_upperz[1:lower_cutoff_ind-1] .= 0.0 + + a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta = + get_upperz_integral_correction_components( + pdf_upperz, vthe_upper, vpa, vpa_unnorm, u_over_vt, + sigma_ind, sigma_fraction, vcut, minus_vcut_ind, plus_vcut_ind, false) + + output_range = lower_cutoff_ind:sigma_ind + + v_over_vth = @views @. vpa.scratch[output_range] = vpa_unnorm[output_range] / vthe_upper + + correction_matrix = [alpha beta gamma delta ; + beta gamma delta epsilon ; + gamma delta epsilon zeta ; + delta epsilon zeta eta + ] + + # jacobian_zbegin_ppar state at this point would generate (when multiplied by + # some δp) a δf due to the effectively-shifted grid. The unperturbed f + # required corrections to make its integrals correct. Need to apply the same + # corrections to δf. + # This term seems to have a very small contribution, and could probably be + # skipped. + A, B, C, D = correction_matrix \ [a2-a3, -b2-b3, c2-c3, -d2-d3] + @. jacobian_zend_ppar[output_range] *= + 1.0 + + (A + + B * v_over_vth + + C * v_over_vth^2 + + D * v_over_vth^3) * + integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) + + # Calculate the changes in the integrals a2, b2, c2, d2, a3, b3, c3, and d3 in + # response to changes in electron_ppar. The changes are calculated for the + # combinations (a2-a3), (-b2-b3), (c2-c3), and (-d2-d3) to take advantage of + # some cancellations. + + # Need to calculate the variation of various intermediate quantities with + # δp. + # + # vth = sqrt(2*p/n) + # δvth = vth / (2 * p) * δp + # + # sigma = -u / vth + # δsigma = u / vth^2 δvth + # = u / (2 * vth * p) * δp + # + # We could write sigma_fraction as + # sigma_fraction = (sigma - vpa[sigma_ind]) / (vpa[sigma_ind+1] - vpa[sigma_ind]) + # so that + # δsigma_fraction = δsigma / (vpa[sigma_ind+1] - vpa[sigma_ind]) + # = u / (2 * vth * p) / (vpa[sigma_ind+1] - vpa[sigma_ind]) * δp + # + # minus_vcut_fraction = ((-vcut - u)/vth - vpa[minus_vcut_ind-1]) / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1]) + # δminus_vcut_fraction = (vcut + u) / vth^2 / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1]) * δvth + # = (vcut + u) / (2 * vth * p) / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1]) * δp + # + # plus_vcut_fraction = ((vcut - u)/vth - vpa[plus_vcut_ind-1]) / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind]) + # δplus_vcut_fraction = -(vcut - u) / vth^2 / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind]) * δvth + # = -(vcut - u) / (2 * vth * p) / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind]) * δp + + density_integral_pieces_upperz = vpa.scratch3 + flow_integral_pieces_upperz = vpa.scratch4 + energy_integral_pieces_upperz = vpa.scratch5 + cubic_integral_pieces_upperz = vpa.scratch6 + quartic_integral_pieces_upperz = vpa.scratch7 + fill_integral_pieces!( + pdf_upperz, vthe_upper, vpa, vpa_unnorm, density_integral_pieces_upperz, + flow_integral_pieces_upperz, energy_integral_pieces_upperz, + cubic_integral_pieces_upperz, quartic_integral_pieces_upperz) + vpa_grid = vpa.grid + + dsigma_dp = upar_upper / (2.0 * vthe_upper * ppar_upper) + + dsigma_fraction_dp = dsigma_dp / (vpa_grid[sigma_ind+1] - vpa_grid[sigma_ind]) + + dminus_vcut_fraction_dp = (vcut + upar_upper) / (2.0 * vthe_upper * ppar_upper) / (vpa_grid[minus_vcut_ind] - vpa_grid[minus_vcut_ind-1]) + + dplus_vcut_fraction_dp = -(vcut - upar_upper) / (2.0 * vthe_upper * ppar_upper) / (vpa_grid[plus_vcut_ind+1] - vpa_grid[plus_vcut_ind]) + + + density_integral_sigma_cell = 0.5 * (density_integral_pieces_upperz[sigma_ind] + + density_integral_pieces_upperz[sigma_ind+1]) + da2_minus_a3_dp = ( + # Contribution from integral limits at sigma + -2.0 * density_integral_sigma_cell * dsigma_fraction_dp + # Contribution from integral limits at wcut+. The contribution from -wcut- + # is included in da3_dp + + density_integral_pieces_upperz[upper_cutoff_ind] * dplus_vcut_fraction_dp + # No contribution from w-integral for a2 or a3 as integrand does not + # depend on ppar. + # Change in a3 integral due to different interpolated ̂g values + - da3_dp + ) + + dminus_b2_minus_b3_dp = ( + # Contribution from integral limits at sigma cancel exactly + # Contribution from integral limits at wcut+. The contribution from -wcut- + # is included in db3_dp + - flow_integral_pieces_upperz[upper_cutoff_ind] * dplus_vcut_fraction_dp + # Contribution from w-integral due to variation of integrand with ppar. + + (a2 + a3) * dsigma_dp + # Change in b3 integral due to different interpolated ̂g values + - db3_dp + ) + + energy_integral_sigma_cell = 0.5 * (energy_integral_pieces_upperz[sigma_ind] + + energy_integral_pieces_upperz[sigma_ind+1]) + dc2_minus_c3_dp = ( + # Contribution from integral limits at sigma + -2.0 * energy_integral_sigma_cell * dsigma_fraction_dp + # Contribution from integral limits at wcut+. The contribution from -wcut- + # is included in dc3_dp + + energy_integral_pieces_upperz[upper_cutoff_ind] * dplus_vcut_fraction_dp + # Contribution from w-integral due to variation of integrand with ppar. + + 2.0 * (-b2 + b3) * dsigma_dp + # Change in c3 integral due to different interpolated ̂g values + - dc3_dp + ) + + dminus_d2_minus_d3_dp = ( + # Contribution from integral limits at sigma cancel exactly + # Contribution from integral limits at wcut+. The contribution from -wcut- + # is included in dc3_dp + - cubic_integral_pieces_upperz[upper_cutoff_ind] * dplus_vcut_fraction_dp + # Contribution from w-integral due to variation of integrand with ppar. + + 3.0 * (c2 + c3) * dsigma_dp + # Change in d3 integral due to different interpolated ̂g values + - dd3_dp + ) + + correction0_integral_pieces = @. vpa.scratch3 = pdf_upperz * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe_upper^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_upper^2) + for ivpa ∈ sigma_ind:vpa.n + correction0_integral_pieces[ivpa] = 0.0 + end + + correctionminus1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces / vpa_unnorm * vthe_upper + integralminus1 = sum(@view(correctionminus1_integral_pieces[lower_cutoff_ind:sigma_ind-1])) + + correction0_integral_type2_pieces = @. vpa.scratch4 = pdf_upperz * vpa.wgts / sqrt(π) * 2.0 * integral_correction_sharpness^2 * vpa_unnorm^3 / vthe_upper^3 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_upper^2)^2 + integral_type2 = sum(@view(correction0_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1])) + dalpha_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at -wcut- is included in dalpha_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + (-2.0 * integralminus1 + integral_type2) * dsigma_dp + # Change in alpha integral due to different interpolated ̂g values + + dalpha_dp_interp + ) + + correction1_integral_pieces = @. vpa.scratch5 = correction0_integral_pieces * vpa_unnorm / vthe_upper + correction1_integral_type2_pieces = @. vpa.scratch6 = correction0_integral_type2_pieces * vpa_unnorm / vthe_upper + integral_type2 = sum(@view(correction1_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1])) + dbeta_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at -wcut- is included in dbeta_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * alpha + integral_type2) * dsigma_dp + # Change in beta integral due to different interpolated ̂g values + + dbeta_dp_interp + ) + + # Here we overwrite the buffers that were used for correction1_integral_pieces + # and correction1_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe_upper + correction2_integral_type2_pieces = @. vpa.scratch6 = correction1_integral_type2_pieces * vpa_unnorm / vthe_upper + integral_type2 = sum(@view(correction2_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1])) + dgamma_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at -wcut- is included in dgamma_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * beta + integral_type2) * dsigma_dp + # Change in gamma integral due to different interpolated ̂g values + + dgamma_dp_interp + ) + + # Here we overwrite the buffers that were used for correction2_integral_pieces + # and correction2_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction3_integral_pieces = @. vpa.scratch5 = correction2_integral_pieces * vpa_unnorm / vthe_upper + correction3_integral_type2_pieces = @. vpa.scratch6 = correction2_integral_type2_pieces * vpa_unnorm / vthe_upper + integral_type2 = sum(@view(correction3_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1])) + ddelta_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at -wcut- is included in ddelta_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * gamma + integral_type2) * dsigma_dp + # Change in delta integral due to different interpolated ̂g values + + ddelta_dp_interp + ) + + # Here we overwrite the buffers that were used for correction3_integral_pieces + # and correction3_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction4_integral_pieces = @. vpa.scratch5 = correction3_integral_pieces * vpa_unnorm / vthe_upper + correction4_integral_type2_pieces = @. vpa.scratch6 = correction3_integral_type2_pieces * vpa_unnorm / vthe_upper + integral_type2 = sum(@view(correction4_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1])) + depsilon_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at -wcut- is included in depsilon_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * delta + integral_type2) * dsigma_dp + # Change in epsilon integral due to different interpolated ̂g values + + depsilon_dp_interp + ) + + # Here we overwrite the buffers that were used for correction4_integral_pieces + # and correction4_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction5_integral_pieces = @. vpa.scratch5 = correction4_integral_pieces * vpa_unnorm / vthe_upper + correction5_integral_type2_pieces = @. vpa.scratch6 = correction4_integral_type2_pieces * vpa_unnorm / vthe_upper + integral_type2 = sum(@view(correction5_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1])) + dzeta_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at -wcut- is included in dzeta_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * epsilon + integral_type2) * dsigma_dp + # Change in zeta integral due to different interpolated ̂g values + + dzeta_dp_interp + ) + + # Here we overwrite the buffers that were used for correction4_integral_pieces + # and correction4_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction6_integral_pieces = @. vpa.scratch5 = correction5_integral_pieces * vpa_unnorm / vthe_upper + correction6_integral_type2_pieces = @. vpa.scratch6 = correction5_integral_type2_pieces * vpa_unnorm / vthe_upper + integral_type2 = sum(@view(correction6_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1])) + deta_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at -wcut- is included in deta_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * zeta + integral_type2) * dsigma_dp + # Change in eta integral due to different interpolated ̂g values + + deta_dp_interp + ) + + dA_dp, dB_dp, dC_dp, dD_dp = correction_matrix \ ( + [da2_minus_a3_dp, dminus_b2_minus_b3_dp, dc2_minus_c3_dp, dminus_d2_minus_d3_dp] + - [dalpha_dp dbeta_dp dgamma_dp ddelta_dp ; + dbeta_dp dgamma_dp ddelta_dp depsilon_dp ; + dgamma_dp ddelta_dp depsilon_dp dzeta_dp ; + ddelta_dp depsilon_dp dzeta_dp deta_dp + ] + * [A, B, C, D] + ) + + output_range = lower_cutoff_ind:sigma_ind-1 + v_over_vth = @views @. vpa.scratch[output_range] = vpa_unnorm[output_range] / vthe_upper + @views @. jacobian_zend_ppar[output_range] -= + (dA_dp + + dB_dp * v_over_vth + + dC_dp * v_over_vth^2 + + dD_dp * v_over_vth^3) * + integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) * + pdf_upperz[output_range] + + # Add variation due to variation of ̃v coordinate with δp. + # These contributions seem to make almost no difference, and could probably be + # skipped. + dv_over_vth_dp = -dsigma_dp + @views @. jacobian_zend_ppar[output_range] -= ( + (B * dv_over_vth_dp + + 2.0 * C * v_over_vth * dv_over_vth_dp + + 3.0 * D * v_over_vth^2 * dv_over_vth_dp) * + integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) * + pdf_upperz[output_range] + + + (A + + B * v_over_vth + + C * v_over_vth^2 + + D * v_over_vth^3) * + (2.0 * integral_correction_sharpness * v_over_vth * dv_over_vth_dp / (1.0 + integral_correction_sharpness * v_over_vth^2) + - 2.0 * integral_correction_sharpness^2 * v_over_vth^3 * dv_over_vth_dp / (1.0 + integral_correction_sharpness * v_over_vth^2)^2) * + pdf_upperz[output_range] + ) + end + end + + return nothing +end + """ electron_adaptive_timestep_update!(scratch, t, t_params, moments, phi, z_advect, vpa_advect, composition, r, z, vperp, vpa, @@ -3484,11 +4500,12 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati `evolve_ppar=true`) the electron energy equation. """ @timeit global_timer fill_electron_kinetic_equation_Jacobian!( - jacobian_matrix, f, ppar, moments, collisions, composition, z, - vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, - vpa_advect, scratch_dummy, external_source_settings, - num_diss_params, t_params, ion_dt, ir, evolve_ppar, - include=:all, include_qpar_integral_terms=true) = begin + jacobian_matrix, f, ppar, moments, phi_global, collisions, + composition, z, vperp, vpa, z_spectral, vperp_spectral, + vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params, ion_dt, ir, + evolve_ppar, include=:all, + include_qpar_integral_terms=true) = begin dt = t_params.dt[] buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] @@ -3506,6 +4523,7 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati dppar_dz = @view moments.electron.dppar_dz[:,ir] dvth_dz = @view moments.electron.dvth_dz[:,ir] dqpar_dz = @view moments.electron.dqpar_dz[:,ir] + phi = @view phi_global[:,ir] upar_ion = @view moments.ion.upar[:,ir,1] @@ -3627,19 +4645,22 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati add_ion_dt_forcing_of_electron_ppar_to_Jacobian!( jacobian_matrix, z, dt, ion_dt, ir, include; ppar_offset=pdf_size) end + add_wall_boundary_condition_to_Jacobian!( + jacobian_matrix, phi, f, ppar, vth, upar, z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient, me, ir, include; + ppar_offset=pdf_size) return nothing end """ - fill_electron_kinetic_equation_v_only_Jacobian!(jacobian_matrix, f, ppar, moments, - collisions, composition, z, vperp, - vpa, z_spectral, vperp_specral, - vpa_spectral, z_advect, vpa_advect, - scratch_dummy, - external_source_settings, - num_diss_params, t_params, ion_dt, ir, - iz, evolve_ppar, include=:all) + fill_electron_kinetic_equation_v_only_Jacobian!() + jacobian_matrix, f, ppar, dpdf_dz, dpdf_dvpa, z_speed, moments, zeroth_moment, + first_moment, second_moment, third_moment, dthird_moment_dz, phi, collisions, + composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, + vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, + ion_dt, ir, iz, evolve_ppar) Fill a pre-allocated matrix with the Jacobian matrix for a velocity-space solve part of the ADI method for electron kinetic equation and (if `evolve_ppar=true`) the electron @@ -3648,7 +4669,7 @@ energy equation. @timeit global_timer fill_electron_kinetic_equation_v_only_Jacobian!( jacobian_matrix, f, ppar, dpdf_dz, dpdf_dvpa, z_speed, moments, zeroth_moment, first_moment, second_moment, third_moment, - dthird_moment_dz, collisions, composition, z, vperp, vpa, + dthird_moment_dz, phi, collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, ir, iz, evolve_ppar) = begin @@ -3667,7 +4688,6 @@ energy equation. upar_ion = moments.ion.upar[iz,ir,1] - pdf_size = z.n * vperp.n * vpa.n v_size = vperp.n * vpa.n # Initialise jacobian_matrix to the identity @@ -3711,6 +4731,14 @@ energy equation. jacobian_matrix, z, dt, ion_dt, ir, iz) end + if t_params.include_wall_bc_in_preconditioner + add_wall_boundary_condition_to_Jacobian!( + jacobian_matrix, phi, f, ppar, vth, upar, z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient, me, ir, :implicit_v, iz; + ppar_offset=v_size) + end + return nothing end diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl index b699aa563..0e06891b7 100644 --- a/moment_kinetics/test/jacobian_matrix_tests.jl +++ b/moment_kinetics/test/jacobian_matrix_tests.jl @@ -8,7 +8,8 @@ using moment_kinetics: setup_moment_kinetics, cleanup_moment_kinetics! using moment_kinetics.analysis: vpagrid_to_dzdt using moment_kinetics.array_allocation: allocate_shared_float using moment_kinetics.boundary_conditions: enforce_v_boundary_condition_local!, - enforce_vperp_boundary_condition! + enforce_vperp_boundary_condition!, + skip_f_electron_bc_points_in_Jacobian using moment_kinetics.calculus: derivative! using moment_kinetics.communication using moment_kinetics.derivatives: derivative_z!, derivative_z_pdf_vpavperpz! @@ -28,10 +29,13 @@ using moment_kinetics.electron_kinetic_equation: add_contribution_from_pdf_term! add_ion_dt_forcing_of_electron_ppar_to_z_only_Jacobian!, add_ion_dt_forcing_of_electron_ppar_to_v_only_Jacobian!, electron_kinetic_equation_euler_update!, + enforce_boundary_condition_on_electron_pdf!, fill_electron_kinetic_equation_Jacobian!, fill_electron_kinetic_equation_v_only_Jacobian!, fill_electron_kinetic_equation_z_only_Jacobian_f!, - fill_electron_kinetic_equation_z_only_Jacobian_ppar! + fill_electron_kinetic_equation_z_only_Jacobian_ppar!, + add_wall_boundary_condition_to_Jacobian!, + zero_z_boundary_condition_points using moment_kinetics.electron_vpa_advection: electron_vpa_advection!, update_electron_speed_vpa!, add_electron_vpa_advection_to_Jacobian!, @@ -55,10 +59,12 @@ using moment_kinetics.moment_constraints: electron_implicit_constraint_forcing!, add_electron_implicit_constraint_forcing_to_z_only_Jacobian!, add_electron_implicit_constraint_forcing_to_v_only_Jacobian!, hard_force_moment_constraints! +using moment_kinetics.timer_utils: reset_mk_timers! using moment_kinetics.type_definitions: mk_float using moment_kinetics.velocity_moments: calculate_electron_moment_derivatives_no_r!, integrate_over_vspace +using LinearAlgebra using StatsBase # Small parameter used to create perturbations to test Jacobian against @@ -208,6 +214,9 @@ test_input = OptionsDict("output" => OptionsDict("run_name" => "jacobian_matrix" ) function get_mk_state(test_input) + # Reset timers in case there was a previous run which did not clean them up. + reset_mk_timers!() + mk_state = nothing quietoutput() do mk_state = setup_moment_kinetics(test_input; skip_electron_solve=true) @@ -460,7 +469,9 @@ function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2) vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + if z.bc == "wall" + error("z_bc = \"wall\" not supported here yet.") + elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) # Boundary conditions on incoming part of distribution function. Note # that as density, upar, ppar do not change in this implicit step, # f_electron_newvar, f_old, and residual should all be zero at exactly @@ -797,7 +808,9 @@ function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2) vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + if z.bc == "wall" + error("z_bc = \"wall\" not supported here yet.") + elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) # Boundary conditions on incoming part of distribution function. Note # that as density, upar, ppar do not change in this implicit step, # f_electron_newvar, f_old, and residual should all be zero at exactly @@ -1148,7 +1161,9 @@ function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilo vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + if z.bc == "wall" + error("z_bc = \"wall\" not supported here yet.") + elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) # Boundary conditions on incoming part of distribution function. Note # that as density, upar, ppar do not change in this implicit step, # f_electron_newvar, f_old, and residual should all be zero at exactly @@ -1444,7 +1459,9 @@ function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2) vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + if z.bc == "wall" + error("z_bc = \"wall\" not supported here yet.") + elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) # Boundary conditions on incoming part of distribution function. Note # that as density, upar, ppar do not change in this implicit step, # f_electron_newvar, f_old, and residual should all be zero at exactly @@ -1759,7 +1776,9 @@ function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2) vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + if z.bc == "wall" + error("z_bc = \"wall\" not supported here yet.") + elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) # Boundary conditions on incoming part of distribution function. Note # that as density, upar, ppar do not change in this implicit step, # f_electron_newvar, f_old, and residual should all be zero at exactly @@ -2088,7 +2107,9 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + if z.bc == "wall" + error("z_bc = \"wall\" not supported here yet.") + elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) # Boundary conditions on incoming part of distribution function. Note # that as density, upar, ppar do not change in this implicit step, # f_electron_newvar, f_old, and residual should all be zero at exactly @@ -2431,7 +2452,9 @@ function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsil vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + if z.bc == "wall" + error("z_bc = \"wall\" not supported here yet.") + elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) # Boundary conditions on incoming part of distribution function. Note # that as density, upar, ppar do not change in this implicit step, # f_electron_newvar, f_old, and residual should all be zero at exactly @@ -3098,18 +3121,22 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2 end function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) - test_input = deepcopy(test_input) - test_input["output"]["run_name"] *= "_electron_kinetic_equation" - println(" - electron_kinetic_equation") - @testset "electron_kinetic_equation" begin + # Looser rtol for "wall" bc because integral corrections not accounted for in wall bc + # Jacobian (yet?). + @testset "electron_kinetic_equation bc=$bc" for (bc, adi_tol) ∈ (("constant", 1.0e-15), ("wall", 1.0e-13)) + println(" - electron_kinetic_equation $bc") + this_test_input = deepcopy(test_input) + this_test_input["output"]["run_name"] *= "_electron_kinetic_equation_$bc" + this_test_input["z"]["bc"] = bc + # Suppress console output while running pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, composition, collisions, geometry, gyroavs, boundary_distributions, external_source_settings, num_diss_params, nl_solver_params, advance, advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, - ascii_io, io_moments, io_dfns = get_mk_state(test_input) + ascii_io, io_moments, io_dfns = get_mk_state(this_test_input) dens = @view moments.electron.dens[:,ir] upar = @view moments.electron.upar[:,ir] @@ -3270,9 +3297,9 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) # Add 'explicit' contribution # Use jacobian_matrix as a temporary buffer here. fill_electron_kinetic_equation_Jacobian!( - jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, - z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, - scratch_dummy, external_source_settings, num_diss_params, + jacobian_matrix, f, ppar, moments, fields.phi, collisions, composition, z, + vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, + vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params.electron, ion_dt, ir, true, :explicit_v) begin_serial_region() @serial_region begin @@ -3280,10 +3307,10 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) end fill_electron_kinetic_equation_Jacobian!( - jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, - z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, - external_source_settings, num_diss_params, t_params.electron, ion_dt, ir, - true) + jacobian_matrix, f, ppar, moments, fields.phi, collisions, composition, z, + vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, + vpa_advect, scratch_dummy, external_source_settings, num_diss_params, + t_params.electron, ion_dt, ir, true) begin_serial_region() @serial_region begin @@ -3292,7 +3319,7 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) # Jacobian matrix functions without being too messed up by floating-point # rounding errors. The result is that some entries in the Jacobian matrix # here are O(1.0e5), so it is important to use `rtol` here. - @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=1.0e-15, atol=1.0e-15) + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=adi_tol, atol=1.0e-15) end end @@ -3320,18 +3347,18 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], ppar[iz], dpdf_dz[:,:,iz], dpdf_dvpa[:,:,iz], z_speed, moments, zeroth_moment[iz], first_moment[iz], second_moment[iz], - third_moment[iz], dthird_moment_dz[iz], collisions, composition, z, - vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, - vpa_advect, scratch_dummy, external_source_settings, num_diss_params, - t_params.electron, ion_dt, ir, iz, true) + third_moment[iz], dthird_moment_dz[iz], fields.phi[iz,ir], collisions, + composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, external_source_settings, + num_diss_params, t_params.electron, ion_dt, ir, iz, true) end # Add 'explicit' contribution # Use jacobian_matrix as a temporary buffer here. fill_electron_kinetic_equation_Jacobian!( - jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, - z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, - scratch_dummy, external_source_settings, num_diss_params, + jacobian_matrix, f, ppar, moments, fields.phi, collisions, composition, z, + vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, + vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params.electron, ion_dt, ir, true, :explicit_z) begin_serial_region() @@ -3340,10 +3367,10 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) end fill_electron_kinetic_equation_Jacobian!( - jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, - z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, - external_source_settings, num_diss_params, t_params.electron, ion_dt, ir, - true) + jacobian_matrix, f, ppar, moments, fields.phi, collisions, composition, z, + vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, + vpa_advect, scratch_dummy, external_source_settings, num_diss_params, + t_params.electron, ion_dt, ir, true) begin_serial_region() @serial_region begin @@ -3352,7 +3379,7 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) # Jacobian matrix functions without being too messed up by floating-point # rounding errors. The result is that some entries in the Jacobian matrix # here are O(1.0e5), so it is important to use `rtol` here. - @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=1.0e-13, atol=1.0e-13) + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=10.0*adi_tol, atol=1.0e-13) end end @@ -3420,36 +3447,7 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) - # Boundary conditions on incoming part of distribution function. Note - # that as density, upar, ppar do not change in this implicit step, - # f_electron_newvar, f_old, and residual should all be zero at exactly - # the same set of grid points, so it is reasonable to zero-out - # `residual` to impose the boundary condition. We impose this after - # subtracting f_old in case rounding errors, etc. mean that at some - # point f_old had a different boundary condition cut-off index. - begin_vperp_vpa_region() - v_unnorm = vpa.scratch - zero = 1.0e-14 - if z.irank == 0 - iz = 1 - v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) - @loop_vperp_vpa ivperp ivpa begin - if v_unnorm[ivpa] > -zero - residual_f[ivpa,ivperp,iz] = 0.0 - end - end - end - if z.irank == z.nrank - 1 - iz = z.n - v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) - @loop_vperp_vpa ivperp ivpa begin - if v_unnorm[ivpa] < zero - residual_f[ivpa,ivperp,iz] = 0.0 - end - end - end - end + zero_z_boundary_condition_points(residual_f, z, vpa, moments, ir) return nothing end @@ -3457,15 +3455,30 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) original_residual_p = allocate_shared_float(size(ppar)...) perturbed_residual_f = allocate_shared_float(size(f)...) perturbed_residual_p = allocate_shared_float(size(ppar)...) + f_plus_delta_f = allocate_shared_float(size(f)...) + f_with_delta_p = allocate_shared_float(size(f)...) + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + f_plus_delta_f[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + delta_f[ivpa,ivperp,iz] + f_with_delta_p[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + p_plus_delta_p = allocate_shared_float(size(ppar)...) + begin_z_region() + @loop_z iz begin + p_plus_delta_p[iz] = ppar[iz] + delta_p[iz] + end @testset "δf only" begin residual_func!(original_residual_f, original_residual_p, f, ppar) - residual_func!(perturbed_residual_f, perturbed_residual_p, f.+delta_f, ppar) + residual_func!(perturbed_residual_f, perturbed_residual_p, f_plus_delta_f, ppar) begin_serial_region() @serial_region begin delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) + # Take this difference rather than using delta_f directly because we need + # the effect of the boundary condition having been applied to + # f_plus_delta_f. + delta_state[1:pdf_size] .= vec(f_plus_delta_f .- f) residual_update_with_Jacobian = jacobian_matrix * delta_state perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] perturbed_with_Jacobian_p = vec(original_residual_p) .+ residual_update_with_Jacobian[pdf_size+1:end] @@ -3483,11 +3496,15 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) @testset "δp only" begin residual_func!(original_residual_f, original_residual_p, f, ppar) - residual_func!(perturbed_residual_f, perturbed_residual_p, f, ppar.+delta_p) + residual_func!(perturbed_residual_f, perturbed_residual_p, f_with_delta_p, p_plus_delta_p) begin_serial_region() @serial_region begin delta_state = zeros(mk_float, total_size) + # Take this difference rather than using delta_f directly because we need + # the effect of the boundary condition having been applied to + # f_with_delta_p. + delta_state[1:pdf_size] .= vec(f_with_delta_p .- f) delta_state[pdf_size+1:end] .= vec(delta_p) residual_update_with_Jacobian = jacobian_matrix * delta_state perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] @@ -3506,12 +3523,15 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) @testset "δf and δp" begin residual_func!(original_residual_f, original_residual_p, f, ppar) - residual_func!(perturbed_residual_f, perturbed_residual_p, f.+delta_f, ppar.+delta_p) + residual_func!(perturbed_residual_f, perturbed_residual_p, f_plus_delta_f, p_plus_delta_p) begin_serial_region() @serial_region begin delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) + # Take this difference rather than using delta_f directly because we need + # the effect of the boundary condition having been applied to + # f_plus_delta_f. + delta_state[1:pdf_size] .= vec(f_plus_delta_f .- f) delta_state[pdf_size+1:end] .= vec(delta_p) residual_update_with_Jacobian = jacobian_matrix * delta_state perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] @@ -3534,6 +3554,408 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) return nothing end +function test_electron_wall_bc(test_input; atol=(7.0*epsilon)^2) + test_input = deepcopy(test_input) + test_input["output"]["run_name"] *= "_electron_wall_bc" + println(" - electron_wall_bc") + + # This test only affects the end-points in z, so only include those points to avoid an + # over-optimistic error estimate due the time update matrix for all other z-indices + # just being the identity. + test_input["z"]["nelement"] = 1 + test_input["z"]["ngrid"] = 2 + test_input["z"]["bc"] = "wall" + + # Interpolation done during the boundary condition needs to be reasonably accurate for + # the simplified form (without constraints) that is done in the 'Jacobian matrix' to + # match the full version, so increase vpa resolution. + test_input["vpa"]["nelement"] = 256 + test_input["vz"]["nelement"] = 256 + + @testset "electron_wall_bc" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + phi = @view fields.phi[:,ir] + z_spectral = spectral_objects.z_spectral + vperp_spectral = spectral_objects.vperp_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + me = composition.me_over_mi + + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + + v_size = vperp.n * vpa.n + + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar + third_moment = scratch_dummy.buffer_z_1 + dthird_moment_dz = scratch_dummy.buffer_z_2 + begin_z_region() + @loop_z iz begin + third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz] + end + derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = allocate_shared_float(size(ppar)...) + p_amplitude = epsilon * maximum(ppar) + f = @view pdf.electron.norm[:,:,:,ir] + begin_serial_region() + @serial_region begin + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) + # enforce the boundary condition(s) on the electron pdf + @views enforce_boundary_condition_on_electron_pdf!( + f, phi, vth, upar, z, vperp, vpa, vperp_spectral, vpa_spectral, + vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + composition.me_over_mi, ir; bc_constraints=false, update_vcut=false) + delta_f = allocate_shared_float(size(f)...) + f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. + # For this test have no z-dependence in delta_f so that it does not vanish + # at the z-boundaries + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude + + delta_f .= f_amplitude .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] + begin_z_vperp_region() + update_electron_speed_vpa!(vpa_advect[1], dens, upar, ppar, moments, vpa.grid, + external_source_settings.electron, ir) + @loop_z_vperp iz ivperp begin + @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir] + end + #calculate the upwind derivative of the electron pdf w.r.t. wpa + @loop_z_vperp iz ivperp begin + @views derivative!(dpdf_dvpa[:,ivperp,iz], f[:,ivperp,iz], vpa, + vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral) + end + + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix .= 0.0 + end + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + + # Initialise identity matrix. + jacobian_matrix[row,row] = 1.0 + end + begin_z_region() + @loop_z iz begin + # Rows corresponding to electron_ppar + row = pdf_size + iz + + # Initialise identity matrix. + jacobian_matrix[row,row] = 1.0 + end + + add_wall_boundary_condition_to_Jacobian!( + jacobian_matrix, phi, f, ppar, vth, upar, z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient, me, ir, :all; + ppar_offset=pdf_size) + + # Test 'ADI Jacobians' before other tests, because residual_func() may modify some + # variables (vth, etc.). + + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + + @testset "ADI Jacobians - implicit z" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + + # Initialise identity matrix. + jacobian_matrix_ADI_check[row,:] .= 0.0 + jacobian_matrix_ADI_check[row,row] = 1.0 + end + begin_z_region() + @loop_z iz begin + # Rows corresponding to electron_ppar + row = pdf_size + iz + + # Initialise identity matrix. + jacobian_matrix_ADI_check[row,:] .= 0.0 + jacobian_matrix_ADI_check[row,row] = 1.0 + end + + # There is no 'implicit z' contribution for wall bc + + # Add 'explicit' contribution + add_wall_boundary_condition_to_Jacobian!( + jacobian_matrix_ADI_check, phi, f, ppar, vth, upar, z, vperp, vpa, + vperp_spectral, vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient, me, ir, :explicit_v; + ppar_offset=pdf_size) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + + @testset "ADI Jacobians - implicit v" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + + # Initialise identity matrix. + jacobian_matrix_ADI_check[row,:] .= 0.0 + jacobian_matrix_ADI_check[row,row] = 1.0 + end + begin_z_region() + @loop_z iz begin + # Rows corresponding to electron_ppar + row = pdf_size + iz + + # Initialise identity matrix. + jacobian_matrix_ADI_check[row,:] .= 0.0 + jacobian_matrix_ADI_check[row,row] = 1.0 + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_z_region() + @loop_z iz begin + this_slice = collect((iz - 1)*v_size + 1:iz*v_size) + push!(this_slice, iz + pdf_size) + @views add_wall_boundary_condition_to_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], phi[iz], f[:,:,iz], + ppar[iz], vth[iz], upar[iz], z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient, me, ir, + :implicit_v, iz; ppar_offset=v_size) + end + + # There is no 'explicit vpa' contribution for wall bc + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + + # enforce the boundary condition(s) on the electron pdf + @views enforce_boundary_condition_on_electron_pdf!( + this_f, phi, vth, upar, z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + composition.me_over_mi, ir; bc_constraints=false, + update_vcut=false) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if z.bc == "wall" + zero_z_boundary_condition_points(residual, z, vpa, moments, ir) + else + error("Testing wall bc but z_bc != \"wall\".") + end + + return nothing + end + + original_residual = allocate_shared_float(size(f)...) + perturbed_residual = allocate_shared_float(size(f)...) + f_plus_delta_f = allocate_shared_float(size(f)...) + f_with_delta_p = allocate_shared_float(size(f)...) + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + f_plus_delta_f[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + delta_f[ivpa,ivperp,iz] + f_with_delta_p[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + p_plus_delta_p = allocate_shared_float(size(ppar)...) + begin_z_region() + @loop_z iz begin + p_plus_delta_p[iz] = ppar[iz] + delta_p[iz] + end + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f_plus_delta_f, ppar) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + # Take this difference rather than using delta_f directly because we need + # the effect of the boundary condition having been applied to + # f_plus_delta_f. + delta_state[1:pdf_size] .= vec(f_plus_delta_f .- f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + # Check that something happened, to make sure that for example the + # residual function and Jacobian don't both just zero out the boundary + # points. + @test norm(vec(perturbed_residual) .- perturbed_with_Jacobian) > 1.0e-12 + + # If the boundary condition is correctly implemented in the Jacobian, then + # if f+delta_f obeys the boundary condition, then J*delta_state should + # give zeros in the boundary points. + @test elementwise_isapprox(perturbed_residual, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n); + rtol=0.0, atol=atol) + end + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f_with_delta_p, p_plus_delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + # Take this difference rather than using delta_f directly because we need + # the effect of the boundary condition having been applied to + # f_with_delta_p. + delta_state[1:pdf_size] .= vec(f_with_delta_p .- f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + vec(delta_p); atol=1.0e-15) + + # Check that something happened, to make sure that for example the + # residual function and Jacobian don't both just zero out the boundary + # points. + @test norm(vec(perturbed_residual) .- perturbed_with_Jacobian) > 1.0e-12 + + # Use an absolute tolerance for this test because if we used a norm_factor + # like the other tests, it would be zero to machine precision at some + # points. + @test elementwise_isapprox(perturbed_residual, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n); + rtol=0.0, atol=atol) + end + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f_plus_delta_f, p_plus_delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + # Take this difference rather than using delta_f directly because we need + # the effect of the boundary condition having been applied to + # f_plus_delta_f. + delta_state[1:pdf_size] .= vec(f_plus_delta_f .- f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + vec(delta_p); atol=1.0e-15) + + # Check that something happened, to make sure that for example the + # residual function and Jacobian don't both just zero out the boundary + # points. + @test norm(vec(perturbed_residual) .- perturbed_with_Jacobian) > 1.0e-12 + + # Use an absolute tolerance for this test because if we used a norm_factor + # like the other tests, it would be zero to machine precision at some + # points. + @test elementwise_isapprox(perturbed_residual, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n); + rtol=0.0, atol=atol) + end + end + + cleanup_mk_state!(ascii_io, io_moments, io_dfns) + end + + return nothing +end + function runtests() if Sys.isapple() && "CI" ∈ keys(ENV) && global_size[] > 1 # These tests are too slow in the parallel tests job on macOS, so skip in that @@ -3556,6 +3978,7 @@ function runtests() test_electron_implicit_constraint_forcing(test_input) test_electron_energy_equation(test_input) test_ion_dt_forcing_of_electron_ppar(test_input) + test_electron_wall_bc(test_input) test_electron_kinetic_equation(test_input) end diff --git a/moment_kinetics/test/nonlinear_solver_tests.jl b/moment_kinetics/test/nonlinear_solver_tests.jl index 36c74eb21..81d4c87c7 100644 --- a/moment_kinetics/test/nonlinear_solver_tests.jl +++ b/moment_kinetics/test/nonlinear_solver_tests.jl @@ -58,7 +58,8 @@ function linear_test() zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), - zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_int, 0), zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_int, 0), zeros(mk_int, 0), zeros(mk_float, 0, 0), zeros(mk_float, 0, 0), advection_input("", 0.0, 0.0, 0.0), zeros(mk_float, 0), zeros(mk_float, 0), MPI.COMM_NULL, 1:n, 1:n, @@ -171,7 +172,8 @@ function nonlinear_test() zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), - zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_int, 0), zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_int, 0), zeros(mk_int, 0), zeros(mk_float, 0, 0), zeros(mk_float, 0, 0), advection_input("", 0.0, 0.0, 0.0), zeros(mk_float, 0), zeros(mk_float, 0), MPI.COMM_NULL, 1:n, 1:n, From fc7bd426045802a69d594ead6a1807ded361ddf8 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 3 Dec 2024 16:59:29 +0000 Subject: [PATCH 06/10] Add shared-memory integer buffers to coordinate structs --- moment_kinetics/src/coordinates.jl | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl index 33c8b9299..7f71fd12a 100644 --- a/moment_kinetics/src/coordinates.jl +++ b/moment_kinetics/src/coordinates.jl @@ -8,7 +8,7 @@ export set_element_boundaries using LinearAlgebra using ..type_definitions: mk_float, mk_int, OptionsDict -using ..array_allocation: allocate_float, allocate_shared_float, allocate_int +using ..array_allocation: allocate_float, allocate_shared_float, allocate_int, allocate_shared_int using ..calculus: derivative! using ..chebyshev: scaled_chebyshev_grid, scaled_chebyshev_radau_grid, setup_chebyshev_pseudospectral using ..communication @@ -25,7 +25,7 @@ using OrderedCollections: OrderedDict """ structure containing basic information related to coordinates """ -struct coordinate{T <: AbstractVector{mk_float},Tbparams} +struct coordinate{T <: AbstractVector{mk_float}, Ti <: AbstractVector{mk_int}, Tbparams} # name is the name of the variable associated with this coordiante name::String # n_global is the total number of grid points associated with this coordinate @@ -109,6 +109,12 @@ struct coordinate{T <: AbstractVector{mk_float},Tbparams} # scratch_shared3 is a shared-memory array used for intermediate calculations requiring # n entries scratch_shared3::T + # scratch_shared_int is a shared-memory array used for intermediate calculations + # requiring n integer entries + scratch_shared_int::Ti + # scratch_shared_int is a shared-memory array used for intermediate calculations + # requiring n integer entries + scratch_shared_int2::Ti # scratch_2d and scratch2_2d are arrays used for intermediate calculations requiring # ngrid x nelement entries scratch_2d::Array{mk_float,2} @@ -319,10 +325,14 @@ function define_coordinate(coord_input::NamedTuple; parallel_io::Bool=false, scratch_shared = allocate_float(n_local) scratch_shared2 = allocate_float(n_local) scratch_shared3 = allocate_float(n_local) + scratch_shared_int = allocate_int(n_local) + scratch_shared_int2 = allocate_int(n_local) else scratch_shared = allocate_shared_float(n_local) scratch_shared2 = allocate_shared_float(n_local) scratch_shared3 = allocate_shared_float(n_local) + scratch_shared_int = allocate_shared_int(n_local) + scratch_shared_int2 = allocate_shared_int(n_local) end # Initialise scratch_shared* so that the debug checks do not complain when they get # printed by `println(io, all_inputs)` in mk_input(). @@ -330,6 +340,8 @@ function define_coordinate(coord_input::NamedTuple; parallel_io::Bool=false, scratch_shared .= NaN scratch_shared2 .= NaN scratch_shared3 .= NaN + scratch_shared_int .= typemin(mk_int) + scratch_shared_int2 .= typemin(mk_int) end if !ignore_MPI _block_synchronize() @@ -396,10 +408,11 @@ function define_coordinate(coord_input::NamedTuple; parallel_io::Bool=false, uniform_grid, duniform_dgrid, scratch, copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), scratch_int_nelement_plus_1, scratch_shared, - scratch_shared2, scratch_shared3, scratch_2d, copy(scratch_2d), advection, - send_buffer, receive_buffer, comm, local_io_range, global_io_range, - element_scale, element_shift, coord_input.element_spacing_option, - element_boundaries, radau_first_element, other_nodes, one_over_denominator) + scratch_shared2, scratch_shared3, scratch_shared_int, scratch_shared_int2, + scratch_2d, copy(scratch_2d), advection, send_buffer, receive_buffer, comm, + local_io_range, global_io_range, element_scale, element_shift, + coord_input.element_spacing_option, element_boundaries, radau_first_element, + other_nodes, one_over_denominator) if coord.n == 1 && occursin("v", coord.name) spectral = null_velocity_dimension_info() From 6f42a3211f0876038b51d998682ae8805fd18719 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 27 Nov 2024 13:42:05 +0000 Subject: [PATCH 07/10] Recalculate preconditioner whenever any vcut index changes The response to changes in electron_ppar is very different at the last non-zero grid point before vcut than at nearby grid points. A preconditioner calculated when that grid point was in a different place will be a bad preconditioner. Recalculate the preconditioner whenever that grid point changes (at any r-index). --- .../src/electron_kinetic_equation.jl | 76 +++++++++++++++++-- moment_kinetics/src/initial_conditions.jl | 2 + moment_kinetics/src/nonlinear_solvers.jl | 8 +- 3 files changed, 79 insertions(+), 7 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 0b9e9bc09..9ceda4004 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1622,10 +1622,56 @@ global_rank[] == 0 && println("recalculating precon") end end + new_lowerz_vcut_inds = r.scratch_shared_int + new_upperz_vcut_inds = r.scratch_shared_int2 apply_electron_bc_and_constraints_no_r!(f_electron_new, phi, moments, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, - num_diss_params, composition, ir) + num_diss_params, composition, ir; + lowerz_vcut_inds=new_lowerz_vcut_inds, + upperz_vcut_inds=new_upperz_vcut_inds) + # Check if either vcut_ind has changed - if either has, recalculate the + # preconditioner because the response at the grid point before the cutoff is + # sharp, so the preconditioner could be significantly wrong when it was + # calculated using the wrong vcut_ind. + lower_vcut_changed = @view z.scratch_shared_int[1:1] + upper_vcut_changed = @view z.scratch_shared_int[2:2] + @serial_region begin + if z.irank == 0 + precon_lowerz_vcut_inds = nl_solver_params.precon_lowerz_vcut_inds + if all(new_lowerz_vcut_inds .== precon_lowerz_vcut_inds) + lower_vcut_changed[] = 0 + else + lower_vcut_changed[] = 1 + precon_lowerz_vcut_inds .= new_lowerz_vcut_inds + end + end + MPI.Bcast!(lower_vcut_changed, comm_inter_block[]; root=0) + #req1 = MPI.Ibcast!(lower_vcut_changed, comm_inter_block[]; root=0) + + if z.irank == z.nrank - 1 + precon_upperz_vcut_inds = nl_solver_params.precon_upperz_vcut_inds + if all(new_upperz_vcut_inds .== precon_upperz_vcut_inds) + upper_vcut_changed[] = 0 + else + upper_vcut_changed[] = 1 + precon_upperz_vcut_inds .= new_upperz_vcut_inds + end + end + MPI.Bcast!(upper_vcut_changed, comm_inter_block[]; root=n_blocks[]-1) + #req2 = MPI.Ibcast!(upper_vcut_changed, comm_inter_block[]; root=n_blocks[]-1) + + # Eventually can use Ibcast!() to make the two broadcasts run + # simultaneously, but need the function to be merged into MPI.jl (see + # https://github.com/JuliaParallel/MPI.jl/pull/882). + #MPI.Waitall([req1, req2]) + end + _block_synchronize() + if lower_vcut_changed[] == 1 || upper_vcut_changed[] == 1 + # One or both of the indices changed for some `ir`, so force the + # preconditioner to be recalculated next time. + nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval + end if !evolve_ppar # update the electron heat flux @@ -2011,7 +2057,9 @@ end function apply_electron_bc_and_constraints!(this_scratch, phi, moments, r, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, - num_diss_params, composition) + num_diss_params, composition; + lowerz_vcut_inds=nothing, + upperz_vcut_inds=nothing) latest_pdf = this_scratch.pdf_electron begin_r_z_vperp_vpa_region() @@ -2026,7 +2074,8 @@ function apply_electron_bc_and_constraints!(this_scratch, phi, moments, r, z, vp moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, moments, num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - composition.me_over_mi, ir) + composition.me_over_mi, ir; lowerz_vcut_inds=lowerz_vcut_inds, + upperz_vcut_inds=upperz_vcut_inds) end begin_r_z_region() @@ -2050,7 +2099,8 @@ end function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, num_diss_params, composition, - ir) + ir; lowerz_vcut_inds=nothing, + upperz_vcut_inds=nothing) begin_z_vperp_vpa_region() @loop_z_vperp_vpa iz ivperp ivpa begin f_electron[ivpa,ivperp,iz] = max(f_electron[ivpa,ivperp,iz], 0.0) @@ -2061,7 +2111,8 @@ function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vp f_electron, phi, moments.electron.vth[:,ir], moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, moments, num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - composition.me_over_mi, ir) + composition.me_over_mi, ir; lowerz_vcut_inds=lowerz_vcut_inds, + upperz_vcut_inds=upperz_vcut_inds) begin_z_region() A = moments.electron.constraints_A_coefficient @@ -2544,7 +2595,8 @@ end @timeit global_timer enforce_boundary_condition_on_electron_pdf!( pdf, phi, vthe, upar, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_adv, moments, vpa_diffusion, me_over_mi, ir; - bc_constraints=true, update_vcut=true) = begin + bc_constraints=true, update_vcut=true, lowerz_vcut_inds=nothing, + upperz_vcut_inds=nothing) = begin @boundscheck bc_constraints && !update_vcut && error("update_vcut is not used when bc_constraints=true, but update_vcut has non-default value") @@ -2744,8 +2796,14 @@ end # plus_vcut_ind+1 where vcut is. vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) if vcut_fraction > 0.5 + if lowerz_vcut_inds !== nothing + lowerz_vcut_inds[ir] = plus_vcut_ind+1 + end pdf[plus_vcut_ind+1,1,1] *= vcut_fraction - 0.5 else + if lowerz_vcut_inds !== nothing + lowerz_vcut_inds[ir] = plus_vcut_ind + end pdf[plus_vcut_ind+1,1,1] = 0.0 pdf[plus_vcut_ind,1,1] *= vcut_fraction + 0.5 end @@ -2934,8 +2992,14 @@ end # minus_vcut_ind where -vcut is. vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) if vcut_fraction < 0.5 + if upperz_vcut_inds !== nothing + upperz_vcut_inds[ir] = minus_vcut_ind-1 + end pdf[minus_vcut_ind-1,1,end] *= 0.5 - vcut_fraction else + if upperz_vcut_inds !== nothing + upperz_vcut_inds[ir] = minus_vcut_ind + end pdf[minus_vcut_ind-1,1,end] = 0.0 pdf[minus_vcut_ind,1,end] *= 1.5 - vcut_fraction end diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index df191e544..ca9856868 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -761,6 +761,8 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field nl_solver_params.electron_advance.global_precon_iterations, nl_solver_params.electron_advance.solves_since_precon_update, nl_solver_params.electron_advance.precon_dt, + nl_solver_params.electron_advance.precon_lowerz_vcut_inds, + nl_solver_params.electron_advance.precon_upperz_vcut_inds, nl_solver_params.electron_advance.serial_solve, nl_solver_params.electron_advance.max_nonlinear_iterations_this_step, nl_solver_params.electron_advance.max_linear_iterations_this_step, diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 7b2707d69..aeeb43436 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -66,6 +66,8 @@ struct nl_solver_info{TH,TV,Tcsg,Tlig,Tprecon,Tpretype} global_precon_iterations::Base.RefValue{mk_int} solves_since_precon_update::Base.RefValue{mk_int} precon_dt::Base.RefValue{mk_float} + precon_lowerz_vcut_inds::Vector{mk_int} + precon_upperz_vcut_inds::Vector{mk_int} serial_solve::Bool max_nonlinear_iterations_this_step::Base.RefValue{mk_int} max_linear_iterations_this_step::Base.RefValue{mk_int} @@ -113,6 +115,7 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa linear_restart = nl_solver_input.linear_restart + n_vcut_inds = 0 if serial_solve H = allocate_float(linear_restart + 1, linear_restart) c = allocate_float(linear_restart + 1) @@ -143,6 +146,8 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa end V = (V_ppar, V_pdf) + + n_vcut_inds = prod(outer_coord_sizes) else H = allocate_shared_float(linear_restart + 1, linear_restart) c = allocate_shared_float(linear_restart + 1) @@ -271,7 +276,8 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), Ref(nl_solver_input.preconditioner_update_interval), - Ref(mk_float(0.0)), serial_solve, Ref(0), Ref(0), + Ref(mk_float(0.0)), zeros(mk_int, n_vcut_inds), + zeros(mk_int, n_vcut_inds), serial_solve, Ref(0), Ref(0), preconditioner_type, nl_solver_input.preconditioner_update_interval, preconditioners) end From 67237199f1d2778a944238e4d23447aee3ec3a75 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 3 Dec 2024 22:12:23 +0000 Subject: [PATCH 08/10] Add option that can be set to enable inclusion of wall bc in precon Adding the wall boundary condition to the preconditioner matrix (at least in the form implemented so far) does not seem to speed up convergence significantly, so make it a feature that can be optionally enabled. --- moment_kinetics/src/electron_kinetic_equation.jl | 10 ++++++---- moment_kinetics/src/input_structs.jl | 1 + moment_kinetics/src/moment_kinetics_input.jl | 1 + moment_kinetics/src/time_advance.jl | 9 ++++++--- moment_kinetics/test/jacobian_matrix_tests.jl | 1 + 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 9ceda4004..2bf8685a1 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -4709,11 +4709,13 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati add_ion_dt_forcing_of_electron_ppar_to_Jacobian!( jacobian_matrix, z, dt, ion_dt, ir, include; ppar_offset=pdf_size) end - add_wall_boundary_condition_to_Jacobian!( - jacobian_matrix, phi, f, ppar, vth, upar, z, vperp, vpa, vperp_spectral, - vpa_spectral, vpa_advect, moments, - num_diss_params.electron.vpa_dissipation_coefficient, me, ir, include; + if t_params.include_wall_bc_in_preconditioner + add_wall_boundary_condition_to_Jacobian!( + jacobian_matrix, phi, f, ppar, vth, upar, z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient, me, ir, include; ppar_offset=pdf_size) + end return nothing end diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index 047167195..241dd6958 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -89,6 +89,7 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero cap_factor_ion_dt::mk_float max_pseudotimesteps::mk_int max_pseudotime::mk_float + include_wall_bc_in_preconditioner::Bool write_after_fixed_step_count::Bool error_sum_zero::Terrorsum split_operators::Bool diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl index c6ff98c85..aaf124309 100644 --- a/moment_kinetics/src/moment_kinetics_input.jl +++ b/moment_kinetics/src/moment_kinetics_input.jl @@ -213,6 +213,7 @@ function mk_input(input_dict=OptionsDict(); save_inputs_to_txt=false, ignore_MPI cap_factor_ion_dt=10.0, max_pseudotimesteps=1000, max_pseudotime=1.0e-2, + include_wall_bc_in_preconditioner=false, no_restart=false, debug_io=false, ) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index d579c7461..436206e8e 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -431,6 +431,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, cap_factor_ion_dt = mk_float(t_input["cap_factor_ion_dt"]) max_pseudotimesteps = t_input["max_pseudotimesteps"] max_pseudotime = t_input["max_pseudotime"] + include_wall_bc_in_preconditioner = t_input["include_wall_bc_in_preconditioner"] electron_t_params = nothing elseif electron === false debug_io = nothing @@ -441,6 +442,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, cap_factor_ion_dt = Inf max_pseudotimesteps = -1 max_pseudotime = Inf + include_wall_bc_in_preconditioner = false electron_t_params = nothing else debug_io = nothing @@ -476,6 +478,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, cap_factor_ion_dt = Inf max_pseudotimesteps = -1 max_pseudotime = Inf + include_wall_bc_in_preconditioner = false electron_t_params = electron end return time_info(n_variables, t_input["nstep"], end_time, t, dt, previous_dt, @@ -503,9 +506,9 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, mk_float(t_input["constraint_forcing_rate"]), decrease_dt_iteration_threshold, increase_dt_iteration_threshold, mk_float(cap_factor_ion_dt), mk_int(max_pseudotimesteps), - mk_float(max_pseudotime), t_input["write_after_fixed_step_count"], - error_sum_zero, t_input["split_operators"], - t_input["steady_state_residual"], + mk_float(max_pseudotime), include_wall_bc_in_preconditioner, + t_input["write_after_fixed_step_count"], error_sum_zero, + t_input["split_operators"], t_input["steady_state_residual"], mk_float(t_input["converged_residual_value"]), manufactured_solns_input.use_for_advance, t_input["stopfile_name"], debug_io, electron_t_params) diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl index 0e06891b7..8dd1968c6 100644 --- a/moment_kinetics/test/jacobian_matrix_tests.jl +++ b/moment_kinetics/test/jacobian_matrix_tests.jl @@ -189,6 +189,7 @@ test_input = OptionsDict("output" => OptionsDict("run_name" => "jacobian_matrix" "initialization_residual_value" => 2.5, "converged_residual_value" => 1.0e-2, "constraint_forcing_rate" => 2.321, + "include_wall_bc_in_preconditioner" => true, ), "nonlinear_solver" => OptionsDict("nonlinear_max_iterations" => 100, "rtol" => 1.0e-5, From daf0aade45853be35b5c1d950c1a190ef50d3603 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 3 Dec 2024 22:25:39 +0000 Subject: [PATCH 09/10] Inputs for kinetic electron runs with adaptive ion timesteps Includes inputs for runs both with and without wall boundary conditions included in the Jacobian matrix. --- ...tails-KennedyCarpenterARK324-wall-Jac.toml | 162 +++++++++++++++++ ...c-coarse_tails-KennedyCarpenterARK324.toml | 161 +++++++++++++++++ ...sed-z-KennedyCarpenterARK324-wall-Jac.toml | 163 ++++++++++++++++++ ...s-compressed-z-KennedyCarpenterARK324.toml | 161 +++++++++++++++++ ...ed-z4-KennedyCarpenterARK324-wall-Jac.toml | 162 +++++++++++++++++ ...-compressed-z4-KennedyCarpenterARK324.toml | 161 +++++++++++++++++ ...orm-z-KennedyCarpenterARK324-wall-Jac.toml | 163 ++++++++++++++++++ ...ails-uniform-z-KennedyCarpenterARK324.toml | 162 +++++++++++++++++ 8 files changed, 1295 insertions(+) create mode 100644 examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324-wall-Jac.toml create mode 100644 examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324.toml create mode 100644 examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324-wall-Jac.toml create mode 100644 examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324.toml create mode 100644 examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324-wall-Jac.toml create mode 100644 examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324.toml create mode 100644 examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324-wall-Jac.toml create mode 100644 examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324.toml diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324-wall-Jac.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324-wall-Jac.toml new file mode 100644 index 000000000..9fba1454d --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324-wall-Jac.toml @@ -0,0 +1,162 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +bc = "wall" +element_spacing_option = "sqrt" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +#maximum_dt = 1.0e-8 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 +include_wall_bc_in_preconditioner = true + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324.toml new file mode 100644 index 000000000..f7bba1bfe --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324.toml @@ -0,0 +1,161 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +bc = "wall" +element_spacing_option = "sqrt" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +#maximum_dt = 1.0e-8 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324-wall-Jac.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324-wall-Jac.toml new file mode 100644 index 000000000..b4f1c6a59 --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324-wall-Jac.toml @@ -0,0 +1,163 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +bc = "wall" +element_spacing_option = "compressed_2" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +maximum_dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +#maximum_dt = 1.0e-8 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 +include_wall_bc_in_preconditioner = true + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324.toml new file mode 100644 index 000000000..9fbb70e16 --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324.toml @@ -0,0 +1,161 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +bc = "wall" +element_spacing_option = "compressed_2" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +#maximum_dt = 1.0e-8 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324-wall-Jac.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324-wall-Jac.toml new file mode 100644 index 000000000..1acf5c04d --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324-wall-Jac.toml @@ -0,0 +1,162 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +bc = "wall" +element_spacing_option = "compressed_4" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +#maximum_dt = 1.0e-8 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 +include_wall_bc_in_preconditioner = true + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324.toml new file mode 100644 index 000000000..ba5ae4f24 --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324.toml @@ -0,0 +1,161 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +bc = "wall" +element_spacing_option = "compressed_4" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +#maximum_dt = 1.0e-8 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324-wall-Jac.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324-wall-Jac.toml new file mode 100644 index 000000000..19bad4f94 --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324-wall-Jac.toml @@ -0,0 +1,163 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +#nelement_local = 8 +#nelement_local = 16 +bc = "wall" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +#maximum_dt = 1.0e-8 +nwrite = 10 #10000 +nwrite_dfns = 10 #100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 +include_wall_bc_in_preconditioner = true + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324.toml new file mode 100644 index 000000000..052649878 --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324.toml @@ -0,0 +1,162 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +#nelement_local = 8 +#nelement_local = 16 +bc = "wall" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +#maximum_dt = 1.0e-8 +nwrite = 10 #10000 +nwrite_dfns = 10 #100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 From 66686c587e3066f6f16be3bab09e0f84c1f070ad Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 12 Jan 2025 14:34:06 +0000 Subject: [PATCH 10/10] Shared-memory debug check for ADI preconditioner --- moment_kinetics/debug_test/kinetic_electron_inputs.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/moment_kinetics/debug_test/kinetic_electron_inputs.jl b/moment_kinetics/debug_test/kinetic_electron_inputs.jl index 4f834c319..b235efdc7 100644 --- a/moment_kinetics/debug_test/kinetic_electron_inputs.jl +++ b/moment_kinetics/debug_test/kinetic_electron_inputs.jl @@ -91,7 +91,12 @@ test_input = OptionsDict("composition" => OptionsDict("n_ion_species" => 1, "neutral_numerical_dissipation" => OptionsDict("force_minimum_pdf_value" => 0.0, "vz_dissipation_coefficient" => 1e-2)) +test_input_adi = deepcopy(test_input) +test_input_adi["output"]["run_name"] = "kinetic_electron_adi" +test_input_adi["timestepping"]["implicit_electron_ppar"] = "adi" + test_input_list = [ test_input, + test_input_adi, ]