From 2fa8248b0ed6258dd678b748ca7d87b5d516074e Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Wed, 14 Jun 2017 11:49:06 +0200 Subject: [PATCH 01/43] gpu/pypic: PyHT interface to create PyPIC instance... ... depending on the context on the GPU or CPU. --- PyHEADTAIL/gpu/pypic.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 PyHEADTAIL/gpu/pypic.py diff --git a/PyHEADTAIL/gpu/pypic.py b/PyHEADTAIL/gpu/pypic.py new file mode 100644 index 00000000..e689d9eb --- /dev/null +++ b/PyHEADTAIL/gpu/pypic.py @@ -0,0 +1,34 @@ +'''Translates PyPIC/GPU calls to the corresponding GPU or CPU +class depending on the current context (in pmath). + +PyPIC can be found under https://github.com/PyCOMPLETE/PyPIC . + +@authors: Adrian Oeftiger +@date: 13.06.2017 +''' + +from ..general import pmath as pm + +from PyPIC.GPU import pypic + +class UnknownContextManagerException(Exception): + '''Raise if context manager is not found, e.g. cannot determine + whether on CPU or on GPU. + ''' + +def make_PyPIC(*args, **kwargs): + '''Factory method for PyPIC.GPU.pypic.PyPIC(_GPU) classes. + Return PyPIC_GPU instance launched with args and kwargs if current + context is on the GPU, otherwise return PyPIC instance. + ''' + if pm.device is 'CPU': + return pypic.PyPIC(*args, **kwargs) + elif pm.device is 'GPU': + from pycuda.autoinit import context + if 'context' not in kwargs: + kwargs.update(context=context) + return pypic.PyPIC_GPU(*args, **kwargs) + else: + raise UnknownContextManagerException( + 'Failed to determine current context. ' + 'pmath.device is neither "CPU" nor "GPU".') From a722c83eabc0dc0d7a54e7e43589ef004861afcb Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Wed, 14 Jun 2017 11:49:49 +0200 Subject: [PATCH 02/43] field_maps/field_map: adding new FieldMap... ... which supports field map interpolation up to all 3 planes and context depending for both CPU and GPU. I hope the documentation is explanatory enough... --- PyHEADTAIL/field_maps/field_map.py | 101 +++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 PyHEADTAIL/field_maps/field_map.py diff --git a/PyHEADTAIL/field_maps/field_map.py b/PyHEADTAIL/field_maps/field_map.py new file mode 100644 index 00000000..1275bd11 --- /dev/null +++ b/PyHEADTAIL/field_maps/field_map.py @@ -0,0 +1,101 @@ +'''A FieldMap represents a static (electric) field defined in the lab +frame which interacts with a particle beam distribution in a weak-strong +approach. Potential applications include e.g. frozen space charge or +(weak-strong) beam-beam interaction with arbitrary beam distributions. + +The FieldMap class seeks to generalise the Transverse_Efield_map (being +fixed to explicit 2D slice-by-slice treatment) to include 3D treatment +and extend usage to both CPU and GPU hardware architectures. +For these purposes FieldMap uses the PyPIC/GPU module (which is +based on PyPIClib). + +PyPIC can be found under https://github.com/PyCOMPLETE/PyPIC . + +@authors: Adrian Oeftiger +@date: 13.06.2017 +''' + +from __future__ import division, print_function + +from . import Element + +from ..gpu.pypic import make_PyPIC + +class FieldMap(Element): + '''This static (electric) field in the lab frame applies kicks + to the beam distribution in a weak-strong interaction model. + ''' + def __init__(self, length, mesh, fields, wrt_beam_centroid=False, + *args, **kwargs): + '''Arguments: + - length: interaction length around the accelerator over + which the force of the field is integrated. 
+        - mesh: Mesh instance from PyPIC meshing with dimension 2
+          or 3.
+        - fields: list of arrays with field values defined over the
+          mesh nodes. The field arrays need to have the same
+          dimension as the mesh. Also the shape of the field arrays
+          needs to coincide with mesh.shape. The list of arrays
+          can have 1 to 3 entries in the order of [E_x, E_y, E_z].
+          Use as many entries as beam planes that you want to apply
+          the field kicks to.
+          (NB: the field arrays need to be either numpy ndarray or
+          pycuda GPUArray instances, depending on whether you want
+          to apply the kicks on the CPU or on the GPU!)
+        - wrt_beam_centroid: if true, the beam centroid will be set
+          to zero during the calculation of the field kicks.
+
+        NB: fields defined in the beam frame need to be Lorentz
+        transformed to the lab frame. This is particularly true for
+        fields determined by the particle-in-cell algorithm of PyPIC
+        (where the longitudinal meshing includes the stretched beam
+        distribution):
+
+        1. charge density in beam frame:
+        >>> mesh_charges = pypic.particles_to_mesh(
+                beam.x, beam.y, beam.z_beamframe, charge=beam.charge)
+        >>> rho = mesh_charges / mesh.volume_elem
+
+        2. electrostatic potential in beam frame:
+        >>> phi = pypic.poisson_solve(rho)
+
+        3. electric fields in beam frame via gradient (here for 3D):
+        >>> E_x, E_y, E_z = pypic.get_electric_fields(phi)
+
+        4. Lorentz transform to lab frame:
+        >>> E_x /= beam.gamma
+        >>> E_y /= beam.gamma
+        >>> E_z /= beam.gamma # longitudinal: need gamma^-2, however,
+                # the gradient in pypic.get_electric_fields already
+                # includes one gamma factor in d/dz_beamframe
+
+        Use these lab frame fields [E_x, E_y, E_z] in the FieldMap
+        argument. Attention: in the 2.5D slice-by-slice case, the
+        volume element should be a 2D area and not a 3D volume.
+        '''
+        self.length = length
+        self.pypic = make_PyPIC(
+            poissonsolver=None, gradient=lambda mesh: None, mesh=mesh)
+        self.fields = fields
+        self.wrt_beam_centroid = wrt_beam_centroid
+
+    def track(self, beam):
+        # prepare argument for PyPIC mesh to particle interpolation
+        mp_coords = np.array([beam.x, beam.y, beam.z]) # zip will cut to #fields
+        if self.wrt_beam_centroid:
+            mp_coords -= np.array(
+                [beam.mean_x(), beam.mean_y(), beam.mean_z()])
+        mesh_fields_and_mp_coords = zip(self.fields, mp_coords)
+
+        # electric fields at each particle position in lab frame [V/m]
+        part_fields = self.pypic.field_to_particles(*mesh_fields_and_mp_coords)
+
+        # integrate over dt, p0 comes from kicking xp=p_x/p0 instead of p_x
+        kick_factor = (self.length / (beam.beta*c) * beam.charge / beam.p0)
+
+        # apply kicks for 1-3 planes depending on #entries in fields
+        for beam_momentum, force_field in zip(['xp', 'yp', 'zp'], part_fields):
+            getattr(beam, beam_momentum) += force_field * kick_factor
+        # beam.xp += part_fields[0] * kick_factor
+        # beam.yp += part_fields[1] * kick_factor
+        # beam.dp += part_fields[2] * kick_factor
+

From cbf7aea764695e04d48c86b90841d35c8499a5bc Mon Sep 17 00:00:00 2001
From: Adrian Oeftiger
Date: Thu, 15 Jun 2017 13:56:09 +0200
Subject: [PATCH 03/43] slicing: allow lambda_z()

Now, the line charge density is returned at the slice centres if
lambda_z is called without an explicit z. This is a shortcut for
lambda_bins()/slice_widths which seems intuitive to me.
For the unsmoothed line charge density due to the raw macro-particle
distribution, call slices.lambda_z(smoothen=False).
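A minimal usage sketch of the new behaviour (names are illustrative;
it assumes a SliceSet obtained from a beam via some Slicer instance):

    slices = beam.get_slices(slicer)
    lmbda = slices.lambda_z()             # [Coul/m] at the slice centres
    lmbda_at_z = slices.lambda_z(beam.z)  # spline-interpolated at given z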
---
 PyHEADTAIL/particles/slicing.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/PyHEADTAIL/particles/slicing.py b/PyHEADTAIL/particles/slicing.py
index ae7993d0..ea39d3a8 100644
--- a/PyHEADTAIL/particles/slicing.py
+++ b/PyHEADTAIL/particles/slicing.py
@@ -269,10 +269,15 @@ def lambda_prime_bins(self, sigma=None, smoothen_before=True,
             mp_density_derivative = smoothen(mp_density_derivative)
         return mp_density_derivative * self.charge_per_mp
 
-    def lambda_z(self, z, sigma=None, smoothen=True):
-        '''Line charge density with respect to z along the slices.'''
+    def lambda_z(self, z=None, sigma=None, smoothen=True):
+        '''Line charge density with respect to z along the slices.
+        If z is None, return the line charge density along the slice
+        centres.
+        '''
         lambda_along_bins = (self.lambda_bins(sigma, smoothen)
                              / self.slice_widths)
+        if z is None:
+            return lambda_along_bins
         tck = interpolate.splrep(self.z_centers, lambda_along_bins, s=0)
         l_of_z = interpolate.splev(z, tck, der=0, ext=1)
         return l_of_z

From 62ebd720d54152a1c753dbf143cef0e66f4361ba Mon Sep 17 00:00:00 2001
From: Adrian Oeftiger
Date: Thu, 15 Jun 2017 14:01:01 +0200
Subject: [PATCH 04/43] slicing: fixed bug in convert_to_particles

If empty_particles is not given, pmath.empty_like was called with a
dtype argument, which is not allowed. Furthermore, entries of
particles outside the slicing area are now assigned zero.
Also, pmath.take is now used instead of the array's take method,
which does not exist for GPUArrays.
(Nonetheless, this method fails for GPUArrays with an "invalid
subindex in axis 0" IndexError at the fancy indexing assignment.)
---
 PyHEADTAIL/particles/slicing.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/PyHEADTAIL/particles/slicing.py b/PyHEADTAIL/particles/slicing.py
index ea39d3a8..4b53e531 100644
--- a/PyHEADTAIL/particles/slicing.py
+++ b/PyHEADTAIL/particles/slicing.py
@@ -314,13 +314,15 @@ def convert_to_particles(self, slice_array, empty_particles=None):
         given by its slice_array value via the slice that the
         particle belongs to.
         '''
-        if empty_particles == None:
-            empty_particles = pm.empty_like(
-                self.slice_index_of_particle, dtype=np.float64)
+        if empty_particles is None:
+            empty_particles = pm.zeros(
+                self.slice_index_of_particle.shape, dtype=np.float64)
+        else:
+            empty_particles.fill(0)
         particle_array = empty_particles
         p_id = self.particles_within_cuts
-        s_id = self.slice_index_of_particle.take(p_id)
-        particle_array[p_id] = slice_array.take(s_id)
+        s_id = pm.take(self.slice_index_of_particle, p_id)
+        particle_array[p_id] = pm.take(slice_array, s_id)
         return particle_array
 

From cc7e51f8e865f70f1da092f7813a7a3660d04ce7 Mon Sep 17 00:00:00 2001
From: Adrian Oeftiger
Date: Thu, 15 Jun 2017 15:57:36 +0200
Subject: [PATCH 05/43] slicing: convert_to_particles works for CPU&GPU

It is very effective on the GPU (with a speed-up of >30000 on a P100).
The CPU version could probably still be improved; it takes ~10 ms.
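A sketch of the resulting dispatch (simplified, names as in this
patch): SliceSet.convert_to_particles now delegates to

    pm.slice_to_particles(sliceset, slice_array, empty_particles)

which resolves to the numpy-based _slice_to_particles on the CPU and
to gpu_wrap.slice_to_particles (a pycuda ElementwiseKernel gather) on
the GPU.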
--- PyHEADTAIL/general/pmath.py | 16 ++++++++++++++++ PyHEADTAIL/gpu/gpu_wrap.py | 26 ++++++++++++++++++++++++++ PyHEADTAIL/particles/slicing.py | 11 ++--------- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/PyHEADTAIL/general/pmath.py b/PyHEADTAIL/general/pmath.py index 62afa7c4..35c6d731 100644 --- a/PyHEADTAIL/general/pmath.py +++ b/PyHEADTAIL/general/pmath.py @@ -135,6 +135,20 @@ def _searchsortedright(array, values, dest_array=None): dest_array = np.searchsorted(array, values, side='right') return dest_array +def _slice_to_particles(sliceset, slice_array, particle_array=None): + '''Convert slice_array with entries for each slice to a + particle array with the respective entry of each particle + given by its slice_array value via the slice that the + particle belongs to. If provided, particle_array should be a + zero-filled destination array. + ''' + if particle_array is None: + particle_array = zeros( + sliceset.slice_index_of_particle.shape, dtype=np.float64) + p_id = sliceset.particles_within_cuts + s_id = take(sliceset.slice_index_of_particle, p_id) + particle_array[p_id] = take(slice_array, s_id) + return particle_array #### dictionaries storing the CPU and GPU versions of the desired functions #### _CPU_numpy_func_dict = { @@ -165,6 +179,7 @@ def _searchsortedright(array, values, dest_array=None): & (sliceset.slice_index_of_particle >= 0)) )[0].astype(np.int32), 'macroparticles_per_slice': lambda sliceset: _count_macroparticles_per_slice_cpu(sliceset), + 'slice_to_particles': _slice_to_particles, 'take': np.take, 'convolve': np.convolve, 'seq': lambda stop: np.arange(stop, dtype=np.int32), @@ -220,6 +235,7 @@ def _searchsortedright(array, values, dest_array=None): 'particles_within_cuts': gpu_wrap.particles_within_cuts, 'particles_outside_cuts': gpu_wrap.particles_outside_cuts, 'macroparticles_per_slice': gpu_wrap.macroparticles_per_slice, + 'slice_to_particles': gpu_wrap.slice_to_particles, 'take': pycuda.gpuarray.take, 'convolve': gpu_wrap.convolve, 'seq': lambda stop: pycuda.gpuarray.arange(stop, dtype=np.int32), diff --git a/PyHEADTAIL/gpu/gpu_wrap.py b/PyHEADTAIL/gpu/gpu_wrap.py index f22c66ff..0a2a2ec1 100644 --- a/PyHEADTAIL/gpu/gpu_wrap.py +++ b/PyHEADTAIL/gpu/gpu_wrap.py @@ -49,6 +49,7 @@ if has_pycuda: + # define all compilation depending functions (e.g. ElementwiseKernel) _sub_1dgpuarr = pycuda.elementwise.ElementwiseKernel( 'double* out, double* a, const double* b', 'out[i] = a[i] - b[0]', @@ -283,6 +284,31 @@ def thrust_mean_and_std_per_slice(sliceset, u, stream=None): sliceset.n_slices, sigma_u) return (mean_u, sigma_u) + + _slice_to_particles = pycuda.elementwise.ElementwiseKernel( + "unsigned int* slice_index_of_particle, double* input_slice_quantity, " + "double* output_particle_array", + # i is the particle index within slice_index_of_particle + "output_particle_array[i] = " + "input_slice_quantity[slice_index_of_particle[i]]", + "slice_to_particles_kernel" + ) + def slice_to_particles(sliceset, slice_array, particle_array=None): + '''Convert slice_array with entries for each slice to a + particle array with the respective entry of each particle + given by its slice_array value via the slice that the + particle belongs to. If provided, particle_array should be a + zero-filled destination array. 
+ ''' + if particle_array == None: + particle_array = pycuda.gpuarray.empty( + sliceset.slice_index_of_particle.shape, + dtype=np.float64, allocator=gpu_utils.memory_pool.allocate) + _slice_to_particles(sliceset.slice_index_of_particle, + slice_array, particle_array) + return particle_array + + def _inplace_pow(x_gpu, p, stream=None): ''' Perform an in-place x_gpu = x_gpu ** p diff --git a/PyHEADTAIL/particles/slicing.py b/PyHEADTAIL/particles/slicing.py index 4b53e531..4961f95e 100644 --- a/PyHEADTAIL/particles/slicing.py +++ b/PyHEADTAIL/particles/slicing.py @@ -314,16 +314,9 @@ def convert_to_particles(self, slice_array, empty_particles=None): given by its slice_array value via the slice that the particle belongs to. ''' - if empty_particles is None: - empty_particles = pm.zeros( - self.slice_index_of_particle.shape, dtype=np.float64) - else: + if empty_particles is not None: empty_particles.fill(0) - particle_array = empty_particles - p_id = self.particles_within_cuts - s_id = pm.take(self.slice_index_of_particle, p_id) - particle_array[p_id] = pm.take(slice_array, s_id) - return particle_array + return pm.slice_to_particles(self, slice_array, empty_particles) class Slicer(Printing): From 0f0d979da94ae992c9665794f8c4ed195263321a Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Thu, 15 Jun 2017 16:53:06 +0200 Subject: [PATCH 06/43] Moving UnknownContextManagerError... ... from gpu.pypic to general.contextmanager . --- PyHEADTAIL/general/contextmanager.py | 9 +++++++++ PyHEADTAIL/gpu/pypic.py | 10 ++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/PyHEADTAIL/general/contextmanager.py b/PyHEADTAIL/general/contextmanager.py index ca4e50f9..37200d3f 100644 --- a/PyHEADTAIL/general/contextmanager.py +++ b/PyHEADTAIL/general/contextmanager.py @@ -104,6 +104,15 @@ def _patch_binop(self, other): pycuda.gpuarray.GPUArray.__truediv__ = pycuda.gpuarray.GPUArray.__div__ +class UnknownContextManagerError(Exception): + '''Raise if context manager is not found, e.g. cannot determine + whether on CPU or on GPU. + ''' + def __init__(self, message='Failed to determine current context, e.g. ' + 'whether pmath.device is "CPU" or "GPU".'): + self.message = message + + class Context(object): ''' Example contextmanager class providing enter and exit methods diff --git a/PyHEADTAIL/gpu/pypic.py b/PyHEADTAIL/gpu/pypic.py index e689d9eb..c08c6c81 100644 --- a/PyHEADTAIL/gpu/pypic.py +++ b/PyHEADTAIL/gpu/pypic.py @@ -8,14 +8,10 @@ class depending on the current context (in pmath). ''' from ..general import pmath as pm +from ..general.contextmanager import UnknownContextManagerError from PyPIC.GPU import pypic -class UnknownContextManagerException(Exception): - '''Raise if context manager is not found, e.g. cannot determine - whether on CPU or on GPU. - ''' - def make_PyPIC(*args, **kwargs): '''Factory method for PyPIC.GPU.pypic.PyPIC(_GPU) classes. Return PyPIC_GPU instance launched with args and kwargs if current @@ -29,6 +25,4 @@ def make_PyPIC(*args, **kwargs): kwargs.update(context=context) return pypic.PyPIC_GPU(*args, **kwargs) else: - raise UnknownContextManagerException( - 'Failed to determine current context. ' - 'pmath.device is neither "CPU" nor "GPU".') + raise UnknownContextManagerError() From 7c8cd28e633f43471de01ede029c633d4a5238e4 Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Thu, 15 Jun 2017 16:54:18 +0200 Subject: [PATCH 07/43] pmath: add convenience functions between devices... ... for ensuring CPU and GPU arrays to be in certain memories (RAM vs. GPU mem). 
Also accepts floats, ints etc. --- PyHEADTAIL/general/pmath.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/PyHEADTAIL/general/pmath.py b/PyHEADTAIL/general/pmath.py index 35c6d731..9df0b9dc 100644 --- a/PyHEADTAIL/general/pmath.py +++ b/PyHEADTAIL/general/pmath.py @@ -24,6 +24,8 @@ from functools import wraps +from contextmanager import UnknownContextManagerError + # FADDEEVA error function (wofz) business (used a.o. in spacecharge module) try: from errfff import errf as _errf_f @@ -38,6 +40,33 @@ def _wofz(x, y): def _errfadd(z): return np.exp(-z**2) * _erfc(z * -1j) +# context convenience functions: +def ensure_CPU(array): + '''Accept a GPUArray or a NumPy.ndarray and return a NumPy.ndarray. + ''' + try: + return array.get() + except AttributeError: + return array + +if has_pycuda: + def ensure_GPU(array): + '''Accept a GPUArray or a NumPy.ndarray and return a GPUArray.''' + if not isinstance(array, pycuda.gpuarray.GPUArray): + array = pycuda.gpuarray.to_gpu(np.array(array)) + return array + +def ensure_same_device(array): + '''Accept a GPUarray or a NumPy.ndarray, check which device we + are currently running on in the context manager, and return + a possibly transferred GPUarray or a NumPy.ndarray accordingly. + ''' + if device == 'CPU': + return ensure_CPU(array) + elif device == 'GPU': + return ensure_GPU(array) + else: + raise UnknownContextManagerError() # # Kevin's sincos interface: # try: From c476561e0940f9e386bea55d98cf11965aea670b Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Thu, 15 Jun 2017 16:57:04 +0200 Subject: [PATCH 08/43] fieldmaps/fieldmap: runs now both on GPU and CPU some minor details fixed and fields do not need to be given on GPU already, will transfer automatically if the FieldMap is initialised in GPU context. --- PyHEADTAIL/field_maps/field_map.py | 41 ++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/PyHEADTAIL/field_maps/field_map.py b/PyHEADTAIL/field_maps/field_map.py index 1275bd11..4726f00b 100644 --- a/PyHEADTAIL/field_maps/field_map.py +++ b/PyHEADTAIL/field_maps/field_map.py @@ -17,13 +17,16 @@ from __future__ import division, print_function +from scipy.constants import c + from . import Element +from ..general import pmath as pm from ..gpu.pypic import make_PyPIC class FieldMap(Element): - '''This static (electric) field in the lab frame applies kicks - to the beam distribution in a weak-strong interaction model. + '''This static field in the lab frame applies kicks to the beam + distribution in a weak-strong interaction model. ''' def __init__(self, length, mesh, fields, wrt_beam_centroid=False, *args, **kwargs): @@ -39,14 +42,17 @@ def __init__(self, length, mesh, fields, wrt_beam_centroid=False, can have 1 to 3 entries in the order of [E_x, E_y, E_z]. Use as many entries as beam planes that you want to apply the field kicks to. - (NB: the field arrays need to be either numpy ndarray or - pycuda GPUArray instances, depending on whether you want - to apply the kicks on the CPU or on the GPU!) - wrt_beam_centroid: if true, the beam centroid will be set to zero during the calculation of the field kicks. - NB: fields defined in the beam frame need to be Lorentz - transformed to the lab frame. This particularly true for fields + NB 1: FieldMap instances should be initialised in the proper + context. 
If the FieldMap will track on the GPU, it should be + initiated within a GPU context: + >>> with PyHEADTAIL.general.contextmanager.GPU(beam) as cmg: + >>> fieldmap = FieldMap(...) + + NB 2: fields defined in the beam frame need to be Lorentz + transformed to the lab frame. This is the case e.g. for fields determined by the particle-in-cell algorithm of PyPIC (where the longitudinal meshing includes the stretched beam distribution): @@ -74,27 +80,34 @@ def __init__(self, length, mesh, fields, wrt_beam_centroid=False, ''' self.length = length self.pypic = make_PyPIC( - poissonsolver=None, gradient=lambda mesh: None, mesh=mesh) - self.fields = fields + poissonsolver=None, + gradient=lambda *args, **kwargs: None, + mesh=mesh) + self.fields = map(pm.ensure_same_device, fields) self.wrt_beam_centroid = wrt_beam_centroid def track(self, beam): # prepare argument for PyPIC mesh to particle interpolation - mp_coords = np.array([beam.x, beam.y, beam.z]) # zip will cut to #fields + mx, my, mz = 0, 0, 0 if self.wrt_beam_centroid: - mp_coords -= np.array( - [beam.mean_x(), beam.mean_y(), beam.mean_z()]) + mx, my, mz = beam.mean_x(), beam.mean_y(), beam.mean_z() + mp_coords = [beam.x - mx, + beam.y - my, + beam.z - mz] # zip will cut to #fields + mesh_fields_and_mp_coords = zip(self.fields, mp_coords) # electric fields at each particle position in lab frame [V/m] part_fields = self.pypic.field_to_particles(*mesh_fields_and_mp_coords) # integrate over dt, p0 comes from kicking xp=p_x/p0 instead of p_x - kick_factor = (self.length / (beam.beta*c) * beam.charge / beam.p0) + kick_factor = self.length / (beam.beta*c) * beam.charge / beam.p0 # apply kicks for 1-3 planes depending on #entries in fields for beam_momentum, force_field in zip(['xp', 'yp', 'zp'], part_fields): - getattr(beam, beam_momentum) += force_field * kick_factor + val = getattr(beam, beam_momentum) + setattr(beam, beam_momentum, val + force_field * kick_factor) + # for 3D, the for loop explicitly does: # beam.xp += part_fields[0] * kick_factor # beam.yp += part_fields[1] * kick_factor # beam.dp += part_fields[2] * kick_factor From ad096ab8c13748e61a2778b9088d1c969016e75d Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Thu, 15 Jun 2017 18:18:25 +0200 Subject: [PATCH 09/43] field_map/FieldMapSliceWise: slice-by-slice 2.5D New FieldMapSliceWise class, which applies a 2D fieldmap to the slices of the beam, weighing the fieldmap with the slice charge. Works very quickly on the GPU (P100: 6.5ns per particle kick + betatron track for 1024x1024x32). CPU is slow though. --- PyHEADTAIL/field_maps/field_map.py | 65 ++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/PyHEADTAIL/field_maps/field_map.py b/PyHEADTAIL/field_maps/field_map.py index 4726f00b..21ad95a1 100644 --- a/PyHEADTAIL/field_maps/field_map.py +++ b/PyHEADTAIL/field_maps/field_map.py @@ -112,3 +112,68 @@ def track(self, beam): # beam.yp += part_fields[1] * kick_factor # beam.dp += part_fields[2] * kick_factor + +class FieldMapSliceWise(FieldMap): + '''As for the FieldMap, this represents a static two-dimensional + transverse field in the lab frame. Kicks are applied in a + slice-by-slice manner to the beam distribution in a weak-strong + interaction model. The same field is applied to all slices while + being multiplied by the local line charge density [Coul/m]. + + A possible application is a slice-by-slice frozen space charge + model. 
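+
+    A usage sketch (hypothetical names; e_x and e_y are assumed to be
+    charge-density-normalised 2D field arrays on mesh_2d):
+    >>> sc_node = FieldMapSliceWise(slicer, length, mesh_2d,
+    ...                             [e_x, e_y], wrt_beam_centroid=True)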
+    '''
+    def __init__(self, slicer, *args, **kwargs):
+        '''Arguments in addition to FieldMap arguments:
+        - slicer: determines the longitudinal discretisation for the
+          local line charge density, with which the field is
+          multiplied at each track call.
+
+        NB: mesh needs to be a two-dimensional mesh describing the
+        discrete domain of the transverse fields.
+
+        NB2: the field values should be charge-density-normalised as
+        they are multiplied by the line charge density for each slice,
+        cf. e.g. the Bassetti-Erskine formula without Q (as in the
+        spacecharge module's
+        TransverseGaussianSpaceCharge.get_efieldn()).
+        '''
+        self.slicer = slicer
+        super(FieldMapSliceWise, self).__init__(*args, **kwargs)
+
+        # require 2D!
+        assert self.pypic.mesh.dimension == 2, \
+            'mesh needs to be two-dimensional!'
+        assert all(map(lambda f: f.ndim == 2, self.fields)), \
+            'transverse field components need to be two-dimensional arrays!'
+        #
+
+    def track(self, beam):
+        # prepare argument for PyPIC mesh to particle interpolation
+        mx, my, mz = 0, 0, 0
+        if self.wrt_beam_centroid:
+            mx, my, mz = beam.mean_x(), beam.mean_y(), beam.mean_z()
+        mp_coords = [beam.x - mx,
+                     beam.y - my,
+                     beam.z - mz] # zip will cut to #fields
+
+        mesh_fields_and_mp_coords = zip(self.fields, mp_coords)
+
+        # electric fields at each particle position in lab frame [V/m]
+        part_fields = self.pypic.field_to_particles(*mesh_fields_and_mp_coords)
+
+        # weigh electric field with slice line charge density;
+        # integrate over dt, p0 comes from kicking xp=p_x/p0 instead of p_x
+        slices = beam.get_slices(self.slicer)
+        lambda_z = slices.convert_to_particles(slices.lambda_z(smoothen=False))
+        kick_factor = (self.length / (beam.beta*c) * beam.charge / beam.p0
+                       * lambda_z)
+
+        # apply kicks for 1-3 planes depending on #entries in fields
+        for beam_momentum, force_field in zip(['xp', 'yp', 'zp'], part_fields):
+            val = getattr(beam, beam_momentum)
+            setattr(beam, beam_momentum, val + force_field * kick_factor)
+        # for 3D, the for loop explicitly does:
+        # beam.xp += part_fields[0] * kick_factor
+        # beam.yp += part_fields[1] * kick_factor
+        # beam.dp += part_fields[2] * kick_factor

From c80ac9b61efb85915d51deb3289fa1ad72537aa8 Mon Sep 17 00:00:00 2001
From: Adrian Oeftiger
Date: Fri, 16 Jun 2017 14:23:31 +0200
Subject: [PATCH 10/43] spacecharge: added new FrozenGaussianSpaceCharge25D

The model precomputes a Gaussian electric field on a grid via the
TransverseGaussianSpaceCharge. This static field is applied as kicks
to the beam particles in a FieldMapSliceWise fashion, slice-by-slice,
weighted with the local line charge density of the beam. That is to
say, the longitudinal plane is self-consistent while the transverse
plane is a frozen Bassetti-Erskine errf field computed for a fixed RMS
beam size. The interpolation is executed via PyPIClib's (PyPIC.GPU's)
field_to_particles (m2p) method.
---
 PyHEADTAIL/spacecharge/pypic_spacecharge.py | 78 ++++++++++++++++++++-
 1 file changed, 76 insertions(+), 2 deletions(-)

diff --git a/PyHEADTAIL/spacecharge/pypic_spacecharge.py b/PyHEADTAIL/spacecharge/pypic_spacecharge.py
index 4e54aebd..7ae7ee6c 100644
--- a/PyHEADTAIL/spacecharge/pypic_spacecharge.py
+++ b/PyHEADTAIL/spacecharge/pypic_spacecharge.py
@@ -9,15 +9,18 @@
 @date: 18.01.2016
 '''
 
-from __future__ import division
+from __future__ import division, print_function
 
-import numpy as np
 from scipy.constants import c
 
 from .
import Element from ..general import pmath as pm +from ..field_maps.field_map import FieldMapSliceWise +from pypic_factory import create_mesh +from spacecharge import TransverseGaussianSpaceCharge +import numpy as np def align_particles(beam, mesh_3d): '''Sort all particles by their mesh node IDs.''' @@ -109,3 +112,74 @@ def track(self, beam, pypic_state=None): # gradient in PyPIC, another gamma factor included here: beam.dp += force_fields[2] * kick_factor/beam.gamma + +class FrozenGaussianSpaceCharge25D(FieldMapSliceWise): + '''Transverse slice-by-slice (2.5D) frozen space charge assuming + a static transverse Gaussian distribution of a fixed RMS size. + The present class is essentially a field_map.FieldMapSliceWise with + a pre-filled Bassetti-Erskine formula computed field map + (cf. spacecharge.TransverseGaussianSpaceCharge). + The same electric transverse field is applied to all slices while + being multiplied by the local line charge density [Coul/m]. + In particular, this means that the strength of the local field is + self-consistent in the longitudinal plane but frozen in the + transverse plane (with the fixed Gaussian electric field). + + This frozen space charge model essentially acts equivalently to an + external magnet and fails to provide self-consistent treatment of + space charge related effects like quadrupolar envelope breathing + etc. + ''' + def __init__(self, slicer, length, sigma_x, sigma_y, gamma, + n_mesh_sigma=[6, 6], mesh_size=[1024, 1024], + *args, **kwargs): + '''Arguments: + - slicer: determines the longitudinal discretisation for the + local line charge density, with which the field is + multiplied at each track call. + - length: interaction length around the accelerator over + which the force of the field is integrated. + - sigma_x, sigma_y: the horizontal and vertical RMS width of + the transverse Gaussian distribution modelling the beam + distribution. + - gamma: the relativistic Lorentz factor of the beam. + + Optional arguments: + - n_mesh_sigma: 2-list of number of beam RMS values in + [x, y] to span across half the mesh width for the + field interpolation. + - mesh_size: 2-list of number of mesh nodes per transverse + plane [x, y]. + + NB: FrozenGaussianSpaceCharge25D instances should be initialised + in the proper context. If the FrozenGaussianSpaceCharge25D will + track on the GPU, it should be initiated within a GPU context: + >>> with PyHEADTAIL.general.contextmanager.GPU(beam) as cmg: + >>> frozen_sc_node = FrozenGaussianSpaceCharge25D(...) 
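+
+        A fuller instantiation sketch (beam, slicer and circumference
+        are assumed to exist; values are illustrative):
+        >>> sc_node = FrozenGaussianSpaceCharge25D(
+        ...     slicer=slicer, length=circumference,
+        ...     sigma_x=beam.sigma_x(), sigma_y=beam.sigma_y(),
+        ...     gamma=beam.gamma)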
+        '''
+        wrt_beam_centroid = True
+        mesh = create_mesh(
+            mesh_origin=[-n_mesh_sigma[0] * sigma_x,
+                         -n_mesh_sigma[1] * sigma_y],
+            mesh_distances=[sigma_x * 2 * n_mesh_sigma[0] / mesh_size[0],
+                            sigma_y * 2 * n_mesh_sigma[1] / mesh_size[1]],
+            mesh_size=mesh_size,
+        )
+
+        # calculate Bassetti-Erskine formula on either CPU or GPU:
+        ## prepare arguments for the proper device:
+        xg, yg = map(pm.ensure_same_device, np.meshgrid(
+            np.linspace(mesh.x0, mesh.x0 + mesh.dx*mesh.nx, mesh.nx),
+            np.linspace(mesh.y0, mesh.y0 + mesh.dy*mesh.ny, mesh.ny)
+        ))
+        sigma_x, sigma_y = map(pm.ensure_same_device, [sigma_x, sigma_y])
+        ## compute fields
+        be_sc = TransverseGaussianSpaceCharge(None, None)
+        fields_beamframe = be_sc.get_efieldn(xg, yg, 0, 0, sigma_x, sigma_y)
+
+        # Lorentz trafo to lab frame:
+        fields = [f/gamma for f in fields_beamframe]
+
+        super(FrozenGaussianSpaceCharge25D, self).__init__(
+            slicer=slicer, length=length, mesh=mesh, fields=fields,
+            wrt_beam_centroid=wrt_beam_centroid, *args, **kwargs)

From 80b8cd4ba214ea696faf70a6e44b64417022fff6 Mon Sep 17 00:00:00 2001
From: Adrian Oeftiger
Date: Wed, 15 Mar 2017 15:32:55 +0100
Subject: [PATCH 11/43] generators.py: slight reshuffle of StationaryExponential

... and removing unused width attribute.
---
 PyHEADTAIL/particles/generators.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/PyHEADTAIL/particles/generators.py b/PyHEADTAIL/particles/generators.py
index 94fb734b..9864e956 100644
--- a/PyHEADTAIL/particles/generators.py
+++ b/PyHEADTAIL/particles/generators.py
@@ -619,7 +619,7 @@ def _compute_emittance(self, rfbucket, psi):
 
 class StationaryExponential(object):
 
-    def __init__(self, H, Hmax=None, width=1000, Hcut=0):
+    def __init__(self, H, Hmax=None, Hcut=0):
         self.H = H
         self.H0 = 1
         if not Hmax:
@@ -627,13 +627,12 @@ def __init__(self, H, Hmax=None, width=1000, Hcut=0):
         else:
             self.Hmax = Hmax
         self.Hcut = Hcut
-        self.width = width
+
+    def _psi(self, H):
+        return np.exp((H - self.Hmax) / self.H0) - np.exp(-self.Hmax / self.H0)
 
     def function(self, z, dp):
-        H_hat = self.H(0., 0.).clip(min=0)
-        psi = np.exp((self.H(z, dp).clip(min=0)-H_hat)/self.H0) - np.exp(-H_hat/self.H0)
-        psi_norm = np.exp((self.Hmax-H_hat)/self.H0) - np.exp(-H_hat/self.H0)
-        return psi/psi_norm
+        return self._psi(self.H(z, dp).clip(min=0)) / self._psi(self.Hmax)
 

From 6e712b54fd3c95a13f0befc9d498349ecbad6e47 Mon Sep 17 00:00:00 2001
From: Adrian Oeftiger
Date: Fri, 16 Jun 2017 16:14:52 +0200
Subject: [PATCH 12/43] generators: extract RFBucket matching into separate file

... in view of providing several other distribution types for the
longitudinal plane. The generators.py file would grow too large and
contain two parts: generating macro-particles and matching the
longitudinal distributions. It seems logical to split here.
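In short, the import now looks like this (sketch; at this point the
thermal distribution still carries its old name):

    from PyHEADTAIL.particles.rfbucket_matching import (
        RFBucketMatcher, StationaryExponential)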
--- PyHEADTAIL/particles/generators.py | 227 +------------------- PyHEADTAIL/particles/rfbucket_matching.py | 245 ++++++++++++++++++++++ 2 files changed, 249 insertions(+), 223 deletions(-) create mode 100644 PyHEADTAIL/particles/rfbucket_matching.py diff --git a/PyHEADTAIL/particles/generators.py b/PyHEADTAIL/particles/generators.py index 9864e956..c6978083 100644 --- a/PyHEADTAIL/particles/generators.py +++ b/PyHEADTAIL/particles/generators.py @@ -9,14 +9,10 @@ import numpy as np from particles import Particles -from scipy.optimize import brentq, newton -# from ..trackers.rf_bucket import RFBucket - -from ..cobra_functions.pdf_integrators_2d import quad2d +from rfbucket_matching import RFBucketMatcher, StationaryExponential from . import Printing -from functools import partial from scipy.constants import e, c @@ -136,7 +132,8 @@ def _longitudinal_linear_matcher(beam, *args, **kwargs): def RF_bucket_distribution(rfbucket, sigma_z=None, epsn_z=None, - margin=0, *args, **kwargs): + margin=0, distribution_type=StationaryExponential, + *args, **kwargs): '''Return a distribution function which generates particles which are matched to the specified bucket and target emittance or std Specify only one of sigma_z, epsn_z @@ -151,7 +148,7 @@ def RF_bucket_distribution(rfbucket, sigma_z=None, epsn_z=None, Raises: ValueError: If neither or both of sigma_z, epsn_z are specified ''' - rf_bucket_matcher_impl = RFBucketMatcher(rfbucket, StationaryExponential, + rf_bucket_matcher_impl = RFBucketMatcher(rfbucket, distribution_type, sigma_z=sigma_z, epsn_z=epsn_z, *args, **kwargs) @@ -419,222 +416,6 @@ def _kv4d(n_particles): return _kv4d -class RFBucketMatcher(Printing): - - def __init__(self, rfbucket, psi, sigma_z=None, epsn_z=None, - verbose_regeneration=False, *args, **kwargs): - - self.rfbucket = rfbucket - hamiltonian = partial(rfbucket.hamiltonian, make_convex=True) - hmax = rfbucket.h_sfp(make_convex=True) - - self.psi_object = psi(hamiltonian, hmax) - self.psi = self.psi_object.function - - self.verbose_regeneration = verbose_regeneration - - if sigma_z and not epsn_z: - self.variable = sigma_z - self.psi_for_variable = self.psi_for_bunchlength_newton_method - elif not sigma_z and epsn_z: - self.variable = epsn_z - self.psi_for_variable = self.psi_for_emittance_newton_method - else: - raise ValueError("Can not generate mismatched matched " + - "distribution! (Don't provide both sigma_z " + - "and epsn_z!)") - - def psi_for_emittance_newton_method(self, epsn_z): - - # Maximum emittance - self.psi_object.H0 = self.rfbucket.guess_H0( - self.rfbucket.circumference, from_variable='sigma') - epsn_max = self._compute_emittance(self.rfbucket, self.psi) - if epsn_z > epsn_max: - self.warns('Given RMS emittance does not fit into bucket. ' + - 'Using (maximum) full bucket emittance ' + - str(epsn_max*0.99) + 'eV s instead.') - epsn_z = epsn_max*0.99 - self.prints('*** Maximum RMS emittance ' + str(epsn_max) + 'eV s.') - - def get_zc_for_epsn_z(ec): - self.psi_object.H0 = self.rfbucket.guess_H0( - ec, from_variable='epsn') - emittance = self._compute_emittance(self.rfbucket, self.psi) - - self.prints('... distance to target emittance: ' + - '{:.2e}'.format(emittance-epsn_z)) - - return emittance-epsn_z - - try: - ec_bar = newton(get_zc_for_epsn_z, epsn_z, tol=5e-4) - except RuntimeError: - self.warns('RFBucketMatcher -- failed to converge while ' + - 'using Newton-Raphson method. 
' + - 'Instead trying classic Brent method...') - ec_bar = brentq(get_zc_for_epsn_z, epsn_z/2, 2*epsn_max) - - self.psi_object.H0 = self.rfbucket.guess_H0( - ec_bar, from_variable='epsn') - emittance = self._compute_emittance(self.rfbucket, self.psi) - self.prints('--> Emittance: ' + str(emittance)) - sigma = self._compute_sigma(self.rfbucket, self.psi) - self.prints('--> Bunch length:' + str(sigma)) - - def psi_for_bunchlength_newton_method(self, sigma): - - # Maximum bunch length - self.psi_object.H0 = self.rfbucket.guess_H0( - self.rfbucket.circumference, from_variable='sigma') - sigma_max = self._compute_sigma(self.rfbucket, self.psi) - if sigma > sigma_max: - self.warns('Given RMS bunch length does not fit into bucket. ' + - 'Using (maximum) full bucket RMS bunch length ' + - str(sigma_max*0.99) + 'm instead.') - sigma = sigma_max*0.99 - self.prints('*** Maximum RMS bunch length ' + str(sigma_max) + 'm.') - - def get_zc_for_sigma(zc): - '''Width for bunch length''' - self.psi_object.H0 = self.rfbucket.guess_H0( - zc, from_variable='sigma') - length = self._compute_sigma(self.rfbucket, self.psi) - - if np.isnan(length): raise ValueError - - self.prints('... distance to target bunch length: ' + - '{:.4e}'.format(length-sigma)) - - return length-sigma - - zc_bar = newton(get_zc_for_sigma, sigma) - - self.psi_object.H0 = self.rfbucket.guess_H0( - zc_bar, from_variable='sigma') - sigma = self._compute_sigma(self.rfbucket, self.psi) - self.prints('--> Bunch length: ' + str(sigma)) - emittance = self._compute_emittance(self.rfbucket, self.psi) - self.prints('--> Emittance: ' + str(emittance)) - - def linedensity(self, xx): - quad_type = fixed_quad - - L = [] - try: - L = np.array([quad_type(lambda y: self.psi(x, y), 0, - self.p_limits(x))[0] for x in xx]) - except TypeError: - L = quad_type(lambda y: self.psi(xx, y), 0, self.p_limits(xx))[0] - L = np.array(L) - - return 2*L - - def generate(self, macroparticlenumber, cutting_margin=0): - '''Generate a 2d phase space of n_particles particles randomly distributed - according to the particle distribution function psi within the region - [xmin, xmax, ymin, ymax]. - ''' - self.psi_for_variable(self.variable) - - xmin, xmax = self.rfbucket.z_left, self.rfbucket.z_right - ymin = -self.rfbucket.dp_max(self.rfbucket.z_right) - ymax = -ymin - - # rejection sampling - uniform = np.random.uniform - n_gen = macroparticlenumber - u = uniform(low=xmin, high=xmax, size=n_gen) - v = uniform(low=ymin, high=ymax, size=n_gen) - s = uniform(size=n_gen) - - def mask_out(s, u, v): - return s >= self.psi(u, v) - - if cutting_margin: - mask_out_nocut = mask_out - - def mask_out(s, u, v): - return np.logical_or( - mask_out_nocut(s, u, v), - ~self.rfbucket.is_in_separatrix(u, v, cutting_margin)) - - # masked_out = ~(s kwarg psi has been renamed to distribution_type.') + + self.rfbucket = rfbucket + hamiltonian = partial(rfbucket.hamiltonian, make_convex=True) + hmax = rfbucket.h_sfp(make_convex=True) + + self.psi_object = distribution_type(hamiltonian, hmax) + self.psi = self.psi_object.function + + self.verbose_regeneration = verbose_regeneration + + if sigma_z and not epsn_z: + self.variable = sigma_z + self.psi_for_variable = self.psi_for_bunchlength_newton_method + elif not sigma_z and epsn_z: + self.variable = epsn_z + self.psi_for_variable = self.psi_for_emittance_newton_method + else: + raise ValueError("Can not generate mismatched matched " + + "distribution! 
(Don't provide both sigma_z " + + "and epsn_z!)") + + def psi_for_emittance_newton_method(self, epsn_z): + + # Maximum emittance + self.psi_object.H0 = self.rfbucket.guess_H0( + self.rfbucket.circumference, from_variable='sigma') + epsn_max = self._compute_emittance(self.rfbucket, self.psi) + if epsn_z > epsn_max: + self.warns('Given RMS emittance does not fit into bucket. ' + + 'Using (maximum) full bucket emittance ' + + str(epsn_max*0.99) + 'eV s instead.') + epsn_z = epsn_max*0.99 + self.prints('*** Maximum RMS emittance ' + str(epsn_max) + 'eV s.') + + def get_zc_for_epsn_z(ec): + self.psi_object.H0 = self.rfbucket.guess_H0( + ec, from_variable='epsn') + emittance = self._compute_emittance(self.rfbucket, self.psi) + + self.prints('... distance to target emittance: ' + + '{:.2e}'.format(emittance-epsn_z)) + + return emittance-epsn_z + + try: + ec_bar = newton(get_zc_for_epsn_z, epsn_z, tol=5e-4) + except RuntimeError: + self.warns('RFBucketMatcher -- failed to converge while ' + + 'using Newton-Raphson method. ' + + 'Instead trying classic Brent method...') + ec_bar = brentq(get_zc_for_epsn_z, epsn_z/2, 2*epsn_max) + + self.psi_object.H0 = self.rfbucket.guess_H0( + ec_bar, from_variable='epsn') + emittance = self._compute_emittance(self.rfbucket, self.psi) + self.prints('--> Emittance: ' + str(emittance)) + sigma = self._compute_sigma(self.rfbucket, self.psi) + self.prints('--> Bunch length:' + str(sigma)) + + def psi_for_bunchlength_newton_method(self, sigma): + + # Maximum bunch length + self.psi_object.H0 = self.rfbucket.guess_H0( + self.rfbucket.circumference, from_variable='sigma') + sigma_max = self._compute_sigma(self.rfbucket, self.psi) + if sigma > sigma_max: + self.warns('Given RMS bunch length does not fit into bucket. ' + + 'Using (maximum) full bucket RMS bunch length ' + + str(sigma_max*0.99) + 'm instead.') + sigma = sigma_max*0.99 + self.prints('*** Maximum RMS bunch length ' + str(sigma_max) + 'm.') + + def get_zc_for_sigma(zc): + '''Width for bunch length''' + self.psi_object.H0 = self.rfbucket.guess_H0( + zc, from_variable='sigma') + length = self._compute_sigma(self.rfbucket, self.psi) + + if np.isnan(length): raise ValueError + + self.prints('... distance to target bunch length: ' + + '{:.4e}'.format(length-sigma)) + + return length-sigma + + zc_bar = newton(get_zc_for_sigma, sigma) + + self.psi_object.H0 = self.rfbucket.guess_H0( + zc_bar, from_variable='sigma') + sigma = self._compute_sigma(self.rfbucket, self.psi) + self.prints('--> Bunch length: ' + str(sigma)) + emittance = self._compute_emittance(self.rfbucket, self.psi) + self.prints('--> Emittance: ' + str(emittance)) + + def linedensity(self, xx): + quad_type = fixed_quad + + L = [] + try: + L = np.array([quad_type(lambda y: self.psi(x, y), 0, + self.p_limits(x))[0] for x in xx]) + except TypeError: + L = quad_type(lambda y: self.psi(xx, y), 0, self.p_limits(xx))[0] + L = np.array(L) + + return 2*L + + def generate(self, macroparticlenumber, cutting_margin=0): + '''Generate a 2d phase space of n_particles particles randomly distributed + according to the particle distribution function psi within the region + [xmin, xmax, ymin, ymax]. 
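+
+        Implements plain rejection sampling: uniform samples (u, v, s)
+        over this bounding box are accepted where s < psi(u, v) (and,
+        if a cutting_margin is given, only inside the accordingly
+        shrunk separatrix); rejected samples are regenerated.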
+ ''' + self.psi_for_variable(self.variable) + + xmin, xmax = self.rfbucket.z_left, self.rfbucket.z_right + ymin = -self.rfbucket.dp_max(self.rfbucket.z_right) + ymax = -ymin + + # rejection sampling + uniform = np.random.uniform + n_gen = macroparticlenumber + u = uniform(low=xmin, high=xmax, size=n_gen) + v = uniform(low=ymin, high=ymax, size=n_gen) + s = uniform(size=n_gen) + + def mask_out(s, u, v): + return s >= self.psi(u, v) + + if cutting_margin: + mask_out_nocut = mask_out + + def mask_out(s, u, v): + return np.logical_or( + mask_out_nocut(s, u, v), + ~self.rfbucket.is_in_separatrix(u, v, cutting_margin)) + + # masked_out = ~(s Date: Fri, 16 Jun 2017 17:41:57 +0200 Subject: [PATCH 13/43] rfbucket_matching: fixed linedensity bug linedensity did not work any more as someone moved some integration methods to a separate cobra_functions module. --- PyHEADTAIL/particles/rfbucket_matching.py | 25 +++++++++++------------ 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/PyHEADTAIL/particles/rfbucket_matching.py b/PyHEADTAIL/particles/rfbucket_matching.py index 797dd989..e364cd72 100644 --- a/PyHEADTAIL/particles/rfbucket_matching.py +++ b/PyHEADTAIL/particles/rfbucket_matching.py @@ -8,6 +8,7 @@ import numpy as np from scipy.optimize import brentq, newton +from scipy.integrate import fixed_quad from scipy.constants import e, c from ..cobra_functions.pdf_integrators_2d import quad2d @@ -50,18 +51,17 @@ def __init__(self, rfbucket, distribution_type=None, sigma_z=None, self.variable = epsn_z self.psi_for_variable = self.psi_for_emittance_newton_method else: - raise ValueError("Can not generate mismatched matched " + - "distribution! (Don't provide both sigma_z " + + raise ValueError("Can not generate mismatched matched " + "distribution! (Don't provide both sigma_z " "and epsn_z!)") def psi_for_emittance_newton_method(self, epsn_z): - # Maximum emittance self.psi_object.H0 = self.rfbucket.guess_H0( self.rfbucket.circumference, from_variable='sigma') epsn_max = self._compute_emittance(self.rfbucket, self.psi) if epsn_z > epsn_max: - self.warns('Given RMS emittance does not fit into bucket. ' + + self.warns('Given RMS emittance does not fit into bucket. ' 'Using (maximum) full bucket emittance ' + str(epsn_max*0.99) + 'eV s instead.') epsn_z = epsn_max*0.99 @@ -80,8 +80,8 @@ def get_zc_for_epsn_z(ec): try: ec_bar = newton(get_zc_for_epsn_z, epsn_z, tol=5e-4) except RuntimeError: - self.warns('RFBucketMatcher -- failed to converge while ' + - 'using Newton-Raphson method. ' + + self.warns('RFBucketMatcher -- failed to converge while ' + 'using Newton-Raphson method. ' 'Instead trying classic Brent method...') ec_bar = brentq(get_zc_for_epsn_z, epsn_z/2, 2*epsn_max) @@ -93,13 +93,12 @@ def get_zc_for_epsn_z(ec): self.prints('--> Bunch length:' + str(sigma)) def psi_for_bunchlength_newton_method(self, sigma): - # Maximum bunch length self.psi_object.H0 = self.rfbucket.guess_H0( self.rfbucket.circumference, from_variable='sigma') sigma_max = self._compute_sigma(self.rfbucket, self.psi) if sigma > sigma_max: - self.warns('Given RMS bunch length does not fit into bucket. ' + + self.warns('Given RMS bunch length does not fit into bucket. 
' 'Using (maximum) full bucket RMS bunch length ' + str(sigma_max*0.99) + 'm instead.') sigma = sigma_max*0.99 @@ -127,15 +126,15 @@ def get_zc_for_sigma(zc): emittance = self._compute_emittance(self.rfbucket, self.psi) self.prints('--> Emittance: ' + str(emittance)) - def linedensity(self, xx): - quad_type = fixed_quad - + def linedensity(self, xx, quad_type=fixed_quad): L = [] try: L = np.array([quad_type(lambda y: self.psi(x, y), 0, - self.p_limits(x))[0] for x in xx]) + self.rfbucket.separatrix(x))[0] + for x in xx]) except TypeError: - L = quad_type(lambda y: self.psi(xx, y), 0, self.p_limits(xx))[0] + L = quad_type(lambda y: self.psi(xx, y), 0, + self.rfbucket.separatrix(xx))[0] L = np.array(L) return 2*L From 58223dccef2fe11d3bc822829b9cd9979e26bab6 Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Fri, 16 Jun 2017 18:06:56 +0200 Subject: [PATCH 14/43] rfbucket_matching: restructure StationaryDistribution... ... in view of adding more distribution types. For now only have StationaryThermal (StationaryExponential). --- PyHEADTAIL/particles/rfbucket_matching.py | 35 +++++++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/PyHEADTAIL/particles/rfbucket_matching.py b/PyHEADTAIL/particles/rfbucket_matching.py index e364cd72..5a3e6813 100644 --- a/PyHEADTAIL/particles/rfbucket_matching.py +++ b/PyHEADTAIL/particles/rfbucket_matching.py @@ -17,6 +17,8 @@ from functools import partial +from abc import abstractmethod + class RFBucketMatcher(Printing): def __init__(self, rfbucket, distribution_type=None, sigma_z=None, @@ -226,19 +228,40 @@ def _compute_emittance(self, rfbucket, psi): 4*np.pi*rfbucket.p0/np.abs(rfbucket.charge)) -class StationaryExponential(object): - - def __init__(self, H, Hmax=None, Hcut=0): +class StationaryDistribution(object): + def __init__(self, H, Hmax=None, Hcut=0, H0=1): self.H = H - self.H0 = 1 + self.H0 = H0 if not Hmax: self.Hmax = H(0, 0) else: self.Hmax = Hmax self.Hcut = Hcut + @abstractmethod def _psi(self, H): - return np.exp((H - self.Hmax) / self.H0) - np.exp(-self.Hmax / self.H0) + '''Define the distribution value for the given H, the output + lies in the interval [0,1]. This is the central function to + be implemented by stationary distributions. + ''' + pass def function(self, z, dp): - return self._psi(self.H(z, dp).clip(min=0)) / self._psi(self.Hmax) + psi = self._psi(self.H(z, dp).clip(min=self.Hcut)) + norm = self._psi(self.Hmax) + return psi / norm + + +class StationaryExponential(StationaryDistribution): + '''Thermal Boltzmann distribution \psi ~ \exp(-H/H0). + For a quadratic harmonic oscillator Hamiltonian this gives the + bi-Gaussian phase space distribution. 
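+
+    Note the convex-Hamiltonian convention used in _psi below (SFP at
+    the maximum, separatrix at zero): with Hsep = Hcut + Hmax it
+    evaluates exp(-(Hsep - H)/H0) - exp(-Hsep/H0), which vanishes
+    exactly on the separatrix H = 0.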
+ ''' + def _psi(self, H): + # convert from convex Hamiltonian + # (SFP being the maximum and the separatrix having zero value) + # to conventional literature scale (zero-valued minimum at SFP) + Hsep = self.Hcut + self.Hmax + Hn = Hsep - H + # f(Hn) - f(Hsep) + return np.exp(-Hn / self.H0) - np.exp(-Hsep / self.H0) From be248f677323342047be2c3105d2a11495e49a71 Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Fri, 16 Jun 2017 18:11:46 +0200 Subject: [PATCH 15/43] rfbucket_matching: rename StationaryExponential to ThermalDistribution --- PyHEADTAIL/particles/generators.py | 7 +++++-- PyHEADTAIL/particles/rfbucket_matching.py | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/PyHEADTAIL/particles/generators.py b/PyHEADTAIL/particles/generators.py index c6978083..29c356d4 100644 --- a/PyHEADTAIL/particles/generators.py +++ b/PyHEADTAIL/particles/generators.py @@ -9,7 +9,7 @@ import numpy as np from particles import Particles -from rfbucket_matching import RFBucketMatcher, StationaryExponential +from rfbucket_matching import RFBucketMatcher, ThermalDistribution from . import Printing @@ -132,7 +132,7 @@ def _longitudinal_linear_matcher(beam, *args, **kwargs): def RF_bucket_distribution(rfbucket, sigma_z=None, epsn_z=None, - margin=0, distribution_type=StationaryExponential, + margin=0, distribution_type=ThermalDistribution, *args, **kwargs): '''Return a distribution function which generates particles which are matched to the specified bucket and target emittance or std @@ -143,6 +143,9 @@ def RF_bucket_distribution(rfbucket, sigma_z=None, epsn_z=None, epsn_z: target normalized emittance in z-direction margin: relative margin from the separatrix towards the inner stable fix point in which particles are avoided + distribution_type: longitudinal distribution type from + rfbucket_matching (default is ThermalDistribution which + produces a Gaussian-like matched Boltzmann distribution) Returns: A matcher with the specified bucket properties (closure) Raises: diff --git a/PyHEADTAIL/particles/rfbucket_matching.py b/PyHEADTAIL/particles/rfbucket_matching.py index 5a3e6813..06cd4284 100644 --- a/PyHEADTAIL/particles/rfbucket_matching.py +++ b/PyHEADTAIL/particles/rfbucket_matching.py @@ -251,8 +251,7 @@ def function(self, z, dp): norm = self._psi(self.Hmax) return psi / norm - -class StationaryExponential(StationaryDistribution): +class ThermalDistribution(StationaryDistribution): '''Thermal Boltzmann distribution \psi ~ \exp(-H/H0). For a quadratic harmonic oscillator Hamiltonian this gives the bi-Gaussian phase space distribution. 
@@ -265,3 +264,6 @@ def _psi(self, H): Hn = Hsep - H # f(Hn) - f(Hsep) return np.exp(-Hn / self.H0) - np.exp(-Hsep / self.H0) + +# backwards compatibility: +StationaryExponential = ThermalDistribution From 77eb16f76e297feb512d0d77631ab3ee3a61c822 Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Fri, 16 Jun 2017 18:25:10 +0200 Subject: [PATCH 16/43] rfbucket_matching: adding qGaussian, parabolic and waterbag --- PyHEADTAIL/particles/rfbucket_matching.py | 43 +++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/PyHEADTAIL/particles/rfbucket_matching.py b/PyHEADTAIL/particles/rfbucket_matching.py index 06cd4284..b6a017f4 100644 --- a/PyHEADTAIL/particles/rfbucket_matching.py +++ b/PyHEADTAIL/particles/rfbucket_matching.py @@ -267,3 +267,46 @@ def _psi(self, H): # backwards compatibility: StationaryExponential = ThermalDistribution + +class QGaussianDistribution(StationaryDistribution): + '''Specific Tsallis q-Gaussian distribution for q=3/5 for now, + leading to \psi ~ (1 - H/H0)^2, this may be generalised. + ''' + n = 2 + + def _psi(self, H): + # convert from convex Hamiltonian + # (SFP being the maximum and the separatrix having zero value) + # to conventional scale (zero-valued minimum at SFP) + Hsep = self.Hcut + self.Hmax + Hn = Hsep - H + dist = (1 - Hn / self.H0).clip(min=self.Hcut)**self.n + return dist.clip(min=self.Hcut) + +class ParabolicDistribution(StationaryDistribution): + '''The parabolic profile distribution is a specific case of the + present implementation of the q-Gaussian distribution for n = 1/2, + \psi ~ \sqrt(1 - H/H0). + For a quadratic harmonic oscillator Hamiltonian this distribution + provides a parabolic line density. + ''' + def _psi(self, H): + # convert from convex Hamiltonian + # (SFP being the maximum and the separatrix having zero value) + # to conventional scale (zero-valued minimum at SFP) + Hsep = self.Hcut + self.Hmax + Hn = Hsep - H + return np.sqrt((1 - Hn / self.H0).clip(min=self.Hcut)) + +class WaterbagDistribution(StationaryDistribution): + '''The waterbag distribution has a constant Hamiltonian distribution + until a cutoff, \psi ~ \Theta(H - H0) with \Theta the Heaviside + step function. + ''' + def _psi(self, H): + # convert from convex Hamiltonian + # (SFP being the maximum and the separatrix having zero value) + # to conventional scale (zero-valued minimum at SFP) + Hsep = self.Hcut + self.Hmax + Hn = Hsep - H + return np.piecewise(Hn, [Hn <= self.H0, Hn > self.H0], [1./self.H0, 0]) From 9b77d90fdd5ae6e02e2b8b3d21f216cdff8a1258 Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Fri, 16 Jun 2017 18:25:52 +0200 Subject: [PATCH 17/43] rfbucket_matching: some renaming in anticipation... ... of e.g. the improvement of the moment computation, where we should start integrating the distribution type from the incontinuous derivative point (where \psi(H) departs from zero, i.e. where the clipping starts). --- PyHEADTAIL/particles/rfbucket_matching.py | 44 +++++++++++++++-------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/PyHEADTAIL/particles/rfbucket_matching.py b/PyHEADTAIL/particles/rfbucket_matching.py index b6a017f4..4fbe2953 100644 --- a/PyHEADTAIL/particles/rfbucket_matching.py +++ b/PyHEADTAIL/particles/rfbucket_matching.py @@ -57,6 +57,8 @@ def __init__(self, rfbucket, distribution_type=None, sigma_z=None, "distribution! 
(Don't provide both sigma_z " "and epsn_z!)") + ### analytic matching methods: + def psi_for_emittance_newton_method(self, epsn_z): # Maximum emittance self.psi_object.H0 = self.rfbucket.guess_H0( @@ -69,7 +71,7 @@ def psi_for_emittance_newton_method(self, epsn_z): epsn_z = epsn_max*0.99 self.prints('*** Maximum RMS emittance ' + str(epsn_max) + 'eV s.') - def get_zc_for_epsn_z(ec): + def error_from_target_epsn(ec): self.psi_object.H0 = self.rfbucket.guess_H0( ec, from_variable='epsn') emittance = self._compute_emittance(self.rfbucket, self.psi) @@ -80,12 +82,12 @@ def get_zc_for_epsn_z(ec): return emittance-epsn_z try: - ec_bar = newton(get_zc_for_epsn_z, epsn_z, tol=5e-4) + ec_bar = newton(error_from_target_epsn, epsn_z, tol=5e-4) except RuntimeError: self.warns('RFBucketMatcher -- failed to converge while ' 'using Newton-Raphson method. ' 'Instead trying classic Brent method...') - ec_bar = brentq(get_zc_for_epsn_z, epsn_z/2, 2*epsn_max) + ec_bar = brentq(error_from_target_epsn, epsn_z/2, 2*epsn_max) self.psi_object.H0 = self.rfbucket.guess_H0( ec_bar, from_variable='epsn') @@ -106,10 +108,9 @@ def psi_for_bunchlength_newton_method(self, sigma): sigma = sigma_max*0.99 self.prints('*** Maximum RMS bunch length ' + str(sigma_max) + 'm.') - def get_zc_for_sigma(zc): + def error_from_target_sigma(H0): '''Width for bunch length''' - self.psi_object.H0 = self.rfbucket.guess_H0( - zc, from_variable='sigma') + self.psi_object.H0 = H0 length = self._compute_sigma(self.rfbucket, self.psi) if np.isnan(length): raise ValueError @@ -119,10 +120,19 @@ def get_zc_for_sigma(zc): return length-sigma - zc_bar = newton(get_zc_for_sigma, sigma) + try: + H0_init = self.rfbucket.guess_H0( + sigma, from_variable='sigma') + H0_fin = newton(error_from_target_sigma, H0_init) + except RuntimeError: + self.warns('RFBucketMatcher -- failed to converge while ' + 'using Newton-Raphson method. 
' + 'Instead trying classic Brent method...') + H0_max = max(self.rfbucket.hamiltonian( + self.rfbucket.z_sfp, 0, make_convex=True))*2 + H0_fin = brentq(error_from_target_sigma, H0_max*1e-3, H0_max) - self.psi_object.H0 = self.rfbucket.guess_H0( - zc_bar, from_variable='sigma') + self.psi_object.H0 = H0_fin sigma = self._compute_sigma(self.rfbucket, self.psi) self.prints('--> Bunch length: ' + str(sigma)) emittance = self._compute_emittance(self.rfbucket, self.psi) @@ -190,26 +200,30 @@ def mask_out(s, u, v): return u, v, self.psi, self.linedensity def _compute_sigma(self, rfbucket, psi): + z_left = rfbucket.z_left + z_right = rfbucket.z_right f = lambda x, y: self.psi(x, y) - Q = quad2d(f, rfbucket.separatrix, rfbucket.z_left, rfbucket.z_right) + Q = quad2d(f, rfbucket.separatrix, z_left, z_right) f = lambda x, y: psi(x, y)*x - M = quad2d(f, rfbucket.separatrix, rfbucket.z_left, rfbucket.z_right)/Q + M = quad2d(f, rfbucket.separatrix, z_left, z_right)/Q f = lambda x, y: psi(x, y)*(x-M)**2 - V = quad2d(f, rfbucket.separatrix, rfbucket.z_left, rfbucket.z_right)/Q + V = quad2d(f, rfbucket.separatrix, z_left, z_right)/Q var_x = V return np.sqrt(var_x) def _compute_emittance(self, rfbucket, psi): + z_left = rfbucket.z_left + z_right = rfbucket.z_right f = lambda x, y: self.psi(x, y) - Q = quad2d(f, rfbucket.separatrix, rfbucket.z_left, rfbucket.z_right) + Q = quad2d(f, rfbucket.separatrix, z_left, z_right) f = lambda x, y: psi(x, y)*x - M = quad2d(f, rfbucket.separatrix, rfbucket.z_left, rfbucket.z_right)/Q + M = quad2d(f, rfbucket.separatrix, z_left, z_right)/Q f = lambda x, y: psi(x, y)*(x-M)**2 - V = quad2d(f, rfbucket.separatrix, rfbucket.z_left, rfbucket.z_right)/Q + V = quad2d(f, rfbucket.separatrix, z_left, z_right)/Q mean_x = M var_x = V From ffd12db46eccb529390985fe0ee38f40d35cfe8a Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Mon, 19 Jun 2017 11:59:32 +0200 Subject: [PATCH 18/43] gpu_wrap: slice_to_particles should be zero filled... ... for all particles outside the slicing area. Fixes the bug when running slice_to_particles on the GPU for slicesets with particles outside the slicing area. (Before this crashed with a LogicError as the slicing index accessed <0 or >=n_slices.) --- PyHEADTAIL/gpu/gpu_wrap.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/PyHEADTAIL/gpu/gpu_wrap.py b/PyHEADTAIL/gpu/gpu_wrap.py index 0a2a2ec1..cda90163 100644 --- a/PyHEADTAIL/gpu/gpu_wrap.py +++ b/PyHEADTAIL/gpu/gpu_wrap.py @@ -286,26 +286,32 @@ def thrust_mean_and_std_per_slice(sliceset, u, stream=None): _slice_to_particles = pycuda.elementwise.ElementwiseKernel( - "unsigned int* slice_index_of_particle, double* input_slice_quantity, " - "double* output_particle_array", + "const int* slice_index_of_particle, " + "const double* input_slice_quantity, " + "const int n_slices, double* output_particle_array", # i is the particle index within slice_index_of_particle - "output_particle_array[i] = " - "input_slice_quantity[slice_index_of_particle[i]]", + "const int sid = slice_index_of_particle[i];\n" + "if (sid >= 0 && sid < n_slices) {" + "output_particle_array[i] = " + "input_slice_quantity[sid];" + "} else {" + "output_particle_array[i] = 0;" + "};", "slice_to_particles_kernel" ) def slice_to_particles(sliceset, slice_array, particle_array=None): '''Convert slice_array with entries for each slice to a particle array with the respective entry of each particle given by its slice_array value via the slice that the - particle belongs to. 
If provided, particle_array should be a - zero-filled destination array. + particle belongs to. If the particle is located outside + the slicing area, its particle_array entry is assigned to zero. ''' if particle_array == None: particle_array = pycuda.gpuarray.empty( sliceset.slice_index_of_particle.shape, dtype=np.float64, allocator=gpu_utils.memory_pool.allocate) _slice_to_particles(sliceset.slice_index_of_particle, - slice_array, particle_array) + slice_array, sliceset.n_slices, particle_array) return particle_array From f3943c0e677f43e359eb3cb6a1e09d79046b508d Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Wed, 21 Jun 2017 11:18:33 +0200 Subject: [PATCH 19/43] rfbucket_matching: complete z_left, z_right assignments --- PyHEADTAIL/particles/rfbucket_matching.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/PyHEADTAIL/particles/rfbucket_matching.py b/PyHEADTAIL/particles/rfbucket_matching.py index 4fbe2953..66ef01fe 100644 --- a/PyHEADTAIL/particles/rfbucket_matching.py +++ b/PyHEADTAIL/particles/rfbucket_matching.py @@ -228,14 +228,14 @@ def _compute_emittance(self, rfbucket, psi): var_x = V f = lambda x, y: psi(x, y)*y - M = quad2d(f, rfbucket.separatrix, rfbucket.z_left, rfbucket.z_right)/Q + M = quad2d(f, rfbucket.separatrix, z_left, z_right)/Q f = lambda x, y: psi(x, y)*(y-M)**2 - V = quad2d(f, rfbucket.separatrix, rfbucket.z_left, rfbucket.z_right)/Q + V = quad2d(f, rfbucket.separatrix, z_left, z_right)/Q mean_y = M var_y = V f = lambda x, y: psi(x, y)*(x-mean_x)*(y-mean_y) - M = quad2d(f, rfbucket.separatrix, rfbucket.z_left, rfbucket.z_right)/Q + M = quad2d(f, rfbucket.separatrix, z_left, z_right)/Q mean_xy = M return (np.sqrt(var_x*var_y - mean_xy**2) * From f0ac65857278c59975f4ee8b22c4c22a46c4ab31 Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Wed, 21 Jun 2017 11:30:25 +0200 Subject: [PATCH 20/43] generators: backwards compatibility for StationaryExponential ... moving StationaryExponential (==ThermalDistribution) from rfbucket_matching to generators where it had been accessible before the splitting into generators/rfbucket_matching. --- PyHEADTAIL/particles/generators.py | 3 +++ PyHEADTAIL/particles/rfbucket_matching.py | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/PyHEADTAIL/particles/generators.py b/PyHEADTAIL/particles/generators.py index 29c356d4..7527d4e2 100644 --- a/PyHEADTAIL/particles/generators.py +++ b/PyHEADTAIL/particles/generators.py @@ -11,6 +11,9 @@ from particles import Particles from rfbucket_matching import RFBucketMatcher, ThermalDistribution +# backwards compatibility: +StationaryExponential = ThermalDistribution + from . import Printing from scipy.constants import e, c diff --git a/PyHEADTAIL/particles/rfbucket_matching.py b/PyHEADTAIL/particles/rfbucket_matching.py index 66ef01fe..08efc9c0 100644 --- a/PyHEADTAIL/particles/rfbucket_matching.py +++ b/PyHEADTAIL/particles/rfbucket_matching.py @@ -279,9 +279,6 @@ def _psi(self, H): # f(Hn) - f(Hsep) return np.exp(-Hn / self.H0) - np.exp(-Hsep / self.H0) -# backwards compatibility: -StationaryExponential = ThermalDistribution - class QGaussianDistribution(StationaryDistribution): '''Specific Tsallis q-Gaussian distribution for q=3/5 for now, leading to \psi ~ (1 - H/H0)^2, this may be generalised. 
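The backwards-compatibility alias above makes both names refer to the very same class object, so old import paths keep working unchanged. A minimal sketch of the equivalence (import locations as introduced in this patch):

    >>> from PyHEADTAIL.particles.generators import StationaryExponential
    >>> from PyHEADTAIL.particles.rfbucket_matching import ThermalDistribution
    >>> StationaryExponential is ThermalDistribution
    True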
From 8a3c30418d3a406433305cf71fd8b71161bf36d1 Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Wed, 21 Jun 2017 11:43:53 +0200 Subject: [PATCH 21/43] pdf_integrators_2d: reviving and restructuring 2D quad integrators Allow computing 2D particle distribution function moments up to second order for a domain parametrised by [xmin, xmax], [ymin(x), ymax(x)]. This is particularly useful for RFBucket matching within an RFBucket.interval for x and the separatrix functions for y. Started with the dblquad-based integrators; cumtrapz is to come. So far the RFBucketMatcher still works nicely with these functions! --- .../cobra_functions/pdf_integrators_2d.py | 148 +++++++++++++----- 1 file changed, 107 insertions(+), 41 deletions(-) diff --git a/PyHEADTAIL/cobra_functions/pdf_integrators_2d.py b/PyHEADTAIL/cobra_functions/pdf_integrators_2d.py index dda6168c..72297ee2 100644 --- a/PyHEADTAIL/cobra_functions/pdf_integrators_2d.py +++ b/PyHEADTAIL/cobra_functions/pdf_integrators_2d.py @@ -8,110 +8,176 @@ def quad2d(f, ylimits, xmin, xmax): return Q -def _compute_zero_quad(self, psi, p_sep, xmin, xmax): +def compute_zero_quad(psi, ylimit_min, ylimit_max, xmin, xmax): + '''Compute the zeroth moment of the distribution function psi from + xmin to xmax between the contours ylimit_min and ylimit_max + (e.g. an RFBucket separatrix) using the numerical + scipy.integrate.dblquad integration method. + + Arguments: + - psi: 2D distribution function with two arguments x and y + - ylimit_min, ylimit_max: contour functions yielding the lower + and the upper y(x) limit for given x + - xmin, xmax: lower and upper limit in the first argument of psi ''' - Compute the variance of the distribution function psi from xmin - to xmax along the contours p_sep using numerical integration - methods. - ''' - Q, error = dblquad(lambda y, x: psi(x, y), xmin, xmax, - lambda x: 0, lambda x: p_sep(x)) + ylimit_min, ylimit_max) return Q -def _compute_mean_quad(self, psi, p_sep, xmin, xmax): - ''' - Compute the variance of the distribution function psi from xmin - to xmax along the contours p_sep using numerical integration - methods. +def compute_mean_quad(psi, ylimit_min, ylimit_max, xmin, xmax, direction='x'): +'''Compute the first moment of the distribution function psi from + xmin to xmax between the contours ylimit_min and ylimit_max + (e.g. an RFBucket separatrix) using the numerical + scipy.integrate.dblquad integration method.
+ + Arguments: + - psi: 2D distribution function with two arguments x and y + - ylimit_min, ylimit_max: contour functions yielding the lower + and the upper y(x) limit for given x + - xmin, xmax: lower and upper limit in the first argument of psi + - direction: 'x' or 'y' for calculating the mean in the first + or second argument of psi, respectively (default: 'x') ''' - Q = self._compute_zero_quad(psi, p_sep, xmin, xmax) - M, error = dblquad(lambda y, x: x * psi(x, y), xmin, xmax, - lambda x: 0, lambda x: p_sep(x)) + Q = compute_zero_quad(psi, ylimit_min, ylimit_max, xmin, xmax) + if direction is 'x': + f = lambda y, x: x * psi(x, y) + elif direction is 'y': + f = lambda y, x: y * psi(x, y) + else: + raise ValueError('direction needs to be either "x" or "y".') + + M, error = dblquad(f, xmin, xmax, ylimit_min, ylimit_max) return M/Q -def _compute_std_quad(self, psi, p_sep, xmin, xmax): +def compute_var_quad(psi, ylimit_min, ylimit_max, xmin, xmax, direction='x'): + '''Compute the second moment (variance) with respect to the x or + y direction of the distribution function psi from xmin to xmax + between the contours ylimit_min and ylimit_max (e.g. an RFBucket + separatrix) using the numerical scipy.integrate.dblquad integration + method. + + Arguments: + - psi: 2D distribution function with two arguments x and y + - ylimit_min, ylimit_max: contour functions yielding the lower + and the upper y(x) limit for given x + - xmin, xmax: lower and upper limit in the first argument of psi + - direction: 'x' or 'y' for calculating the standard deviation + in the first or second argument of psi, respectively + (default: 'x') ''' - Compute the variance of the distribution function psi from xmin - to xmax along the contours p_sep using numerical integration - methods. + + Q = compute_zero_quad(psi, ylimit_min, ylimit_max, xmin, xmax) + M = compute_mean_quad(psi, ylimit_min, ylimit_max, xmin, xmax, direction) + if direction is 'x': + f = lambda y, x: (x - M)**2 * psi(x, y) + elif direction is 'y': + f = lambda y, x: (y - M)**2 * psi(x, y) + else: + raise ValueError('direction needs to be either "x" or "y".') + + V, error = dblquad(f, xmin, xmax, ylimit_min, ylimit_max) + + return V/Q + +def compute_cov_quad(psi, ylimit_min, ylimit_max, xmin, xmax): + '''Compute the second moments (covariance matrix entries) of the + distribution function psi from xmin to xmax between the contours + ylimit_min and ylimit_max (e.g. an RFBucket separatrix) using the + numerical scipy.integrate.dblquad integration method. + For x the first and y the second argument of psi, return the tuple + (variance(x), covariance(x,y), variance(y)). + + Arguments: + - psi: 2D distribution function with two arguments x and y + - ylimits: contour function yielding the y(x) limit for given x + - xmin, xmax: lower and upper limit in the first argument of psi + + NB: assumes a symmetric distribution function with respect to the + second argument (the vertical direction). 
''' - Q = self._compute_zero_quad(psi, p_sep, xmin, xmax) - M = self._compute_mean_quad(psi, p_sep, xmin, xmax) - V, error = dblquad(lambda y, x: (x-M) ** 2 * psi(x, y), xmin, xmax, - lambda x: 0, lambda x: p_sep(x)) + Q = compute_zero_quad(psi, ylimit_min, ylimit_max, xmin, xmax) + M_x = compute_mean_quad(psi, ylimit_min, ylimit_max, xmin, xmax, 'x') + M_y = compute_mean_quad(psi, ylimit_min, ylimit_max, xmin, xmax, 'y') - return np.sqrt(V/Q) + V_x = compute_var_quad(psi, ylimit_min, ylimit_max, xmin, xmax, 'x') + V_y = compute_var_quad(psi, ylimit_min, ylimit_max, xmin, xmax, 'y') + + f = lambda y, x: (x - M_x) * (y - M_y) * psi(x, y) + C_xy, error = dblquad(f, xmin, xmax, ylimit_min, ylimit_max) + + C_xy /= Q + + return V_x, C_xy, V_y -def _compute_zero_cumtrapz(self, psi, p_sep, xmin, xmax): +def compute_zero_cumtrapz(psi, ylimits, xmin, xmax, n_samples=257): - x_arr = np.linspace(xmin, xmax, 257) + x_arr = np.linspace(xmin, xmax, n_samples) dx = x_arr[1] - x_arr[0] Q = 0 for x in x_arr: - y = np.linspace(0, p_sep(x), 257) + y = np.linspace(0, ylimits(x), n_samples) z = psi(x, y) Q += cumtrapz(z, y)[-1] Q *= dx return Q -def _compute_mean_cumtrapz(self, psi, p_sep, xmin, xmax): +def compute_mean_cumtrapz(psi, ylimits, xmin, xmax, n_samples=257): - Q = self._compute_zero_cumtrapz(psi, p_sep, xmin, xmax) + Q = compute_zero_cumtrapz(psi, ylimits, xmin, xmax, n_samples) - x_arr = np.linspace(xmin, xmax, 257) + x_arr = np.linspace(xmin, xmax, n_samples) dx = x_arr[1] - x_arr[0] M = 0 for x in x_arr: - y = np.linspace(0, p_sep(x), 257) + y = np.linspace(0, ylimits(x), n_samples) z = x * psi(x, y) M += cumtrapz(z, y)[-1] M *= dx return M/Q -def _compute_std_cumtrapz(self, psi, p_sep, xmin, xmax): +def compute_std_cumtrapz(psi, ylimits, xmin, xmax, n_samples=257): ''' Compute the variance of the distribution function psi from xmin - to xmax along the contours p_sep using numerical integration + to xmax along the contours ylimits using numerical integration methods. ''' - Q = self._compute_zero_cumtrapz(psi, p_sep, xmin, xmax) - M = self._compute_mean_cumtrapz(psi, p_sep, xmin, xmax) + Q = compute_zero_cumtrapz(psi, ylimits, xmin, xmax, n_samples) + M = compute_mean_cumtrapz(psi, ylimits, xmin, xmax, n_samples) - x_arr = np.linspace(xmin, xmax, 257) + x_arr = np.linspace(xmin, xmax, n_samples) dx = x_arr[1] - x_arr[0] V = 0 for x in x_arr: - y = np.linspace(0, p_sep(x), 257) + y = np.linspace(0, ylimits(x), n_samples) z = (x-M)**2 * psi(x, y) V += cumtrapz(z, y)[-1] V *= dx return np.sqrt(V/Q) -def _compute_std_romberg(self, psi, p_sep, xmin, xmax): +def compute_std_romberg(psi, ylimits, xmin, xmax, n_samples=257): ''' Compute the variance of the distribution function psi from xmin - to xmax along the contours p_sep using numerical integration + to xmax along the contours ylimits using numerical integration methods. ''' - x_arr = np.linspace(xmin, xmax, 257) + x_arr = np.linspace(xmin, xmax, n_samples) dx = x_arr[1] - x_arr[0] Q, V = 0, 0 for x in x_arr: - y = np.linspace(0, p_sep(x), 257) + y = np.linspace(0, ylimits(x), n_samples) dy = y[1] - y[0] z = psi(x, y) Q += romb(z, dy) From e8b6d972fabe03c15a4dc27f8a10f044978b079a Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Wed, 21 Jun 2017 11:55:01 +0200 Subject: [PATCH 22/43] rfbucket_matching: make use of rewritten pdf_integrators_2d This change allows to vary the integration method for the beam moments when matching to the RFBucket. Well tested in PyHEADTAIL-playground/RFBucket_Matching. 
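A usage sketch of the new integrationmethod switch (rfbucket denotes an already constructed RFBucket instance and the sigma_z value is purely illustrative; the 'cumtrapz' setting relies on the compute_*_cumtrapz integrators completed in the following patches):

    >>> from PyHEADTAIL.particles.rfbucket_matching import (
    ...     RFBucketMatcher, ThermalDistribution)
    >>> RFBucketMatcher.integrationmethod = 'cumtrapz'  # default: 'quad'
    >>> matcher = RFBucketMatcher(rfbucket, ThermalDistribution, sigma_z=0.23)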
--- PyHEADTAIL/particles/rfbucket_matching.py | 52 ++++++++++------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/PyHEADTAIL/particles/rfbucket_matching.py b/PyHEADTAIL/particles/rfbucket_matching.py index 08efc9c0..3e098c39 100644 --- a/PyHEADTAIL/particles/rfbucket_matching.py +++ b/PyHEADTAIL/particles/rfbucket_matching.py @@ -11,7 +11,7 @@ from scipy.integrate import fixed_quad from scipy.constants import e, c -from ..cobra_functions.pdf_integrators_2d import quad2d +from ..cobra_functions import pdf_integrators_2d as integr from . import Printing @@ -21,6 +21,19 @@ class RFBucketMatcher(Printing): + integrationmethod = ['quad', 'cumtrapz'][0] + def get_moment_integrators(self): + '''Return moment integrators from + cobra_functions.pdf_integrators_2d according to the chosen + self.integrationmethod. Allows to change integration method + for RFBucket matching. + ''' + zero = getattr(integr, 'compute_zero_' + self.integrationmethod) + mean = getattr(integr, 'compute_mean_' + self.integrationmethod) + var = getattr(integr, 'compute_var_' + self.integrationmethod) + cov = getattr(integr, 'compute_cov_' + self.integrationmethod) + return zero, mean, var, cov + def __init__(self, rfbucket, distribution_type=None, sigma_z=None, epsn_z=None, verbose_regeneration=False, psi=None, *args, **kwargs): @@ -202,43 +215,24 @@ def mask_out(s, u, v): def _compute_sigma(self, rfbucket, psi): z_left = rfbucket.z_left z_right = rfbucket.z_right + zero, mean, var, cov = self.get_moment_integrators() - f = lambda x, y: self.psi(x, y) - Q = quad2d(f, rfbucket.separatrix, z_left, z_right) - f = lambda x, y: psi(x, y)*x - M = quad2d(f, rfbucket.separatrix, z_left, z_right)/Q - f = lambda x, y: psi(x, y)*(x-M)**2 - V = quad2d(f, rfbucket.separatrix, z_left, z_right)/Q - var_x = V + var_x = var(self.psi, lambda x: -rfbucket.separatrix(x), + rfbucket.separatrix, z_left, z_right, + direction='x') # x means z direction return np.sqrt(var_x) def _compute_emittance(self, rfbucket, psi): z_left = rfbucket.z_left z_right = rfbucket.z_right + zero, mean, var, cov = self.get_moment_integrators() - f = lambda x, y: self.psi(x, y) - Q = quad2d(f, rfbucket.separatrix, z_left, z_right) - - f = lambda x, y: psi(x, y)*x - M = quad2d(f, rfbucket.separatrix, z_left, z_right)/Q - f = lambda x, y: psi(x, y)*(x-M)**2 - V = quad2d(f, rfbucket.separatrix, z_left, z_right)/Q - mean_x = M - var_x = V - - f = lambda x, y: psi(x, y)*y - M = quad2d(f, rfbucket.separatrix, z_left, z_right)/Q - f = lambda x, y: psi(x, y)*(y-M)**2 - V = quad2d(f, rfbucket.separatrix, z_left, z_right)/Q - mean_y = M - var_y = V - - f = lambda x, y: psi(x, y)*(x-mean_x)*(y-mean_y) - M = quad2d(f, rfbucket.separatrix, z_left, z_right)/Q - mean_xy = M + var_x, cov_xy, var_y = cov( + self.psi, lambda x: -rfbucket.separatrix(x), + rfbucket.separatrix, z_left, z_right) - return (np.sqrt(var_x*var_y - mean_xy**2) * + return (np.sqrt(var_x*var_y - cov_xy**2) * 4*np.pi*rfbucket.p0/np.abs(rfbucket.charge)) From 1cadd492c6043c51958012f9cda84cefbba06bee Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Wed, 21 Jun 2017 14:54:34 +0200 Subject: [PATCH 23/43] pdf_integrators_2d: add description, correct DOCSTRINGS --- .../cobra_functions/pdf_integrators_2d.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/PyHEADTAIL/cobra_functions/pdf_integrators_2d.py b/PyHEADTAIL/cobra_functions/pdf_integrators_2d.py index 72297ee2..ee547c08 100644 --- a/PyHEADTAIL/cobra_functions/pdf_integrators_2d.py +++ 
b/PyHEADTAIL/cobra_functions/pdf_integrators_2d.py @@ -1,12 +1,24 @@ -from scipy.integrate import quad, fixed_quad, dblquad, cumtrapz, romb +''' +@author Kevin Li, Adrian Oeftiger +@date 21.06.2017 +@brief 2D distribution integration methods for y(x) parametrised domains +''' +from scipy.integrate import quad, dblquad, cumtrapz, romb + +import numpy as np def quad2d(f, ylimits, xmin, xmax): + '''Integrate 2D function f=f(x,y) over interval [xmin,xmax] and + between contours [-ylimits(x), ylimits(x)] using the numerical + scipy.integrate.dblquad integration method. + ''' Q, error = dblquad(lambda y, x: f(x, y), xmin, xmax, lambda x: -ylimits(x), lambda x: ylimits(x)) return Q +### scipy.integrate.dblquad moment integration: def compute_zero_quad(psi, ylimit_min, ylimit_max, xmin, xmax): '''Compute the zeroth moment of the distribution function psi from @@ -94,9 +106,6 @@ def compute_cov_quad(psi, ylimit_min, ylimit_max, xmin, xmax): - psi: 2D distribution function with two arguments x and y - ylimits: contour function yielding the y(x) limit for given x - xmin, xmax: lower and upper limit in the first argument of psi - - NB: assumes a symmetric distribution function with respect to the - second argument (the vertical direction). ''' Q = compute_zero_quad(psi, ylimit_min, ylimit_max, xmin, xmax) From 6c5ff83eb9155386d31f4699a910c7647f2b6e3a Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Wed, 21 Jun 2017 15:19:51 +0200 Subject: [PATCH 24/43] pdf_integrators_2d: adding cumtrapz functionality In analogy to quad functions, the cumtrapz functions provide moment integration up to covariance 2nd order. scipy.integrate.romberg is left in a functioning state for the standard deviation but not adapted to the structure of the quad and cumtrapz approaches. cumtrapz has been tested with all available longitudinal distribution types in the RFBucketMatcher, i.e. - thermal - qGaussian - parabolic - waterbag (!!! this works only with cumtrapz, not with quad!) cumtrapz makes any distribution type converge quickly but provides slightly less accurate matching (difference to quad is below 1e-4 in resulting sigma_z or epsn_z), which is negligible given the usual numerical statistics noise in macro-particle generation. --- .../cobra_functions/pdf_integrators_2d.py | 153 +++++++++++++++--- 1 file changed, 128 insertions(+), 25 deletions(-) diff --git a/PyHEADTAIL/cobra_functions/pdf_integrators_2d.py b/PyHEADTAIL/cobra_functions/pdf_integrators_2d.py index ee547c08..78f88a14 100644 --- a/PyHEADTAIL/cobra_functions/pdf_integrators_2d.py +++ b/PyHEADTAIL/cobra_functions/pdf_integrators_2d.py @@ -122,71 +122,174 @@ def compute_cov_quad(psi, ylimit_min, ylimit_max, xmin, xmax): return V_x, C_xy, V_y -def compute_zero_cumtrapz(psi, ylimits, xmin, xmax, n_samples=257): +### scipy.integrate.cumtrapz moment integration: - x_arr = np.linspace(xmin, xmax, n_samples) +def compute_zero_cumtrapz(psi, ylimit_min, ylimit_max, xmin, xmax, + n_samples=513): + '''Compute the zeroth moment of the distribution function psi from + xmin to xmax between the contours ylimit_min and ylimit_max + (e.g. an RFBucket separatrix) using the numerical + scipy.integrate.cumtrapz integration method. 
+ + Arguments: + - psi: 2D distribution function with two arguments x and y + - ylimit_min, ylimit_max: contour functions yielding the lower + and the upper y(x) limit for given x + - xmin, xmax: lower and upper limit in the first argument of psi + - n_samples: integer number of sampling points for integration + ''' + + x_arr = np.linspace(xmin, xmax, num=n_samples) dx = x_arr[1] - x_arr[0] Q = 0 for x in x_arr: - y = np.linspace(0, ylimits(x), n_samples) + y = np.linspace(ylimit_min(x), ylimit_max(x), num=n_samples) z = psi(x, y) Q += cumtrapz(z, y)[-1] Q *= dx return Q -def compute_mean_cumtrapz(psi, ylimits, xmin, xmax, n_samples=257): +def compute_mean_cumtrapz(psi, ylimit_min, ylimit_max, xmin, xmax, + direction='x', n_samples=513): + '''Compute the first moment of the distribution function psi from + xmin to xmax between the contours ylimit_min and ylimit_max + (e.g. an RFBucket separatrix) using the numerical + scipy.integrate.cumtrapz integration method. - Q = compute_zero_cumtrapz(psi, ylimits, xmin, xmax, n_samples) + Arguments: + - psi: 2D distribution function with two arguments x and y + - ylimit_min, ylimit_max: contour functions yielding the lower + and the upper y(x) limit for given x + - xmin, xmax: lower and upper limit in the first argument of psi + - direction: 'x' or 'y' for calculating the mean in the first + or second argument of psi, respectively (default: 'x') + - n_samples: integer number of sampling points for integration + ''' - x_arr = np.linspace(xmin, xmax, n_samples) + Q = compute_zero_cumtrapz(psi, ylimit_min, ylimit_max, xmin, xmax, + n_samples) + + x_arr = np.linspace(xmin, xmax, num=n_samples) dx = x_arr[1] - x_arr[0] + if direction is 'x': + f = lambda x, y: x * psi(x, y) + elif direction is 'y': + f = lambda x, y: y * psi(x, y) + else: + raise ValueError('direction needs to be either "x" or "y".') + M = 0 for x in x_arr: - y = np.linspace(0, ylimits(x), n_samples) - z = x * psi(x, y) + y = np.linspace(ylimit_min(x), ylimit_max(x), num=n_samples) + z = f(x, y) M += cumtrapz(z, y)[-1] M *= dx return M/Q -def compute_std_cumtrapz(psi, ylimits, xmin, xmax, n_samples=257): - ''' - Compute the variance of the distribution function psi from xmin - to xmax along the contours ylimits using numerical integration - methods. +def compute_var_cumtrapz(psi, ylimit_min, ylimit_max, xmin, xmax, + direction='x', n_samples=513): + '''Compute the second moment (variance) with respect to the x or + y direction of the distribution function psi from xmin to xmax + between the contours ylimit_min and ylimit_max + (e.g. an RFBucket separatrix) using the numerical + scipy.integrate.cumtrapz integration method. 
+ + Arguments: + - psi: 2D distribution function with two arguments x and y + - ylimit_min, ylimit_max: contour functions yielding the lower + and the upper y(x) limit for given x + - xmin, xmax: lower and upper limit in the first argument of psi + - direction: 'x' or 'y' for calculating the mean in the first + or second argument of psi, respectively (default: 'x') + - n_samples: integer number of sampling points for integration ''' - Q = compute_zero_cumtrapz(psi, ylimits, xmin, xmax, n_samples) - M = compute_mean_cumtrapz(psi, ylimits, xmin, xmax, n_samples) + Q = compute_zero_cumtrapz(psi, ylimit_min, ylimit_max, xmin, xmax, + n_samples) + M = compute_mean_cumtrapz(psi, ylimit_min, ylimit_max, xmin, xmax, + direction, n_samples) - x_arr = np.linspace(xmin, xmax, n_samples) + x_arr = np.linspace(xmin, xmax, num=n_samples) dx = x_arr[1] - x_arr[0] + if direction is 'x': + f = lambda x, y: (x - M)**2 * psi(x, y) + elif direction is 'y': + f = lambda x, y: (y - M)**2 * psi(x, y) + else: + raise ValueError('direction needs to be either "x" or "y".') + V = 0 for x in x_arr: - y = np.linspace(0, ylimits(x), n_samples) - z = (x-M)**2 * psi(x, y) + y = np.linspace(ylimit_min(x), ylimit_max(x), num=n_samples) + z = f(x, y) V += cumtrapz(z, y)[-1] V *= dx - return np.sqrt(V/Q) + return V/Q + +def compute_cov_cumtrapz(psi, ylimit_min, ylimit_max, xmin, xmax, + n_samples=513): + '''Compute the second moments (covariance matrix entries) of the + distribution function psi from xmin to xmax between the contours + ylimit_min and ylimit_max (e.g. an RFBucket separatrix) using the + numerical scipy.integrate.cumtrapz integration method. + For x the first and y the second argument of psi, return the tuple + (variance(x), covariance(x,y), variance(y)). + + Arguments: + - psi: 2D distribution function with two arguments x and y + - ylimit_min, ylimit_max: contour functions yielding the lower + and the upper y(x) limit for given x + - xmin, xmax: lower and upper limit in the first argument of psi + - n_samples: integer number of sampling points for integration + ''' + + + Q = compute_zero_cumtrapz(psi, ylimit_min, ylimit_max, xmin, xmax) + M_x = compute_mean_cumtrapz(psi, ylimit_min, ylimit_max, xmin, xmax, 'x') + M_y = compute_mean_cumtrapz(psi, ylimit_min, ylimit_max, xmin, xmax, 'y') + + V_x = compute_var_cumtrapz(psi, ylimit_min, ylimit_max, xmin, xmax, 'x') + V_y = compute_var_cumtrapz(psi, ylimit_min, ylimit_max, xmin, xmax, 'y') + + x_arr = np.linspace(xmin, xmax, num=n_samples) + dx = x_arr[1] - x_arr[0] + + C_xy = 0 + for x in x_arr: + y = np.linspace(ylimit_min(x), ylimit_max(x), num=n_samples) + z = (x - M_x) * (y - M_y) * psi(x, y) + C_xy += cumtrapz(z, y)[-1] + C_xy *= dx + + C_xy /= Q + + return V_x, C_xy, V_y + + +### scipy.integrate.romberg standard deviation integration: +### ==> not yet adapted to above quad and cumtrapz approaches, +### works as Kevin had previously defined it in the RFBucketMatcher +### (slightly corrected to make this function work here) -def compute_std_romberg(psi, ylimits, xmin, xmax, n_samples=257): +def compute_std_romberg(psi, ylimits, xmin, xmax, n_samples=513): ''' - Compute the variance of the distribution function psi from xmin - to xmax along the contours ylimits using numerical integration - methods. + Compute the standard deviation of the distribution function psi + from xmin to xmax along the contours ylimits using numerical + integration methods. 
''' - x_arr = np.linspace(xmin, xmax, n_samples) + x_arr = np.linspace(xmin, xmax, num=n_samples) dx = x_arr[1] - x_arr[0] Q, V = 0, 0 for x in x_arr: - y = np.linspace(0, ylimits(x), n_samples) + y = np.linspace(0, ylimits(x), num=n_samples) dy = y[1] - y[0] z = psi(x, y) Q += romb(z, dy) From 4d71ef6444e59251ae5644378969d33d165f2bab Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Thu, 22 Jun 2017 14:12:35 +0200 Subject: [PATCH 25/43] rf_bucket: DOCSTRING updates --- PyHEADTAIL/trackers/rf_bucket.py | 50 +++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/PyHEADTAIL/trackers/rf_bucket.py b/PyHEADTAIL/trackers/rf_bucket.py index a8014393..fa701742 100644 --- a/PyHEADTAIL/trackers/rf_bucket.py +++ b/PyHEADTAIL/trackers/rf_bucket.py @@ -385,6 +385,17 @@ def acc_force(self, z, ignore_add_forces=False): def rf_potential(self, V, h, dphi, dp, acceleration=True): + '''Return the RF electric potential energy including the linear + acceleration slope (if acceleration == True). + + Arguments: + - V: list of voltages for each harmonic + - h: list of harmonics + - dphi: list of phase offsets for each harmonic + - p_increment: momentum increase per turn + - acceleration: whether to superimpose the linear + acceleration slope (induced by p_increment, default=True) + ''' def vf(z): coefficient = np.abs(self.charge)/self.circumference focusing_potential = reduce(lambda x, y: x+y, [ @@ -418,10 +429,12 @@ def total_potential(self, z, ignore_add_potentials=False, to zero. Arguments: - - make_convex: multiplies by sign(eta) for plotting etc. - To see a literal 'bucket structure' in the sense of a - local minimum in the Hamiltonian topology, set make_convex=True - in order to return sign(eta)*hamiltonian(z, dp). + - make_convex: multiplies by sign(eta) for plotting etc. + To see a literal 'bucket structure' in the sense of + always having a local maximum in the Hamiltonian topology + where the stable fix points are located, set + make_convex=True in order to return + sign(eta)*hamiltonian(z, dp). ''' v = (self.rf_potential(self.V, self.h, self.dphi, self.p_increment, acceleration)(z) + @@ -484,10 +497,12 @@ def acc_potential(self, z, ignore_add_potentials=False, to zero. Arguments: - - make_convex: multiplies by sign(eta) for plotting etc. - To see a literal 'bucket structure' in the sense of a - local minimum in the Hamiltonian topology, set make_convex=True - in order to return sign(eta)*hamiltonian(z, dp). + - make_convex: multiplies by sign(eta) for plotting etc. + To see a literal 'bucket structure' in the sense of + always having a local maximum in the Hamiltonian topology + where the stable fix points are located, set + make_convex=True in order to return + sign(eta)*hamiltonian(z, dp). ''' pot_tot = self.make_total_potential( ignore_add_potentials=ignore_add_potentials) @@ -562,10 +577,12 @@ def hamiltonian(self, z, dp, make_convex=False): Coul*Volt/p0. Arguments: - - make_convex: multiplies by sign(eta) for plotting etc. - To see a literal 'bucket structure' in the sense of a - local minimum in the Hamiltonian topology, set make_convex=True - in order to return sign(eta)*hamiltonian(z, dp). + - make_convex: multiplies by sign(eta) for plotting etc. + To see a literal 'bucket structure' in the sense of + always having a local maximum in the Hamiltonian topology + where the stable fix points are located, set + make_convex=True in order to return + sign(eta)*hamiltonian(z, dp). 
''' h = (-0.5 * self.eta0 * self.beta * c * dp**2 + self.total_potential(z) / self.p0) @@ -632,8 +649,8 @@ def make_is_accepted(self, margin=0): return partial(self.is_in_separatrix, margin=margin) def emittance_single_particle(self, z=None, sigma=2): - """The single particle emittance computed along a given equihamiltonian line - + """The single particle emittance computed along a given + equihamiltonian line. """ if z is not None: zl = -sigma * z @@ -650,9 +667,8 @@ def emittance_single_particle(self, z=None, sigma=2): return Q * 2*self.p0/np.abs(self.charge) def bunchlength_single_particle(self, epsn_z, verbose=False): - """The corresponding rms bunch length computed form the single particle - emittance - + """The corresponding RMS bunch length computed from the single + particle emittance. """ def emittance_from_zcut(zcut): emittance = self.emittance_single_particle(zcut) From 2355c13818de87d4adcd68e4f15c2c9f217b93f2 Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Thu, 22 Jun 2017 14:13:04 +0200 Subject: [PATCH 26/43] rf_bucket: total_potential may compute without shifting... ... to have zero potential energy at the separatrix. The reason is that this reshifting of total_potential requires already determined fix points. However, in some situations (e.g. during the determination of the stable fix points) we only need potential differences such that the absolute potential value does not play a role -- here we can use the new offset=False to avoid the reshifting. --- PyHEADTAIL/trackers/rf_bucket.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/PyHEADTAIL/trackers/rf_bucket.py b/PyHEADTAIL/trackers/rf_bucket.py index fa701742..f6523ed0 100644 --- a/PyHEADTAIL/trackers/rf_bucket.py +++ b/PyHEADTAIL/trackers/rf_bucket.py @@ -384,7 +384,8 @@ def acc_force(self, z, ignore_add_forces=False): return total_force(z) - self.deltaE / self.circumference - def rf_potential(self, V, h, dphi, dp, acceleration=True): + def rf_potential(self, V, h, dphi, p_increment, + acceleration=True, offset=True): '''Return the RF electric potential energy including the linear acceleration slope (if acceleration == True). @@ -395,6 +396,9 @@ def rf_potential(self, V, h, dphi, dp, acceleration=True): - p_increment: momentum increase per turn - acceleration: whether to superimpose the linear acceleration slope (induced by p_increment, default=True) + - offset: boolean whether the potential energy should be + shifted to zero at the unstable fix point enclosing + the separatrix of the RF bucket (default=True). 
''' v = (self.rf_potential(self.V, self.h, self.dphi, - self.p_increment, acceleration)(z) + + self.p_increment, acceleration, offset)(z) + sum(pot(z) for pot in self._add_potentials if not ignore_add_potentials)) if make_convex: From d49c46063a722ffee6a669a67b89f48c23027777 Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Thu, 22 Jun 2017 14:18:35 +0200 Subject: [PATCH 27/43] rf_bucket: extended fix point computation... ... now supports multi-harmonic RF buckets where the fundamental can be weaker than higher harmonics, leading to a strongly distorted separatrix. (e.g. stable fix point determination in an ac-/decelerating dual harmonic system with both harmonics at the same voltage works now, cf. CERN's PS Booster acceleration!) --- PyHEADTAIL/trackers/rf_bucket.py | 41 +++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/PyHEADTAIL/trackers/rf_bucket.py b/PyHEADTAIL/trackers/rf_bucket.py index f6523ed0..489ba2f4 100644 --- a/PyHEADTAIL/trackers/rf_bucket.py +++ b/PyHEADTAIL/trackers/rf_bucket.py @@ -548,13 +548,17 @@ def _get_bucket_boundaries(self): def _get_zsfp_and_zufp(self): '''Return (z_sfp, z_ufp), - where z_sfp is the synchronous z on stable fix point, - and z_ufp is the z of the (first) unstable fix point. - - Works for dominant harmonic situations which look like - a single harmonic (which may be slightly perturbed), i.e. - only one stable fix point and at most - 2 unstable fix points (stationary case). + where z_sfp is the z location of the stable fix points, + and z_ufp is the z location of the unstable fix points + belonging to this RF bucket. + + A stationary RF bucket has the right-most UFP overlapping + with the adjacent RF bucket's separatrix, while for an + ac-/decelerating RF bucket one of the two out-most UFP always + belongs to the separatrix of the adjacent RF bucket. This UFP + will be discarded (although you find it with the zero crossing + of the total_force) by comparing the voltages between the + out-most UFP.
''' z0 = np.atleast_1d(self.zero_crossings(self.total_force)) @@ -566,18 +570,27 @@ def _get_zsfp_and_zufp(self): 'why do you ask me for bucket boundaries ' + 'in this hyperbolic phase space structure?!') - z0odd = z0[::2] - z0even = z0[1::2] - if len(z0) == 1: # exactly zero bucket area return z0, z0 + V_left = self.total_potential(z0[0], make_convex=True, offset=False) + V_right = self.total_potential(z0[-1], make_convex=True, offset=False) + if self.eta0 * self.p_increment > 0: - # separatrix ufp right of sfp - z_sfp, z_ufp = z0odd, z0even + # separatrix ufp right of sfp AND we are ac-/decelerating + if V_left < V_right: + # --> need to remove first ufp (belongs to bucket to the left) + z0 = z0[1:] + z_sfp, z_ufp = z0[::2], z0[1::2] + elif self.eta0 * self.p_increment == 0: + # stationary bucket, need both left and right UFP (overlapping!) + z_sfp, z_ufp = z0[1::2], z0[::2] else: - # separatrix ufp left of sfp - z_sfp, z_ufp = z0even, z0odd + # separatrix ufp left of sfp AND we are ac-/decelerating + if V_right < V_left: + # --> need to remove last ufp (belongs to bucket to the right) + z0 = z0[:-1] + z_sfp, z_ufp = z0[1::2], z0[::2] return z_sfp, z_ufp From 9950c58eeb1a6e97f2eec93bad77e8f8aa51e3a6 Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Thu, 22 Jun 2017 14:28:07 +0200 Subject: [PATCH 28/43] rfbucket_matching: change to brentq for both epsn and sigma matching ... makes the RFBucketMatcher more flexible! Cf. RFBucket_Matching.ipynb in github/PyCOMPLETE/PyHEADTAIL-playground for accelerating double harmonic buckets and non-thermal distributions... it works so nicely! :-)) --- PyHEADTAIL/particles/rfbucket_matching.py | 38 ++++++++++++----------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/PyHEADTAIL/particles/rfbucket_matching.py b/PyHEADTAIL/particles/rfbucket_matching.py index 3e098c39..830306f0 100644 --- a/PyHEADTAIL/particles/rfbucket_matching.py +++ b/PyHEADTAIL/particles/rfbucket_matching.py @@ -89,18 +89,21 @@ def error_from_target_epsn(ec): ec, from_variable='epsn') emittance = self._compute_emittance(self.rfbucket, self.psi) + if np.isnan(emittance): raise ValueError + self.prints('... distance to target emittance: ' + '{:.2e}'.format(emittance-epsn_z)) return emittance-epsn_z - try: - ec_bar = newton(error_from_target_epsn, epsn_z, tol=5e-4) - except RuntimeError: - self.warns('RFBucketMatcher -- failed to converge while ' - 'using Newton-Raphson method. ' - 'Instead trying classic Brent method...') - ec_bar = brentq(error_from_target_epsn, epsn_z/2, 2*epsn_max) + # try: + # ec_bar = newton(error_from_target_epsn, epsn_z, tol=5e-4) + # except RuntimeError, ValueError: + # self.warns('RFBucketMatcher -- failed to converge while ' + # 'using Newton-Raphson method. ' + # 'Instead trying classic Brent method...') + ec_bar = brentq(error_from_target_epsn, epsn_z/100, 2*epsn_max, + rtol=1e-5) self.psi_object.H0 = self.rfbucket.guess_H0( ec_bar, from_variable='epsn') @@ -133,17 +136,16 @@ def error_from_target_sigma(H0): return length-sigma - try: - H0_init = self.rfbucket.guess_H0( - sigma, from_variable='sigma') - H0_fin = newton(error_from_target_sigma, H0_init) - except RuntimeError: - self.warns('RFBucketMatcher -- failed to converge while ' - 'using Newton-Raphson method. 
' - 'Instead trying classic Brent method...') - H0_max = max(self.rfbucket.hamiltonian( - self.rfbucket.z_sfp, 0, make_convex=True))*2 - H0_fin = brentq(error_from_target_sigma, H0_max*1e-3, H0_max) + # try: + # H0_init = self.rfbucket.guess_H0( + # sigma, from_variable='sigma') + # H0_fin = newton(error_from_target_sigma, H0_init) + # except RuntimeError, ValueError: + # self.warns('RFBucketMatcher -- failed to converge while ' + # 'using Newton-Raphson method. ' + # 'Instead trying classic Brent method...') + H0_max = self.rfbucket.h_sfp(make_convex=True) * 2 + H0_fin = brentq(error_from_target_sigma, 1e-3, H0_max, rtol=1e-5) self.psi_object.H0 = H0_fin sigma = self._compute_sigma(self.rfbucket, self.psi) From ea56c1b5ec1ed964d79b018e7190571d51fa8d3f Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Thu, 22 Jun 2017 17:23:50 +0200 Subject: [PATCH 29/43] rfbucket_matching: first use brentq then newton for matching ... because some few times (I only saw it with the thermal distribution) the BrentQ approach seems to have problems with large bunch lengths. In these cases we fall back to Newton-Raphson. --- PyHEADTAIL/particles/rfbucket_matching.py | 42 +++++++++++------------ 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/PyHEADTAIL/particles/rfbucket_matching.py b/PyHEADTAIL/particles/rfbucket_matching.py index 830306f0..9b36d0cf 100644 --- a/PyHEADTAIL/particles/rfbucket_matching.py +++ b/PyHEADTAIL/particles/rfbucket_matching.py @@ -96,14 +96,13 @@ def error_from_target_epsn(ec): return emittance-epsn_z - # try: - # ec_bar = newton(error_from_target_epsn, epsn_z, tol=5e-4) - # except RuntimeError, ValueError: - # self.warns('RFBucketMatcher -- failed to converge while ' - # 'using Newton-Raphson method. ' - # 'Instead trying classic Brent method...') - ec_bar = brentq(error_from_target_epsn, epsn_z/100, 2*epsn_max, - rtol=1e-5) + try: + ec_bar = brentq(error_from_target_epsn, epsn_z/100, 2*epsn_max, + rtol=1e-5) + except ValueError: + self.warns('Failed to converge with Brent method, ' + 'continuing with Newton-Raphson method.') + ec_bar = newton(error_from_target_epsn, epsn_z, tol=1e-5) self.psi_object.H0 = self.rfbucket.guess_H0( ec_bar, from_variable='epsn') @@ -124,9 +123,10 @@ def psi_for_bunchlength_newton_method(self, sigma): sigma = sigma_max*0.99 self.prints('*** Maximum RMS bunch length ' + str(sigma_max) + 'm.') - def error_from_target_sigma(H0): + def error_from_target_sigma(sc): '''Width for bunch length''' - self.psi_object.H0 = H0 + self.psi_object.H0 = self.rfbucket.guess_H0( + sc, from_variable='sigma') length = self._compute_sigma(self.rfbucket, self.psi) if np.isnan(length): raise ValueError @@ -136,18 +136,16 @@ def error_from_target_sigma(H0): return length-sigma - # try: - # H0_init = self.rfbucket.guess_H0( - # sigma, from_variable='sigma') - # H0_fin = newton(error_from_target_sigma, H0_init) - # except RuntimeError, ValueError: - # self.warns('RFBucketMatcher -- failed to converge while ' - # 'using Newton-Raphson method. 
' - # 'Instead trying classic Brent method...') - H0_max = self.rfbucket.h_sfp(make_convex=True) * 2 - H0_fin = brentq(error_from_target_sigma, 1e-3, H0_max, rtol=1e-5) - - self.psi_object.H0 = H0_fin + try: + sc_bar = brentq(error_from_target_sigma, sigma/100, 2*sigma_max, + rtol=1e-5) + except ValueError: + self.warns('Failed to converge with Brent method, ' + 'continuing with Newton-Raphson method.') + sc_bar = newton(error_from_target_sigma, sigma, tol=1e-5) + + self.psi_object.H0 = self.rfbucket.guess_H0( + sc_bar, from_variable='sigma') sigma = self._compute_sigma(self.rfbucket, self.psi) self.prints('--> Bunch length: ' + str(sigma)) emittance = self._compute_emittance(self.rfbucket, self.psi) From 88b8a3a4f21633d528935de94923fca677500a20 Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Mon, 26 Jun 2017 15:27:44 +0200 Subject: [PATCH 30/43] setup.py: correcting Kevin's email address --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5a6bd71f..6b2092b9 100755 --- a/setup.py +++ b/setup.py @@ -87,7 +87,7 @@ 'for simulating macro-particle beam dynamics with collective effects.', url='https://github.com/PyCOMPLETE/PyHEADTAIL', author='Kevin Li', - author_email='Kevin.Li@cern.ch', + author_email='Kevin.Shing.Bruce.Li@cern.ch', maintainer='Adrian Oeftiger', maintainer_email='Adrian.Oeftiger@cern.ch', packages=find_packages(), From d207a07d6fef9dd2246bfcbbaedc4ef6ea4fb98f Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Mon, 26 Jun 2017 18:19:10 +0200 Subject: [PATCH 31/43] rfbucket_matching: make warning clearer --- PyHEADTAIL/particles/rfbucket_matching.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/PyHEADTAIL/particles/rfbucket_matching.py b/PyHEADTAIL/particles/rfbucket_matching.py index 9b36d0cf..a027d8a5 100644 --- a/PyHEADTAIL/particles/rfbucket_matching.py +++ b/PyHEADTAIL/particles/rfbucket_matching.py @@ -100,8 +100,9 @@ def error_from_target_epsn(ec): ec_bar = brentq(error_from_target_epsn, epsn_z/100, 2*epsn_max, rtol=1e-5) except ValueError: - self.warns('Failed to converge with Brent method, ' - 'continuing with Newton-Raphson method.') + self.warns( + 'RFBucketMatcher: failed to converge with Brent method, ' + 'continuing with Newton-Raphson method.') ec_bar = newton(error_from_target_epsn, epsn_z, tol=1e-5) self.psi_object.H0 = self.rfbucket.guess_H0( @@ -140,8 +141,9 @@ def error_from_target_sigma(sc): sc_bar = brentq(error_from_target_sigma, sigma/100, 2*sigma_max, rtol=1e-5) except ValueError: - self.warns('Failed to converge with Brent method, ' - 'continuing with Newton-Raphson method.') + self.warns( + 'RFBucketMatcher: failed to converge with Brent method, ' + 'continuing with Newton-Raphson method.') sc_bar = newton(error_from_target_sigma, sigma, tol=1e-5) self.psi_object.H0 = self.rfbucket.guess_H0( From e19fb2b91ecb50fe085b7a69802120b2c4adf9c6 Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Tue, 27 Jun 2017 15:10:04 +0200 Subject: [PATCH 32/43] pypic_spacecharge: missing gamma in FrozenGaussianSpaceCharge25D One gamma factor was missing before for the evaluation of the lab fields from the Bassetti-Erskine formula. The Lorentz trafo including magnetic fields yields a force in the lab frame which goes F ~ E_beam{frame} / gamma^2. One gamma comes from the Lorentz trafo of the Bassetti-Erskine field itself including the magnetic field and another gamma factor comes from the line density (which needs to be evaluated in the beam frame, i.e. 
lambda_beam = slices.lambda_z() / gamma): F = q E_beam / gamma = q (E_n_BassErsk * lambda_beam) / gamma = q E_n_BassErsk * lambda_lab / gamma^2 --- PyHEADTAIL/spacecharge/pypic_spacecharge.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/PyHEADTAIL/spacecharge/pypic_spacecharge.py b/PyHEADTAIL/spacecharge/pypic_spacecharge.py index 7ae7ee6c..aea4d4f4 100644 --- a/PyHEADTAIL/spacecharge/pypic_spacecharge.py +++ b/PyHEADTAIL/spacecharge/pypic_spacecharge.py @@ -177,8 +177,10 @@ def __init__(self, slicer, length, sigma_x, sigma_y, gamma, be_sc = TransverseGaussianSpaceCharge(None, None) fields_beamframe = be_sc.get_efieldn(xg, yg, 0, 0, sigma_x, sigma_y) - # Lorentz trafo to lab frame: - fields = [f/gamma for f in fields_beamframe] + # Lorentz trafo to lab frame + magnetic fields: + # F = q E_beam / gamma = q (E_n_BassErsk * lambda_beam) / gamma + # = q E_n_BassErsk * lambda_lab / gamma^2 + fields = [f * gamma**-2 for f in fields_beamframe] super(FrozenGaussianSpaceCharge25D, self).__init__( slicer=slicer, length=length, mesh=mesh, fields=fields, From ebadafab309ad773b9efb344311b1756de61e597 Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Fri, 14 Jul 2017 08:59:42 +0200 Subject: [PATCH 33/43] pmath: empty_like uses memory pool allocator Speeds up memory allocation by one order of magnitude! E.g. on C2075, for >>> a = gpuarray.arange(1e8) the timings are 1. pm.empty_like(a): 327us 2. gpuarray.empty_like(a): 4.19ms --- PyHEADTAIL/general/pmath.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/PyHEADTAIL/general/pmath.py b/PyHEADTAIL/general/pmath.py index 9df0b9dc..f6eb373f 100644 --- a/PyHEADTAIL/general/pmath.py +++ b/PyHEADTAIL/general/pmath.py @@ -273,7 +273,9 @@ def _slice_to_particles(sliceset, slice_array, particle_array=None): allocator=gpu_utils.memory_pool.allocate, **kwargs), 'empty': lambda *args, **kwargs: pycuda.gpuarray.empty(*args, allocator=gpu_utils.memory_pool.allocate, **kwargs), - 'empty_like': pycuda.gpuarray.empty_like, + 'empty_like': lambda ary, *args, **kwargs: pycuda.gpuarray.empty( + ary.shape, ary.dtype, *args, + allocator=gpu_utils.memory_pool.allocate, **kwargs), 'ones': lambda *args, **kwargs: pycuda.gpuarray.zeros(*args, allocator=gpu_utils.memory_pool.allocate, **kwargs) + 1, 'device': 'GPU', From cb2f67a1d975ac50604d7ef00f60b90c51b5a5ef Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Wed, 19 Jul 2017 11:53:57 +0200 Subject: [PATCH 34/43] field_map: FieldMapSliceWise transversely centres slice by slice... ... as before the whole beam centroid was deducted like in the general FieldMap case but we actually want the transverse slice centroids to be moved to zero before applying the transverse field map slice-by-slice. Like this, FrozenSpaceCharge25D correctly applies the transverse field around the local bunch centre along the longitudinal direction. This becomes important e.g. for developing head-tail modes with impedances while including direct space charge. --- PyHEADTAIL/field_maps/field_map.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/PyHEADTAIL/field_maps/field_map.py b/PyHEADTAIL/field_maps/field_map.py index 21ad95a1..f297bda6 100644 --- a/PyHEADTAIL/field_maps/field_map.py +++ b/PyHEADTAIL/field_maps/field_map.py @@ -132,7 +132,13 @@ def __init__(self, slicer, *args, **kwargs): NB: mesh needs to be a two-dimensional mesh describing the discrete domain of the transverse fields. 
- NB2: the field values should be charge-density-normalised as + NB2: wrt_beam_centroid=True is implemented as a slice-by-slice + transverse centring of the beam as opposed to the superclass + FieldMap's 3D implementation, which sets the entire beam + centroid including the longitudinal centre-of-gravity to the + zero origin. + + NB3: the field values should be charge-density-normalised as they are multiplied by the line charge density for each slice, c.f. e.g. the Bassetti-Erskine formula without Q (as in the spacecharge module's @@ -150,12 +156,15 @@ def __init__(self, slicer, *args, **kwargs): def track(self, beam): # prepare argument for PyPIC mesh to particle interpolation - mx, my, mz = 0, 0, 0 + mx, my = 0, 0 if self.wrt_beam_centroid: - mx, my, mz = beam.mean_x(), beam.mean_y(), beam.mean_z() + slices = beam.get_slices( + self.slicer, statistics=["mean_x", "mean_y"]) + mx = slices.convert_to_particles(slices.mean_x) + my = slices.convert_to_particles(slices.mean_y) mp_coords = [beam.x - mx, beam.y - my, - beam.z - mz] # zip will cut to #fields + beam.z] # zip will cut to #fields mesh_fields_and_mp_coords = zip(self.fields, mp_coords) From 56be4c82a35831b71c4756f37dab9d2d8d20e73d Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Wed, 19 Jul 2017 15:28:39 +0200 Subject: [PATCH 35/43] release-script: fix drafting release The pull request message has to be fetched before it gets closed (i.e. before the release branch is merged into master). --- release.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/release.py b/release.py index ecf5c4ee..d75e4e44 100755 --- a/release.py +++ b/release.py @@ -305,6 +305,8 @@ def finalise_release(): # all tools installed to automatically draft release? draft_release = check_release_tools() + if draft_release: + message = get_pullrequest_message(new_version) # bump version file new_version = establish_new_version(version_location) @@ -340,7 +342,6 @@ def finalise_release(): # publish github release (with message from pull request) if draft_release: - message = get_pullrequest_message(new_version) assert subprocess.call( ['gothub', 'release', '-u', github_user, '-r', github_repo, '-t', 'v' + new_version, From 87dad155740364e691524deb4fb06bd0037c5e02 Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Wed, 19 Jul 2017 16:13:23 +0200 Subject: [PATCH 36/43] release-script: add PyPI releasing... ... by calling the release.py script from the master branch. --- release.py | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/release.py b/release.py index d75e4e44..2658d174 100755 --- a/release.py +++ b/release.py @@ -35,8 +35,10 @@ description=( 'Release a new version of PyHEADTAIL in 2 steps:\n' '1. prepare new release from develop branch ' - '(requires release type argument)\n' - '2. publish new release (requires to be on release branch already)'), + '(requires release type argument "part")\n' + '2. publish new release (requires to be on release branch already)\n' + 'optionally: release PyHEADTAIL to PyPI ' + '(requires to be on master branch)'), formatter_class=argparse.RawTextHelpFormatter ) parser.add_argument( @@ -75,6 +77,15 @@ def get_version(version_location): '''Retrieve the version from version_location file.''' return importlib.import_module(version_location).__version__ +def do_tag_and_version_match(version): + '''Return whether the current git tag and the given version match. + NB: returns True if and only if the current commit is tagged with + the version tag. 
+ ''' + current_commit = subprocess.check_output( + ['git', 'describe', '--dirty']).rstrip().decode("utf-8") + return current_commit == 'v{}'.format(version) + def which_part_increases(last_version, new_version): '''Return a string which version part is increased. Raise an error if new_version is not a valid direct successor to last_version. @@ -352,6 +363,29 @@ def finalise_release(): print ('*** Remember to manually draft this release from the ' 'github website.') +def release_pip(): + '''Release current version from master branch to PyPI.''' + if is_worktree_dirty(): + git_status() + raise EnvironmentError('Release process can only be initiated on ' + 'a clean git repository. You have uncommitted ' + 'changes in your files, please fix this first.') + if current_branch() != "master": + raise EnvironmentError( + 'PyPI releases can only be initiated from the master branch!') + current_version = get_version(version_location) + if not do_tag_and_version_match(current_version): + raise EnvironmentError( + 'the current master branch commit needs to be the tagged version ' + 'which matches the version stored in the version file!') + + assert subprocess.call(['python', 'setup.py', 'sdist']) == 0 + assert subprocess.call( + ['twine', 'upload', '-r', 'pypi', + 'dist/PyHEADTAIL-{}.tar.gz'.format(current_version)]) == 0 + assert subprocess.call(['rm', '-r', 'dist', 'PyHEADTAIL.egg-info']) == 0 + + # ALGORITHM FOR RELEASE PROCESS: if __name__ == '__main__': print ('*** Current working directory:\n' + os.getcwd() + '\n') @@ -361,6 +395,8 @@ def finalise_release(): if not (current_branch()[:len(release_branch_prefix)] == release_branch_prefix): args = parser.parse_args() + if current_branch() == 'master': + release_pip() init_release(args.part) else: finalise_release() From ec272ce7a6da40bf88955eb8df6d2f12fb3ce89f Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Wed, 19 Jul 2017 16:54:06 +0200 Subject: [PATCH 37/43] pypic_spacecharge: expose atomicAdd FP32 and FP64 variants By default, the PIC works with floats. The net sum effect on the centroid of all particle kicks is of the order 10^{-10} for FP32. --- PyHEADTAIL/spacecharge/pypic_spacecharge.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/PyHEADTAIL/spacecharge/pypic_spacecharge.py b/PyHEADTAIL/spacecharge/pypic_spacecharge.py index aea4d4f4..9ca06e0e 100644 --- a/PyHEADTAIL/spacecharge/pypic_spacecharge.py +++ b/PyHEADTAIL/spacecharge/pypic_spacecharge.py @@ -47,7 +47,7 @@ class SpaceChargePIC(Element): ''' def __init__(self, length, pypic_algorithm, sort_particles=False, - *args, **kwargs): + pic_dtype=np.float32, *args, **kwargs): '''Arguments: - length: interaction length over which the space charge force is integrated. @@ -60,6 +60,11 @@ def __init__(self, length, pypic_algorithm, sort_particles=False, particle-to-mesh and mesh-to-particles methods due to coalesced memory access, especially on the GPU (test the timing for your parameters though!). + - pic_dtype: can be np.float32 or np.float64 and determines + (for sort_particles == False) which atomicAdd should be + used on the GPU. 
On GPUs with computing capability Date: Wed, 9 Aug 2017 14:48:02 +0200 Subject: [PATCH 38/43] pypic_factory: create_mesh now uses mesh.nz = nslices + 1 Since the foremost slice particles should be included in the PyPIClib GPU algorithms (specifically the atomicAdd variants) for space charge, we need the mesh to include one more node so that the number of cells in between the nodes are equal to the number of slices (fence post error). This was wrong before which is why the particles in the front were excluded from the particles_to_mesh method (which retrieves the charge density on the mesh). --- PyHEADTAIL/spacecharge/pypic_factory.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/PyHEADTAIL/spacecharge/pypic_factory.py b/PyHEADTAIL/spacecharge/pypic_factory.py index 491315fc..b1fa88c8 100644 --- a/PyHEADTAIL/spacecharge/pypic_factory.py +++ b/PyHEADTAIL/spacecharge/pypic_factory.py @@ -95,14 +95,17 @@ def create_mesh(mesh_origin, mesh_distances, mesh_size, ] if symmetrize_mesh_to_slices: mesh_origin[-1] += mesh_distances[-1]/2. - mesh_size = mesh_size + [slices.n_slices] + # n_slices+1 because front slice (highest slice id) should be inside + # mesh while the highest mesh node ends the last cell (fence post error!) + mesh_size = mesh_size + [slices.n_slices + 1] dim = len(mesh_origin) if not dim == len(mesh_distances) == len(mesh_size): raise ValueError('All arguments for the mesh need to have as ' 'many entries as the mesh should have dimensions!') mesh_class = getattr(meshing, 'RectMesh{dim}D'.format(dim=dim)) return mesh_class(map(ensure_cpu, mesh_origin), - map(ensure_cpu, mesh_distances), mesh_size, + map(ensure_cpu, mesh_distances), + map(ensure_cpu, mesh_size), mathlib=pm) From 28e3cc591f69945dc0f94a7024841cdf46c8c84c Mon Sep 17 00:00:00 2001 From: Adrian Oeftiger Date: Wed, 9 Aug 2017 14:50:37 +0200 Subject: [PATCH 39/43] pypic_spacecharge: self-consistent PIC space charge now uses FP64 By default, the PIC atomicAdd variant will use FP64 instead of FP32 precision, after a bug in PyPIC (introduced during recent changes) has been fixed. Timings of a PIC space charge track (1e6 particles) for the P100: - FP32: 3.78ms - FP64: 3.79ms - sorted approach: 8.05ms Accuracy for adding all kicks (i.e. the influence on the centroid which ideally due to Newton's 3rd law should be identically zero): - FP32: <5e-07 - FP64: <5e-14 - sorted approach: 5 use FP64 atomicAdd particles_to_mesh for space charge on new GPU architectures. --- PyHEADTAIL/spacecharge/pypic_spacecharge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyHEADTAIL/spacecharge/pypic_spacecharge.py b/PyHEADTAIL/spacecharge/pypic_spacecharge.py index 9ca06e0e..4958df47 100644 --- a/PyHEADTAIL/spacecharge/pypic_spacecharge.py +++ b/PyHEADTAIL/spacecharge/pypic_spacecharge.py @@ -47,7 +47,7 @@ class SpaceChargePIC(Element): ''' def __init__(self, length, pypic_algorithm, sort_particles=False, - pic_dtype=np.float32, *args, **kwargs): + pic_dtype=np.float64, *args, **kwargs): '''Arguments: - length: interaction length over which the space charge force is integrated. 
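The fence-post fix of PATCH 38 is easy to verify numerically: n_slices + 1 equidistant mesh nodes enclose exactly n_slices cells, so the foremost slice now lies inside the mesh. A self-contained sketch (plain numpy, all names local to this example):

    >>> import numpy as np
    >>> n_slices = 4
    >>> nodes = np.linspace(-1., 1., num=n_slices + 1)
    >>> len(np.diff(nodes))  # number of cells delimited by the nodes
    4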
From 66eb04e64a44792c415573121de19f550109d1e6 Mon Sep 17 00:00:00 2001
From: Adrian Oeftiger
Date: Thu, 10 Aug 2017 16:17:21 +0200
Subject: [PATCH 40/43] field_map: fixed typo in FieldMapSliceWise (zp -> dp)

---
 PyHEADTAIL/field_maps/field_map.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/PyHEADTAIL/field_maps/field_map.py b/PyHEADTAIL/field_maps/field_map.py
index f297bda6..4e0b4182 100644
--- a/PyHEADTAIL/field_maps/field_map.py
+++ b/PyHEADTAIL/field_maps/field_map.py
@@ -162,10 +162,12 @@ def track(self, beam):
                 self.slicer, statistics=["mean_x", "mean_y"])
             mx = slices.convert_to_particles(slices.mean_x)
             my = slices.convert_to_particles(slices.mean_y)
+        else:
+            slices = beam.get_slices(self.slicer)
+
         mp_coords = [beam.x - mx,
                      beam.y - my,
                      beam.z] # zip will cut to #fields
-
         mesh_fields_and_mp_coords = zip(self.fields, mp_coords)
 
         # electric fields at each particle position in lab frame [V/m]
@@ -173,13 +175,12 @@ def track(self, beam):
 
         # weigh electric field with slice line charge density;
         # integrate over dt, p0 comes from kicking xp=p_x/p0 instead of p_x
-        slices = beam.get_slices(self.slicer)
         lambda_z = slices.convert_to_particles(slices.lambda_z(smoothen=False))
         kick_factor = (self.length / (beam.beta*c) * beam.charge / beam.p0
                        * lambda_z)
 
         # apply kicks for 1-3 planes depending on #entries in fields
-        for beam_momentum, force_field in zip(['xp', 'yp', 'zp'], part_fields):
+        for beam_momentum, force_field in zip(['xp', 'yp', 'dp'], part_fields):
             val = getattr(beam, beam_momentum)
             setattr(beam, beam_momentum, val + force_field * kick_factor)
         # for 3D, the for loop explicitly does:

From 0430620c9b72b5641a99a31794821430dc9e4c5e Mon Sep 17 00:00:00 2001
From: Adrian Oeftiger
Date: Thu, 10 Aug 2017 17:33:10 +0200
Subject: [PATCH 41/43] test_dispatch: added new _slice_to_particles in test

Otherwise tests will fail.

---
 PyHEADTAIL/testing/unittests/test_dispatch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyHEADTAIL/testing/unittests/test_dispatch.py b/PyHEADTAIL/testing/unittests/test_dispatch.py
index 103486e6..8fc25e1d 100644
--- a/PyHEADTAIL/testing/unittests/test_dispatch.py
+++ b/PyHEADTAIL/testing/unittests/test_dispatch.py
@@ -75,7 +75,7 @@ def test_equivalency_CPU_GPU_functions(self):
         multi_param_fn = [
             'emittance', 'apply_permutation', 'mean_per_slice',
             'std_per_slice', 'emittance_per_slice', 'particles_within_cuts',
-            'particles_outside_cuts',
+            'particles_outside_cuts', 'slice_to_particles',
             'macroparticles_per_slice', 'take', 'convolve', 'arange',
             'zeros', 'seq', 'init_bunch_buffer', 'init_slice_buffer',
             'device', 'searchsortedright', 'searchsortedleft', 'cumsum', 'ones', 'wofz',
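Why the zp -> dp typo in PATCH 40 matters: PyHEADTAIL beams store the
longitudinal conjugate coordinate as dp (the relative momentum offset
(p - p0)/p0), not zp, so only with the fix does the third field plane
kick the right attribute. The truncating zip and the getattr/setattr
kick pattern in a self-contained toy (Bunch is a stand-in here, not the
PyHEADTAIL Particles class):

    from __future__ import print_function
    import numpy as np

    class Bunch(object):
        '''Toy beam exposing only the kicked attributes.'''
        def __init__(self, n):
            self.xp = np.zeros(n)   # horizontal momentum p_x/p0
            self.yp = np.zeros(n)   # vertical momentum p_y/p0
            self.dp = np.zeros(n)   # momentum offset (p - p0)/p0

    beam = Bunch(3)
    part_fields = [np.ones(3), 2 * np.ones(3)]   # only E_x and E_y given
    kick_factor = 1e-6

    # zip cuts the loop to the number of supplied field planes, so a
    # transverse-only field map never touches beam.dp:
    for beam_momentum, force_field in zip(['xp', 'yp', 'dp'], part_fields):
        setattr(beam, beam_momentum,
                getattr(beam, beam_momentum) + force_field * kick_factor)

    print(beam.xp, beam.yp, beam.dp)   # dp remains zero here
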
From f005648ddbf243b088e9bd6fdbf95aac294eadfa Mon Sep 17 00:00:00 2001
From: Adrian Oeftiger
Date: Thu, 10 Aug 2017 17:51:56 +0200
Subject: [PATCH 42/43] release-script: fixed bug from github-release-drafting
 process

---
 release.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/release.py b/release.py
index 2658d174..12370f5c 100755
--- a/release.py
+++ b/release.py
@@ -112,6 +112,22 @@ def which_part_increases(last_version, new_version):
     raise ValueError(
         'new_version is not a direct successor of last_version.')
 
+def validate_release_version(version_location):
+    '''Validate the new release version by comparing the release
+    branch name to the bumped previous version last_version, which is
+    read from version_location.
+    Raise an error if the new version is not a valid direct successor
+    to last_version. Return the new version.
+    '''
+    last_version = get_version(version_location)
+    release_version = current_branch()[len(release_branch_prefix):]
+
+    # make sure release_version incrementally succeeds last_version
+    which_part_increases(last_version, release_version)
+
+    return release_version
+
+
 def establish_new_version(version_location):
     '''Write the new release version to version_location. Check that
     this agrees with the bumped previous version.
@@ -120,11 +136,7 @@ def establish_new_version(version_location):
         - version_location: string, relative python module notation
           (e.g. PyHEADTAIL._version for PyHEADTAIL/_version.py)
     '''
-    last_version = get_version(version_location)
-    release_version = current_branch()[len(release_branch_prefix):]
-
-    # make sure release_version incrementally succeeds last_version
-    which_part_increases(last_version, release_version)
+    release_version = validate_release_version(version_location)
 
     vpath = version_location.replace('.', '/') + '.py'
     with open(vpath, 'wt') as vfile:
@@ -317,6 +329,7 @@ def finalise_release():
     # all tools installed to automatically draft release?
     draft_release = check_release_tools()
     if draft_release:
+        new_version = validate_release_version(version_location)
         message = get_pullrequest_message(new_version)
 
     # bump version file

From 3f6e96a480404da6482e6c8271c2c099418c3780 Mon Sep 17 00:00:00 2001
From: Adrian Oeftiger
Date: Thu, 10 Aug 2017 17:53:48 +0200
Subject: [PATCH 43/43] release-script: bumping version file.

---
 PyHEADTAIL/_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyHEADTAIL/_version.py b/PyHEADTAIL/_version.py
index c61b5e44..666b2f71 100644
--- a/PyHEADTAIL/_version.py
+++ b/PyHEADTAIL/_version.py
@@ -1 +1 @@
-__version__ = '1.11.6'
+__version__ = '1.12.0'
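The 1.11.6 -> 1.12.0 bump in PATCH 43 is precisely what the release
script's successor check accepts as a minor release. A sketch of the
semantics enforced by which_part_increases (assuming 'major.minor.patch'
version strings; this mirrors the behaviour described in release.py's
docstrings, not its verbatim implementation):

    def which_part_increases(last_version, new_version):
        '''Return which part ('major', 'minor' or 'patch') increases
        from last_version to new_version; raise ValueError otherwise.'''
        major, minor, patch = (int(x) for x in last_version.split('.'))
        new = [int(x) for x in new_version.split('.')]
        if new == [major + 1, 0, 0]:
            return 'major'
        if new == [major, minor + 1, 0]:
            return 'minor'
        if new == [major, minor, patch + 1]:
            return 'patch'
        raise ValueError(
            'new_version is not a direct successor of last_version.')

    assert which_part_increases('1.11.6', '1.12.0') == 'minor'

Any lower part resets to zero on a higher-part bump, which is why
'1.12.0' rather than '1.12.6' follows '1.11.6'.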