Skip to content

Commit

Permalink
release-script: Merge branch 'release/v1.13.5'
Browse files Browse the repository at this point in the history
  • Loading branch information
aoeftiger committed Jul 17, 2019
2 parents d33d1f9 + cd41689 commit 85242b0
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 146 deletions.
2 changes: 1 addition & 1 deletion PyHEADTAIL/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.13.4'
__version__ = '1.13.5'
40 changes: 26 additions & 14 deletions PyHEADTAIL/cobra_functions/stats.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -99,21 +99,28 @@ cpdef double emittance(double[::1] u, double[::1] up, double[::1] dp):
cdef double sigma11 = 0.
cdef double sigma12 = 0.
cdef double sigma22 = 0.
cdef double cov_u2 = covariance(u,u)
cdef double cov_u2 = covariance(u, u)
cdef double cov_up2 = covariance(up, up)
cdef double cov_u_up = covariance(up, u)
cdef double cov_u_dp = 0.
cdef double cov_up_dp = 0.
cdef double cov_dp2 = 1.

cdef double term_u2_dp = 0.
cdef double term_u_up_dp = 0.
cdef double term_up2_dp = 0.

if dp != None: #if not None, assign values to variables involving dp
cov_u_dp = covariance(u, dp)
cov_up_dp = covariance(up,dp)
cov_dp2 = covariance(dp,dp)
cov_dp2 = covariance(dp, dp)

if cov_dp2 != 0:
cov_u_dp = covariance(u, dp)
cov_up_dp = covariance(up, dp)

sigma11 = cov_u2 - cov_u_dp*cov_u_dp/cov_dp2
sigma12 = cov_u_up - cov_u_dp*cov_up_dp/cov_dp2
sigma22 = cov_up2 - cov_up_dp*cov_up_dp/cov_dp2
term_u2_dp = cov_u_dp * cov_u_dp / cov_dp2
term_u_up_dp = cov_u_dp * cov_up_dp / cov_dp2
term_up2_dp = cov_up_dp * cov_up_dp / cov_dp2

sigma11 = cov_u2 - term_u2_dp
sigma12 = cov_u_up - term_u_up_dp
sigma22 = cov_up2 - term_up2_dp

return cmath.sqrt(_det_beam_matrix(sigma11, sigma12, sigma22))

Expand Down Expand Up @@ -386,7 +393,7 @@ cpdef emittance_per_slice(int[::1] slice_index_of_particle,
cdef double[::1] cov_u_up = np.zeros(n_slices, dtype=np.double)
cdef double[::1] cov_u_dp = np.zeros(n_slices, dtype=np.double)
cdef double[::1] cov_up_dp = np.zeros(n_slices, dtype=np.double)
cdef double[::1] cov_dp2 = np.ones(n_slices, dtype=np.double)
cdef double[::1] cov_dp2 = np.zeros(n_slices, dtype=np.double)

# compute the covariances
cov_per_slice(slice_index_of_particle, particles_within_cuts,
Expand All @@ -395,6 +402,7 @@ cpdef emittance_per_slice(int[::1] slice_index_of_particle,
n_macroparticles, u, up, cov_u_up)
cov_per_slice(slice_index_of_particle, particles_within_cuts,
n_macroparticles, up, up, cov_up2)

if dp != None:
cov_per_slice(slice_index_of_particle, particles_within_cuts,
n_macroparticles, u, dp, cov_u_dp)
Expand All @@ -408,9 +416,13 @@ cpdef emittance_per_slice(int[::1] slice_index_of_particle,
cdef unsigned int i
for i in xrange(n_slices):
if n_macroparticles[i] > 1:
sigma11 = cov_u2[i] - cov_u_dp[i]*cov_u_dp[i]/cov_dp2[i]
sigma12 = cov_u_up[i] - cov_u_dp[i]*cov_up_dp[i]/cov_dp2[i]
sigma22 = cov_up2[i] - cov_up_dp[i]*cov_up_dp[i]/cov_dp2[i]
sigma11 = cov_u2[i]
sigma12 = cov_u_up[i]
sigma22 = cov_up2[i]
if dp != None and cov_dp2[i] != 0.:
sigma11 -= cov_u_dp[i] * cov_u_dp[i] / cov_dp2[i]
sigma12 -= cov_u_dp[i] * cov_up_dp[i] / cov_dp2[i]
sigma22 -= cov_up_dp[i] * cov_up_dp[i] / cov_dp2[i]
emittance[i] = cmath.sqrt(_det_beam_matrix(sigma11, sigma12, sigma22))
else:
emittance[i] = 0.
Expand Down
203 changes: 72 additions & 131 deletions PyHEADTAIL/gpu/gpu_wrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,63 +416,31 @@ def emittance_reference(u, up, dp):
up conjugate momentum array
dp longitudinal momentum variation
'''
sigma11 = 0.
sigma12 = 0.
sigma22 = 0.
cov_u2 = covariance(u,u)
cov_u2 = covariance(u, u)
cov_up2 = covariance(up, up)
cov_u_up = covariance(up, u)
cov_u_dp = 0.
cov_up_dp = 0.
cov_dp2 = 1.

term_u2_dp = 0.
term_u_up_dp = 0.
term_up2_dp = 0.

if dp is not None: #if not None, assign values to variables involving dp
cov_u_dp = covariance(u, dp)
cov_up_dp = covariance(up,dp)
cov_dp2 = covariance(dp,dp)
sigma11 = cov_u2 - cov_u_dp*cov_u_dp/cov_dp2
sigma12 = cov_u_up - cov_u_dp*cov_up_dp/cov_dp2
sigma22 = cov_up2 - cov_up_dp*cov_up_dp/cov_dp2
sigma12 = sigma12.get()
return np.sqrt(sigma11.get() * sigma22.get() - sigma12 * sigma12)
cov_dp2 = covariance(dp, dp)

def emittance_getasync(u, up, dp):
'''
Compute the emittance of GPU arrays. Test with streams. Not faster than
version below.
Args:
u coordinate array
up conjugate momentum array
dp longitudinal momentum variation
'''
if cov_dp2.get() != 0:
cov_u_dp = covariance(u, dp)
cov_up_dp = covariance(up, dp)

n = len(u)
sigma11 = 0.
sigma12 = 0.
sigma22 = 0.
cov_u_dp = 0.
cov_up_dp = 0.
cov_dp2 = 1.
mean_u = skcuda.misc.mean(u)
mean_up = skcuda.misc.mean(up)
term_u2_dp = cov_u_dp * cov_u_dp / cov_dp2
term_u_up_dp = cov_u_dp * cov_up_dp / cov_dp2
term_up2_dp = cov_up_dp * cov_up_dp / cov_dp2

tmp_u = sub_scalar(u, mean_u)
tmp_up = sub_scalar(up, mean_up)
cov_u2 = skcuda.misc.mean(tmp_u*tmp_u).get_async(stream=gpu_utils.streams[0]) * (n / (n + 1.))
cov_u_up = skcuda.misc.mean(tmp_u*tmp_up).get_async(stream=gpu_utils.streams[1]) * (n / (n + 1.))
cov_up2 = skcuda.misc.mean(tmp_up*tmp_up).get_async(stream=gpu_utils.streams[2]) * (n / (n + 1.))
sigma11 = cov_u2 - term_u2_dp
sigma12 = cov_u_up - term_u_up_dp
sigma22 = cov_up2 - term_up2_dp

if dp is not None: #if not None, assign values to variables involving dp
mean_dp = skcuda.misc.mean(dp)
tmp_dp = sub_scalar(dp, mean_dp)
cov_u_dp = skcuda.misc.mean(tmp_u*tmp_dp).get_async(stream=gpu_utils.streams[0]) * (n / (n + 1.))
cov_up_dp = skcuda.misc.mean(tmp_up*tmp_dp).get_async(stream=gpu_utils.streams[1]) * (n / (n + 1.))
cov_dp2 = skcuda.misc.mean(tmp_dp*tmp_dp).get_async(stream=gpu_utils.streams[2]) * (n / (n + 1.))
for i in xrange(3):
gpu_utils.streams[i].synchronize()
sigma11 = cov_u2 - cov_u_dp*cov_u_dp/cov_dp2
sigma12 = cov_u_up - cov_u_dp*cov_up_dp/cov_dp2
sigma22 = cov_up2 - cov_up_dp*cov_up_dp/cov_dp2
return np.sqrt(sigma11 * sigma22 - sigma12*sigma12)
sigma12 = sigma12.get()
return np.sqrt(sigma11.get() * sigma22.get() - sigma12 * sigma12)


def emittance_(u, up, dp):
Expand All @@ -493,20 +461,23 @@ def emittance_(u, up, dp):
cov_u2 = pycuda.gpuarray.sum(tmp_u * tmp_u)
cov_u_up = pycuda.gpuarray.sum(tmp_u * tmp_up)
cov_up2 = pycuda.gpuarray.sum(tmp_up * tmp_up)

sigma11 = cov_u2
sigma12 = cov_u_up
sigma22 = cov_up2

if dp is not None: #if not None, assign values to variables involving dp
mean_dp = skcuda.misc.mean(dp)
tmp_dp = sub_scalar(dp, mean_dp)
cov_u_dp = pycuda.gpuarray.sum(tmp_u * tmp_dp)
cov_up_dp = pycuda.gpuarray.sum(tmp_up * tmp_dp)
cov_dp2 = pycuda.gpuarray.sum(tmp_dp * tmp_dp)

sigma11 = cov_u2 - cov_u_dp*cov_u_dp/cov_dp2
sigma12 = cov_u_up - cov_u_dp*cov_up_dp/cov_dp2
sigma22 = cov_up2 - cov_up_dp*cov_up_dp/cov_dp2
else:
sigma11 = cov_u2
sigma12 = cov_u_up
sigma22 = cov_up2
if cov_dp2.get() != 0:
cov_u_dp = pycuda.gpuarray.sum(tmp_u * tmp_dp)
cov_up_dp = pycuda.gpuarray.sum(tmp_up * tmp_dp)

sigma11 -= cov_u_dp * cov_u_dp / cov_dp2
sigma12 -= cov_u_dp * cov_up_dp / cov_dp2
sigma22 -= cov_up_dp * cov_up_dp / cov_dp2
return pycuda.cumath.sqrt((1./(n*n+n))*(sigma11 * sigma22 - sigma12*sigma12))


Expand All @@ -518,7 +489,7 @@ def emittance(u, up, dp, stream=None):
Args:
u coordinate array
up conjugate momentum array
dp longitudinal momentum variation
dp longitudinal momentum variation (can be None)
stream: In which cuda stream to perform the computations
'''
n = len(u)
Expand All @@ -533,68 +504,32 @@ def emittance(u, up, dp, stream=None):
cov_u_up = pycuda.gpuarray.sum(tmp_space, stream=stream)
tmp_space = _multiply(tmp_up, tmp_up, out=tmp_space, stream=stream)
cov_up2 = pycuda.gpuarray.sum(tmp_space, stream=stream)
if dp is not None: #if not None, assign values to variables involving dp

include_dp = dp is not None
if include_dp: #if not None, assign values to variables involving dp
mean_dp = mean(dp, stream=stream)
tmp_dp = sub_scalar(dp, mean_dp, stream=stream)
tmp_space = _multiply(tmp_u, tmp_dp, out=tmp_space, stream=stream)
cov_u_dp = pycuda.gpuarray.sum(tmp_space, stream=stream)
tmp_space = _multiply(tmp_up, tmp_dp, out=tmp_space, stream=stream)
cov_up_dp = pycuda.gpuarray.sum(tmp_space, stream=stream)
tmp_space = _multiply(tmp_dp, tmp_dp, out=tmp_space, stream=stream)
cov_dp2 = pycuda.gpuarray.sum(tmp_space, stream=stream)
#em = _emittance_dispersion(n, cov_u2, cov_u_up, cov_up2, cov_u_dp, cov_up_dp, cov_dp2, out=out, stream=stream)
_emitt_disp(out, cov_u2, cov_u_up, cov_up2, cov_u_dp, cov_up_dp, cov_dp2,np.float64(n), stream=stream)
else:
#em = _emittance_no_dispersion(n, cov_u2, cov_u_up, cov_up2, out=out,stream=stream)
_emitt_nodisp(out, cov_u2, cov_u_up, cov_up2, np.float64(n), stream=stream)
return out

# spawn multiple streams for each direction/mean/...
# is not faster than above, since the mean() functions use up all the kernels!
def emittance_multistream(u, up, dp, stream=None):
'''
Compute the emittance of GPU arrays. Check the algorithm above for
a more readable version, this one has been 'optimized', e.g. mean->sum
and multiplication at the end to minimize kernel calls/inits of gpuarrs
Args:
u coordinate array
up conjugate momentum array
dp longitudinal momentum variation
stream: In which cuda stream to perform the computations
'''
n = len(u)
streams = gpu_utils.stream_emittance
mean_u = mean(u, stream=streams[0])
mean_up = mean(up, stream=streams[1])
tmp_u = sub_scalar(u, mean_u, stream=streams[0])
tmp_space = _multiply(tmp_u, tmp_u, stream=streams[0])
cov_u2 = pycuda.gpuarray.sum(tmp_space, stream=streams[0])
out = _empty_like(mean_u)
tmp_up = sub_scalar(up, mean_up, stream=streams[1])
streams[0].synchronize()
streams[1].synchronize()
tmp_space = _multiply(tmp_u, tmp_up, out=tmp_space, stream=stream) #specify out to reuse memory, the stream implicitly serializes everything s.t. nothing bad happens...
cov_u_up = pycuda.gpuarray.sum(tmp_space, stream=stream)
tmp_space = _multiply(tmp_up, tmp_up, out=tmp_space, stream=stream)
cov_up2 = pycuda.gpuarray.sum(tmp_space, stream=stream)
if dp is not None: #if not None, assign values to variables involving dp
mean_dp = mean(dp, stream=streams[2])
tmp_dp = sub_scalar(dp, mean_dp, stream=streams[2])
streams[2].synchronize()
tmp_space = _multiply(tmp_u, tmp_dp, out=tmp_space, stream=stream)
cov_u_dp = pycuda.gpuarray.sum(tmp_space, stream=stream)
tmp_space = _multiply(tmp_up, tmp_dp, out=tmp_space, stream=stream)
cov_up_dp = pycuda.gpuarray.sum(tmp_space, stream=stream)
tmp_space = _multiply(tmp_dp, tmp_dp, out=tmp_space, stream=stream)
cov_dp2 = pycuda.gpuarray.sum(tmp_space, stream=stream)

if cov_dp2.get() == 0:
include_dp = False
else:
tmp_space = _multiply(tmp_u, tmp_dp, out=tmp_space, stream=stream)
cov_u_dp = pycuda.gpuarray.sum(tmp_space, stream=stream)
tmp_space = _multiply(tmp_up, tmp_dp, out=tmp_space, stream=stream)
cov_up_dp = pycuda.gpuarray.sum(tmp_space, stream=stream)
if include_dp:
#em = _emittance_dispersion(n, cov_u2, cov_u_up, cov_up2, cov_u_dp, cov_up_dp, cov_dp2, out=out, stream=stream)
_emitt_disp(out, cov_u2, cov_u_up, cov_up2, cov_u_dp, cov_up_dp, cov_dp2,np.float64(n), stream=stream)
gpu_utils.dummy_1(mean_u, stream=stream)
_emitt_disp(out, cov_u2, cov_u_up, cov_up2,
cov_u_dp, cov_up_dp, cov_dp2, np.float64(n), stream=stream)
else:
#em = _emittance_no_dispersion(n, cov_u2, cov_u_up, cov_up2, out=out,stream=stream)
_emitt_nodisp(out, cov_u2, cov_u_up, cov_up2, np.float64(n), stream=stream)
return out


def cumsum(array, dest=None):
'''Return cumulative sum of 1-dimensional GPUArray data.
Works for dtypes np.int32 and np.float64. Wrapper for thrust
Expand All @@ -613,7 +548,7 @@ def cumsum(array, dest=None):
skcuda.misc.cumsum(dest)
return dest

#@profile

def argsort(to_sort):
'''
Return the permutation required to sort the array.
Expand Down Expand Up @@ -723,8 +658,6 @@ def _add_bounds_to_sliceset(sliceset):
sliceset.lower_bounds = lower_bounds
sliceset._pidx_begin = lower_bounds[0].get() # set those properties now!
sliceset._pidx_end = upper_bounds[-1].get() # this way .get() gets called only once
#print 'upper bounds ',sliceset.upper_bounds
#print 'lower bounds ',sliceset.lower_bounds

def sorted_mean_per_slice(sliceset, u, stream=None):
'''
Expand All @@ -747,8 +680,6 @@ def sorted_mean_per_slice(sliceset, u, stream=None):
u.gpudata, np.int32(sliceset.n_slices),
mean_u.gpudata,
block=block, grid=grid, stream=stream)
#pycuda.autoinit.context.synchronize()
#gpu_utils.context.synchronize()
return mean_u

def sorted_std_per_slice(sliceset, u, stream=None):
Expand Down Expand Up @@ -799,23 +730,25 @@ def sorted_emittance_per_slice_slow(sliceset, u, up, dp=None, stream=None):
sliceset specifying slices
u, up the quantities of which to compute the emittance, e.g. x,xp
'''
### computes the covariance on different streams
#n_streams = 3 #HARDCODED FOR NOW
cov_u2 = sorted_cov_per_slice(sliceset, u, u, stream=stream)
cov_up2= sorted_cov_per_slice(sliceset, up, up, stream=stream)
cov_up2 = sorted_cov_per_slice(sliceset, up, up, stream=stream)
cov_u_up = sorted_cov_per_slice(sliceset, u, up, stream=stream)

sigma11 = cov_u2
sigma12 = cov_u_up
sigma22 = cov_up2

if dp is not None:
cov_u_dp = sorted_cov_per_slice(sliceset, u, dp, stream=stream)
cov_up_dp= sorted_cov_per_slice(sliceset, up, dp, stream=stream)
cov_dp2 = sorted_cov_per_slice(sliceset, dp, dp, stream=stream)
else:
cov_dp2 = pycuda.gpuarray.zeros_like(cov_u2) + 1.
cov_u_dp = pycuda.gpuarray.zeros_like(cov_u2)
cov_up_dp = pycuda.gpuarray.zeros_like(cov_u2)

sigma11 = cov_u2 - cov_u_dp*cov_u_dp/cov_dp2
sigma12 = cov_u_up - cov_u_dp*cov_up_dp/cov_dp2
sigma22 = cov_up2 - cov_up_dp*cov_up_dp/cov_dp2
if cov_dp2.get().any():
cov_u_dp = sorted_cov_per_slice(sliceset, u, dp, stream=stream)
cov_up_dp= sorted_cov_per_slice(sliceset, up, dp, stream=stream)

sigma11 -= cov_u_dp * cov_u_dp / cov_dp2
sigma12 -= cov_u_dp * cov_up_dp / cov_dp2
sigma22 -= cov_up_dp * cov_up_dp / cov_dp2

emittance = pycuda.cumath.sqrt(sigma11*sigma22 - sigma12*sigma12)
return emittance

Expand All @@ -838,16 +771,24 @@ def sorted_emittance_per_slice(sliceset, u, up, dp=None, stream=None):
# required here since the scaling is done in the cov_per_slice
# --> 1/(n*n + n) must be 1. ==> n = sqrt(5)/2 -0.5
n = np.sqrt(5.)/2. - 0.5
if dp is not None:

include_dp = dp is not None
if include_dp:
cov_u_dp = sorted_cov_per_slice(sliceset, u, dp, stream=streams[3])
cov_up_dp= sorted_cov_per_slice(sliceset, up, dp, stream=streams[4])
cov_dp2 = sorted_cov_per_slice(sliceset, dp, dp, stream=streams[5])
for s in streams:
s.synchronize()
_emitt_disp(out, cov_u2, cov_u_up, cov_up2, cov_u_dp, cov_up_dp, cov_dp2,np.float64(n), stream=stream)
if not cov_dp2.get().any():
include_dp = False
else:
for s in [streams[0], streams[1], streams[2]]:
for s in streams[:3]:
s.synchronize()

if include_dp:
_emitt_disp(out, cov_u2, cov_u_up, cov_up2,
cov_u_dp, cov_up_dp, cov_dp2, np.float64(n), stream=stream)
else:
_emitt_nodisp(out, cov_u2, cov_u_up, cov_up2, np.float64(n), stream=stream)
return out

Expand Down

0 comments on commit 85242b0

Please sign in to comment.