Skip to content

Commit

Permalink
Current status of reduction generalization and small-destination
Browse files Browse the repository at this point in the history
support.
  • Loading branch information
obilaniu committed Jan 25, 2017
1 parent d838f6a commit d80fc0e
Show file tree
Hide file tree
Showing 3 changed files with 1,541 additions and 349 deletions.
115 changes: 102 additions & 13 deletions src/gpuarray/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,27 @@ typedef enum _ga_order {
GA_F_ORDER=1
} ga_order;

/**
* Supported array reduction operations.
*/

typedef enum _ga_reduce_op {
  GA_REDUCE_SUM          =  0, /**< Sum: +                                */
  GA_REDUCE_PROD         =  1, /**< Product: *                            */
  GA_REDUCE_PRODNZ       =  2, /**< Non-zero product: * (!= 0)            */
  GA_REDUCE_MIN          =  3, /**< Minimum: min()                        */
  GA_REDUCE_MAX          =  4, /**< Maximum: max()                        */
  GA_REDUCE_ARGMIN       =  5, /**< Argument of minimum: argmin()         */
  GA_REDUCE_ARGMAX       =  6, /**< Argument of maximum: argmax()         */
  GA_REDUCE_MINANDARGMIN =  7, /**< Both at once: min(), argmin()         */
  GA_REDUCE_MAXANDARGMAX =  8, /**< Both at once: max(), argmax()         */
  GA_REDUCE_AND          =  9, /**< Bitwise and: &                        */
  GA_REDUCE_OR           = 10, /**< Bitwise or: |                         */
  GA_REDUCE_XOR          = 11, /**< Bitwise xor: ^                        */
  GA_REDUCE_ALL          = 12, /**< Logical and: &&, all()                */
  GA_REDUCE_ANY          = 13  /**< Logical or: ||, any()                 */
} ga_reduce_op;

/**
* Checks if all the specified flags are set.
*
Expand Down Expand Up @@ -614,26 +635,31 @@ GPUARRAY_PUBLIC void GpuArray_fprintf(FILE *fd, const GpuArray *a);

GPUARRAY_PUBLIC int GpuArray_fdump(FILE *fd, const GpuArray *a);


/**
* @brief Compute a reduction over a list of axes. The supported reductions
* are sum (+), product (*), non-zero product (* != 0), min, max,
* argmin, argmax, min-and-argmin, max-and-argmax, and (&), or (|),
* xor (^), all (&&) and any (||).
*
* Returns one destination tensor (two in the case of
* min-and-argmin/max-and-argmax). The axes of the destination tensor(s) are
* a strict subset of the axes of the source tensor. The axes to be reduced
* are specified by the caller, and the reduction is performed over these
* axes, which are then removed in the destination.
*
* @param [out] dst The destination tensor. Has the same type as the source.
* @param [out] dstArg The destination tensor for the arguments of
* minima/maxima operations. Has type int64.
* @param [in] src The source tensor.
* @param [in] reduxLen The number of axes reduced. Must be >= 1 and
* <= src->nd.
* @param [in] reduxList A list of integers of length reduxLen, indicating
* the axes to be reduced. The order of the axes
* matters for dstArg index calculations (GpuArray_argmin,
* GpuArray_argmax, GpuArray_minandargmin,
* GpuArray_maxandargmax). All entries in the list must be
* unique, >= 0 and < src->nd.
*
* For example, if a 5D-tensor is max-reduced with an axis
* list of [3,4,1], then reduxLen shall be 3, and the
* index calculation in every point shall take the form
*
Expand All @@ -647,11 +673,74 @@ GPUARRAY_PUBLIC int GpuArray_fdump(FILE *fd, const GpuArray *a);
* code otherwise.
*/

GPUARRAY_PUBLIC int GpuArray_maxandargmax(GpuArray* dstMax,
GpuArray* dstArgmax,
GPUARRAY_PUBLIC int GpuArray_sum (GpuArray* dst,
const GpuArray* src,
unsigned reduxLen,
const unsigned* reduxList);
GPUARRAY_PUBLIC int GpuArray_prod (GpuArray* dst,
const GpuArray* src,
unsigned reduxLen,
const unsigned* reduxList);
GPUARRAY_PUBLIC int GpuArray_prodnz (GpuArray* dst,
const GpuArray* src,
unsigned reduxLen,
const unsigned* reduxList);
GPUARRAY_PUBLIC int GpuArray_min (GpuArray* dst,
const GpuArray* src,
unsigned reduxLen,
const unsigned* reduxList);
GPUARRAY_PUBLIC int GpuArray_max (GpuArray* dst,
const GpuArray* src,
unsigned reduxLen,
const unsigned* reduxList);
GPUARRAY_PUBLIC int GpuArray_argmin (GpuArray* dstArg,
const GpuArray* src,
unsigned reduxLen,
const unsigned* reduxList);
GPUARRAY_PUBLIC int GpuArray_argmax (GpuArray* dstArg,
const GpuArray* src,
unsigned reduxLen,
const unsigned* reduxList);
GPUARRAY_PUBLIC int GpuArray_minandargmin(GpuArray* dst,
GpuArray* dstArg,
const GpuArray* src,
unsigned reduxLen,
const unsigned* reduxList);
GPUARRAY_PUBLIC int GpuArray_maxandargmax(GpuArray* dst,
GpuArray* dstArg,
const GpuArray* src,
unsigned reduxLen,
const unsigned* reduxList);
GPUARRAY_PUBLIC int GpuArray_and (GpuArray* dst,
const GpuArray* src,
unsigned reduxLen,
const unsigned* reduxList);
GPUARRAY_PUBLIC int GpuArray_or (GpuArray* dst,
const GpuArray* src,
unsigned reduxLen,
const unsigned* reduxList);
GPUARRAY_PUBLIC int GpuArray_xor (GpuArray* dst,
const GpuArray* src,
unsigned reduxLen,
const unsigned* reduxList);
GPUARRAY_PUBLIC int GpuArray_all (GpuArray* dst,
const GpuArray* src,
unsigned reduxLen,
const unsigned* reduxList);
GPUARRAY_PUBLIC int GpuArray_any (GpuArray* dst,
const GpuArray* src,
unsigned reduxLen,
const unsigned* reduxList);
GPUARRAY_PUBLIC int GpuArray_reduction (ga_reduce_op op,
GpuArray* dst,
GpuArray* dstArg,
const GpuArray* src,
unsigned reduxLen,
const unsigned* reduxList);





#ifdef __cplusplus
}
Expand Down
Loading

0 comments on commit d80fc0e

Please sign in to comment.