Skip to content

Commit

Permalink
Add clarifying comment on modulo optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
jarnesino committed Oct 30, 2024
1 parent e638e65 commit a3b7103
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions cuda/src/poly/eval_at_point.cu
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,9 @@ __global__ void eval_many_at_point_first_pass(m31 **g_coeffs, qm31 *temp, qm31 *

int poly_index = blockIdx.x / blocks_in_poly;

s_coeffs[idx] = g_coeffs[poly_index][(blockIdx.x % blocks_in_poly) * coeffs_size + idx];
s_coeffs[idx + blockDim.x] = g_coeffs[poly_index][(blockIdx.x % blocks_in_poly) * coeffs_size + idx + blockDim.x];
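// A % X == A & (X - 1) when X is a power of two, so the mask below replaces
// blockIdx.x % blocks_in_poly. This is only equivalent if blocks_in_poly is a power of two.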
s_coeffs[idx] = g_coeffs[poly_index][(blockIdx.x & (blocks_in_poly - 1)) * coeffs_size + idx];
s_coeffs[idx + blockDim.x] = g_coeffs[poly_index][(blockIdx.x & (blocks_in_poly - 1)) * coeffs_size + idx + blockDim.x];
__syncthreads();

int level_size = coeffs_size >> 1;
Expand Down

0 comments on commit a3b7103

Please sign in to comment.