Skip to content

Commit

Permalink
Apply suggestions from reviews
Browse files Browse the repository at this point in the history
  • Loading branch information
mhaseeb123 committed Nov 14, 2024
1 parent 5ef5966 commit 9d0be1e
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 7 deletions.
12 changes: 7 additions & 5 deletions cpp/src/io/parquet/page_decode.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,15 @@ inline __device__ bool is_bounds_page(page_state_s* const s,
size_t const begin = start_row;
size_t const end = start_row + num_rows;

// For non-nested schemas, rows cannot span pages, so use a more restrictive test except for
// the page_end. This is because we may have adjusted the `num_rows` for the last page in
// `generate_list_column_row_count_estimates()` to compensate for the list row size estimates
// in case of chunked read mode.
// For non-nested schemas, rows cannot span pages, so use a more restrictive test. Make sure to
// relax the predicate for page_end if we adjusted the `num_rows` for the last page to compensate
// for list row size estimates in `generate_list_column_row_count_estimates()` in chunked read
// mode.
return has_repetition
? ((page_begin <= begin && page_end >= begin) || (page_begin <= end && page_end >= end))
: ((page_begin < begin && page_end > begin) || (page_begin < end && page_end >= end));
: ((page_begin < begin && page_end > begin) ||
(page_begin < end &&
(s->page.is_num_rows_adjusted ? (page_end >= end) : (page_end > end))));
}

/**
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/io/parquet/parquet_gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,10 @@ struct PageInfo {
// - In the case of a nested schema, you have to decode the repetition and definition
// levels to extract actual column values
int32_t num_input_values;
int32_t chunk_row; // starting row of this page relative to the start of the chunk
int32_t num_rows; // number of rows in this page
int32_t chunk_row; // starting row of this page relative to the start of the chunk
int32_t num_rows; // number of rows in this page
bool is_num_rows_adjusted; // Flag to indicate if the number of rows of this page has been
// adjusted to compensate for the list row size estimates.
// the next four are calculated in gpuComputePageStringSizes
int32_t num_nulls; // number of null values (V2 header), but recalculated for string cols
int32_t num_valids; // number of non-null values, taking into account skip_rows/num_rows
Expand Down
1 change: 1 addition & 0 deletions cpp/src/io/parquet/reader_impl_preprocess.cu
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,7 @@ struct set_final_row_count {
size_t const page_start_row = chunk.start_row + page.chunk_row;
size_t const chunk_last_row = chunk.start_row + chunk.num_rows;
page.num_rows = chunk_last_row - page_start_row;
page.is_num_rows_adjusted = true;
}
};

Expand Down

0 comments on commit 9d0be1e

Please sign in to comment.