diff --git a/dlio_benchmark/data_generator/indexed_binary_generator.py b/dlio_benchmark/data_generator/indexed_binary_generator.py index 6a7013b9..719bf238 100644 --- a/dlio_benchmark/data_generator/indexed_binary_generator.py +++ b/dlio_benchmark/data_generator/indexed_binary_generator.py @@ -69,7 +69,7 @@ def generate(self): out_path_spec_off_idx = self.index_file_path_off(out_path_spec) out_path_spec_sz_idx = self.index_file_path_size(out_path_spec) fh = MPI.File.Open(comm, out_path_spec, amode) - samples_per_loop = int(MB / sample_size) + samples_per_loop = int(MB * 16 / sample_size) for sample_index in range(self.my_rank*samples_per_rank, samples_per_rank*(self.my_rank+1), samples_per_loop): #logging.info(f"{utcnow()} rank {self.my_rank} writing {sample_index} * {samples_per_loop} for {samples_per_rank} samples") diff --git a/dlio_benchmark/reader/indexed_binary_mmap_reader.py b/dlio_benchmark/reader/indexed_binary_mmap_reader.py index 500f9d2c..f398a5dd 100644 --- a/dlio_benchmark/reader/indexed_binary_mmap_reader.py +++ b/dlio_benchmark/reader/indexed_binary_mmap_reader.py @@ -57,10 +57,10 @@ def load_index_file(self, global_sample_idx, filename, sample_index): self.file_map_ibr[filename] = [] bin_buffer_mmap = np.memmap(offset_file, mode='r', order='C') bin_buffer = memoryview(bin_buffer_mmap) - self.file_map_ibr[filename].append(np.frombuffer(bin_buffer, dtype=np.uint8)) + self.file_map_ibr[filename].append(np.frombuffer(bin_buffer, dtype=np.uint64)) bin_buffer_mmap = np.memmap(sz_file, mode='r', order='C') bin_buffer = memoryview(bin_buffer_mmap) - self.file_map_ibr[filename].append(np.frombuffer(bin_buffer, dtype=np.uint8)) + self.file_map_ibr[filename].append(np.frombuffer(bin_buffer, dtype=np.uint64)) @dlp.log def load_index(self): @@ -113,4 +113,4 @@ def is_index_based(self): return True def is_iterator_based(self): - return True \ No newline at end of file + return True