Skip to content

Commit

Permalink
Improve CI performance.
Browse files Browse the repository at this point in the history
1. Improve generation of HDF5 files; it was taking too long.
2. Do not finalize MPI in pytest; this allows multiple tests to run together.
  • Loading branch information
hariharan-devarajan committed Aug 30, 2024
1 parent d6924f3 commit c2af0c2
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 102 deletions.
85 changes: 9 additions & 76 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,118 +81,51 @@ jobs:
- name: test_gen_data
run: |
source ${VENV_PATH}/bin/activate
mpirun -np 2 pytest -k test_gen_data[png-tensorflow] -v
mpirun -np 2 pytest -k test_gen_data[npz-tensorflow] -v
mpirun -np 2 pytest -k test_gen_data[jpeg-tensorflow] -v
mpirun -np 2 pytest -k test_gen_data[tfrecord-tensorflow] -v
mpirun -np 2 pytest -k test_gen_data[hdf5-tensorflow] -v
mpirun -np 2 pytest -k test_gen_data[indexed_binary-tensorflow] -v
mpirun -np 2 pytest -k test_gen_data[mmap_indexed_binary-tensorflow] -v
mpirun -np 2 pytest -k test_gen_data -v --durations=0
rm -rf data
- name: test_custom_storage_root_gen_data
run: |
source ${VENV_PATH}/bin/activate
mpirun -np 2 pytest -k test_storage_root_gen_data[png-tensorflow] -v
mpirun -np 2 pytest -k test_storage_root_gen_data[npz-tensorflow] -v
mpirun -np 2 pytest -k test_storage_root_gen_data[jpeg-tensorflow] -v
mpirun -np 2 pytest -k test_storage_root_gen_data[tfrecord-tensorflow] -v
mpirun -np 2 pytest -k test_storage_root_gen_data[hdf5-tensorflow] -v
mpirun -np 2 pytest -k test_storage_root_gen_data[indexed_binary-tensorflow] -v
mpirun -np 2 pytest -k test_storage_root_gen_data[mmap_indexed_binary-tensorflow] -v
mpirun -np 2 pytest -k test_storage_root_gen_data -v --durations=0
rm -rf data
- name: test_train
run: |
source ${VENV_PATH}/bin/activate
mpirun -np 2 pytest -k test_train[png-tensorflow-tensorflow] -v
mpirun -np 2 pytest -k test_train[npz-tensorflow-tensorflow] -v
mpirun -np 2 pytest -k test_train[jpeg-tensorflow-tensorflow] -v
mpirun -np 2 pytest -k test_train[tfrecord-tensorflow-tensorflow] -v
mpirun -np 2 pytest -k test_train[hdf5-tensorflow-tensorflow] -v
mpirun -np 2 pytest -k test_train[csv-tensorflow-tensorflow] -v
mpirun -np 2 pytest -k test_train[png-pytorch-pytorch] -v
mpirun -np 2 pytest -k test_train[npz-pytorch-pytorch] -v
mpirun -np 2 pytest -k test_train[jpeg-pytorch-pytorch] -v
mpirun -np 2 pytest -k test_train[hdf5-pytorch-pytorch] -v
mpirun -np 2 pytest -k test_train[csv-pytorch-pytorch] -v
mpirun -np 2 pytest -k test_train[png-tensorflow-dali] -v
mpirun -np 2 pytest -k test_train[npz-tensorflow-dali] -v
mpirun -np 2 pytest -k test_train[jpeg-tensorflow-dali] -v
mpirun -np 2 pytest -k test_train[hdf5-tensorflow-dali] -v
mpirun -np 2 pytest -k test_train[csv-tensorflow-dali] -v
mpirun -np 2 pytest -k test_train[png-pytorch-dali] -v
mpirun -np 2 pytest -k test_train[npz-pytorch-dali] -v
mpirun -np 2 pytest -k test_train[jpeg-pytorch-dali] -v
mpirun -np 2 pytest -k test_train[hdf5-pytorch-dali] -v
mpirun -np 2 pytest -k test_train[csv-pytorch-dali] -v
mpirun -np 2 pytest -k test_train[indexed_binary-tensorflow-tensorflow] -v
mpirun -np 2 pytest -k test_train[indexed_binary-pytorch-pytorch] -v
mpirun -np 2 pytest -k test_train[indexed_binary-tensorflow-dali] -v
mpirun -np 2 pytest -k test_train[indexed_binary-pytorch-dali] -v
mpirun -np 2 pytest -k test_train[mmap_indexed_binary-tensorflow-tensorflow] -v
mpirun -np 2 pytest -k test_train[mmap_indexed_binary-pytorch-pytorch] -v
mpirun -np 2 pytest -k test_train[mmap_indexed_binary-tensorflow-dali] -v
mpirun -np 2 pytest -k test_train[mmap_indexed_binary-pytorch-dali] -v
mpirun -np 2 pytest -k test_train -v --durations=0
rm -rf data
- name: test_custom_storage_root_train
run: |
source ${VENV_PATH}/bin/activate
mpirun -np 2 pytest -k test_custom_storage_root_train[png-tensorflow] -v
mpirun -np 2 pytest -k test_custom_storage_root_train[npz-tensorflow] -v
mpirun -np 2 pytest -k test_custom_storage_root_train[jpeg-tensorflow] -v
mpirun -np 2 pytest -k test_custom_storage_root_train[tfrecord-tensorflow] -v
mpirun -np 2 pytest -k test_custom_storage_root_train[hdf5-tensorflow] -v
mpirun -np 2 pytest -k test_custom_storage_root_train[csv-tensorflow] -v
mpirun -np 2 pytest -k test_custom_storage_root_train[png-pytorch] -v
mpirun -np 2 pytest -k test_custom_storage_root_train[npz-pytorch] -v
mpirun -np 2 pytest -k test_custom_storage_root_train[jpeg-pytorch] -v
mpirun -np 2 pytest -k test_custom_storage_root_train[hdf5-pytorch] -v
mpirun -np 2 pytest -k test_custom_storage_root_train[csv-pytorch] -v
mpirun -np 2 pytest -k test_custom_storage_root_train[indexed_binary-tensorflow] -v
mpirun -np 2 pytest -k test_custom_storage_root_train[indexed_binary-pytorch] -v
mpirun -np 2 pytest -k test_custom_storage_root_train[mmap_indexed_binary-tensorflow] -v
mpirun -np 2 pytest -k test_custom_storage_root_train[mmap_indexed_binary-pytorch] -v
mpirun -np 2 pytest -k test_custom_storage_root_train -v --durations=0
rm -rf data
- name: test_checkpoint_epoch
run: |
source ${VENV_PATH}/bin/activate
mpirun -np 2 pytest -k test_checkpoint_epoch[tensorflow-1024-optimizers0-2-layer_params0-all_ranks] -v
mpirun -np 2 pytest -k test_checkpoint_epoch[pytorch-1024-optimizers1-2-layer_params1-all_ranks] -v
mpirun -np 2 pytest -k test_checkpoint_epoch[tensorflow-1024-optimizers2-2-layer_params2-rank_zero] -v
mpirun -np 2 pytest -k test_checkpoint_epoch[pytorch-1024-optimizers3-2-layer_params3-rank_zero] -v
mpirun -np 2 pytest -k test_checkpoint_epoch[tensorflow-1024-optimizers4-1-layer_params4-all_ranks] -v
mpirun -np 2 pytest -k test_checkpoint_epoch[pytorch-1024-optimizers5-1-layer_params5-all_ranks] -v
mpirun -np 2 pytest -k test_checkpoint_epoch -v --durations=0
rm -rf data
- name: test_checkpoint_step
run: |
source ${VENV_PATH}/bin/activate
mpirun -np 2 pytest -k test_checkpoint_step -v
mpirun -np 2 pytest -k test_checkpoint_step -v --durations=0
- name: test_eval
run: |
source ${VENV_PATH}/bin/activate
mpirun -np 2 pytest -k test_eval -v
- name: test_multi_threads
run: |
source ${VENV_PATH}/bin/activate
mpirun -np 2 pytest -k test_multi_threads[tensorflow-0] -v
mpirun -np 2 pytest -k test_multi_threads[tensorflow-1] -v
mpirun -np 2 pytest -k test_multi_threads[tensorflow-2] -v
mpirun -np 2 pytest -k test_multi_threads[pytorch-0] -v
mpirun -np 2 pytest -k test_multi_threads[pytorch-1] -v
mpirun -np 2 pytest -k test_multi_threads[pytorch-2] -v
mpirun -np 2 pytest -k test_multi_threads -v --durations=0
rm -rf data
- name: test-pytorch-multiprocessing-context
run: |
source ${VENV_PATH}/bin/activate
mpirun -np 2 pytest -k test_pytorch_multiprocessing_context[0-None] -v
mpirun -np 2 pytest -k test_pytorch_multiprocessing_context[1-fork] -v
mpirun -np 2 pytest -k test_pytorch_multiprocessing_context[2-forkserver] -v
mpirun -np 2 pytest -k test_pytorch_multiprocessing_context[2-spawn] -v
mpirun -np 2 pytest -k test_pytorch_multiprocessing_context -v --durations=0
rm -rf data
- name: test_subset
run: |
source ${VENV_PATH}/bin/activate
rm -rf output data checkpoints
mpirun -np 2 pytest -k test_subset -v
mpirun -np 2 pytest -k test_subset -v --durations=0
rm -rf data
- name: test-tf-loader-tfrecord
run: |
Expand Down
41 changes: 16 additions & 25 deletions dlio_benchmark/data_generator/hdf5_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,38 +45,29 @@ def generate(self):
"""
super().generate()
np.random.seed(10)
samples_per_iter=max(1, int(self._args.generation_buffer_size/self._args.record_length))
record_labels = [0] * self.num_samples
dim = self.get_dimension(self.total_files_to_generate)
chunks = None
if self.enable_chunking:
chunk_dimension = int(math.ceil(math.sqrt(self.chunk_size)))
if chunk_dimension > self._dimension:
chunk_dimension = self._dimension
chunks = (1, chunk_dimension, chunk_dimension)
compression = None
compression_level = None
if self.compression != Compression.NONE:
compression = str(self.compression)
if self.compression == Compression.GZIP:
compression_level = self.compression_level
for i in dlp.iter(range(self.my_rank, int(self.total_files_to_generate), self.comm_size)):
progress(i, self.total_files_to_generate, "Generating HDF5 Data")
dim1 = dim[2*i]
dim2 = dim[2*i+1]
records = np.random.randint(255, size=(samples_per_iter, dim1, dim2), dtype=np.uint8)
records = np.random.randint(255, size=(dim1, dim2, self.num_samples), dtype=np.uint8)
out_path_spec = self.storage.get_uri(self._file_list[i])
progress(i+1, self.total_files_to_generate, "Generating NPZ Data")
hf = h5py.File(out_path_spec, 'w')
chunks = None
if self.enable_chunking:
chunk_dimension = int(math.ceil(math.sqrt(self.chunk_size)))
if chunk_dimension > self._dimension:
chunk_dimension = self._dimension
chunks = (1, chunk_dimension, chunk_dimension)
compression = None
compression_level = None
if self.compression != Compression.NONE:
compression = str(self.compression)
if self.compression == Compression.GZIP:
compression_level = self.compression_level
dset = hf.create_dataset('records', (self.num_samples, dim1, dim2), chunks=chunks, compression=compression,
compression_opts=compression_level, dtype=np.uint8)
samples_written = 0
while samples_written < self.num_samples:
if samples_per_iter < self.num_samples-samples_written:
samples_to_write = samples_per_iter
else:
samples_to_write = self.num_samples-samples_written
dset[samples_written:samples_written+samples_to_write] = records[:samples_to_write]
samples_written += samples_to_write
hf.create_dataset('records', (self.num_samples, dim1, dim2), chunks=chunks, compression=compression,
compression_opts=compression_level, dtype=np.uint8, data=records)
hf.create_dataset('labels', data=record_labels)
hf.close()
np.random.seed()
3 changes: 2 additions & 1 deletion tests/dlio_benchmark_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ def init():
DLIOMPI.get_instance().initialize()

def finalize():
DLIOMPI.get_instance().finalize()
# DLIOMPI.get_instance().finalize()
pass

def clean(storage_root="./") -> None:
comm.Barrier()
Expand Down

0 comments on commit c2af0c2

Please sign in to comment.