diff --git a/lhotse/audio.py b/lhotse/audio.py
index f6267850c..46045ca6a 100644
--- a/lhotse/audio.py
+++ b/lhotse/audio.py
@@ -1767,7 +1767,6 @@ def info(
     force_opus_sampling_rate: Optional[int] = None,
     force_read_audio: bool = False,
 ) -> LibsndfileCompatibleAudioInfo:
-
     if force_read_audio:
         # This is a reliable fallback for situations when the user knows that audio files do not
         # have duration metadata in their headers.
@@ -1824,7 +1823,8 @@ def torchaudio_2_0_ffmpeg_enabled() -> bool:
     import torchaudio
     from packaging import version
 
-    ver = version.parse(torchaudio.__version__)
+    # Handle cases like '2.0.0+cu117'
+    ver = version.parse(version.parse(torchaudio.__version__).base_version)
     if ver == version.parse("2.0.0"):
         return os.environ.get("TORCHAUDIO_USE_BACKEND_DISPATCHER", "0") == "1"
     if ver >= version.parse("2.1.0"):
@@ -2374,6 +2374,47 @@ def sph_info(path: Pathlike) -> LibsndfileCompatibleAudioInfo:
 
 def read_sph(
     sph_path: Pathlike, offset: Seconds = 0.0, duration: Optional[Seconds] = None
+) -> Tuple[np.ndarray, int]:
+    """
+    Reads SPH files either using torchaudio or using sph2pipe in a shell subprocess.
+
+    torchaudio is tried first; if it fails for any reason, sph2pipe is used instead.
+
+    :return: a tuple of audio samples and the sampling rate.
+    """
+    try:
+        return read_sph_torchaudio(sph_path=sph_path, offset=offset, duration=duration)
+    except Exception:
+        # Best-effort fallback. Catch ``Exception`` rather than using a bare ``except``
+        # so that KeyboardInterrupt and SystemExit are not swallowed.
+        return read_sph_sph2pipe(sph_path=sph_path, offset=offset, duration=duration)
+
+
+def read_sph_torchaudio(
+    sph_path: Pathlike, offset: Seconds = 0.0, duration: Optional[Seconds] = None
+) -> Tuple[np.ndarray, int]:
+    """
+    Reads SPH files using torchaudio.
+
+    :return: a tuple of audio samples and the sampling rate.
+    :raises AudioLoadingError: when torchaudio fails with a ``RuntimeError``.
+    """
+    # Actual audio reading.
+    sph_path = str(sph_path)
+    try:
+        samples, sampling_rate = torchaudio_2_ffmpeg_load(sph_path, offset, duration)
+    except RuntimeError as e:
+        # Report the arguments exactly as passed; ``duration`` may be None, so no
+        # arithmetic is done on it while building the message.
+        raise AudioLoadingError(
+            f"{e}\nThe torchaudio command for which the program failed is: "
+            f"torchaudio_2_ffmpeg_load({sph_path!r}, offset={offset}, duration={duration})"
+        ) from e
+    return samples, sampling_rate
+
+
+def read_sph_sph2pipe(
+    sph_path: Pathlike, offset: Seconds = 0.0, duration: Optional[Seconds] = None
 ) -> Tuple[np.ndarray, int]:
     """
     Reads SPH files using sph2pipe in a shell subprocess.
@@ -2381,7 +2422,6 @@ def read_sph(
 
     :return: a tuple of audio samples and the sampling rate.
     """
-
     sph_path = Path(sph_path)
 
     # Construct the sph2pipe command depending on the arguments passed.
diff --git a/test/cut/test_cut_augmentation.py b/test/cut/test_cut_augmentation.py
index 7e9d04a7b..968c6e12d 100644
--- a/test/cut/test_cut_augmentation.py
+++ b/test/cut/test_cut_augmentation.py
@@ -420,7 +420,7 @@ def test_mixed_cut_start01_reverb_rir_with_fast_random(
 ):
     mixed_rvb = cut_with_supervision_start01.append(
         cut_with_supervision_start01
-    ).reverb_rir()
+    ).reverb_rir(mix_first=False)
     assert mixed_rvb.start == 0  # MixedCut always starts at 0
     assert mixed_rvb.duration == cut_with_supervision_start01.duration * 2
     assert mixed_rvb.end == cut_with_supervision_start01.duration * 2