Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use torchaudio to read sph files first #1067

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 36 additions & 3 deletions lhotse/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -1767,7 +1767,6 @@ def info(
force_opus_sampling_rate: Optional[int] = None,
force_read_audio: bool = False,
) -> LibsndfileCompatibleAudioInfo:

if force_read_audio:
# This is a reliable fallback for situations when the user knows that audio files do not
# have duration metadata in their headers.
Expand Down Expand Up @@ -1824,7 +1823,8 @@ def torchaudio_2_0_ffmpeg_enabled() -> bool:
import torchaudio
from packaging import version

ver = version.parse(torchaudio.__version__)
# Handle cases like '2.0.0+cu117'
ver = version.parse(version.parse(torchaudio.__version__).base_version)
if ver == version.parse("2.0.0"):
return os.environ.get("TORCHAUDIO_USE_BACKEND_DISPATCHER", "0") == "1"
if ver >= version.parse("2.1.0"):
Expand Down Expand Up @@ -2374,14 +2374,47 @@ def sph_info(path: Pathlike) -> LibsndfileCompatibleAudioInfo:

def read_sph(
    sph_path: Pathlike, offset: Seconds = 0.0, duration: Optional[Seconds] = None
) -> Tuple[np.ndarray, int]:
    """
    Reads SPH files either using torchaudio or using sph2pipe in a shell subprocess.

    torchaudio is attempted first; if that attempt raises any regular exception,
    the same request is retried with sph2pipe.

    :param sph_path: path to the SPH file.
    :param offset: forwarded unchanged to the underlying reader.
    :param duration: forwarded unchanged to the underlying reader.
    :return: a tuple of audio samples and the sampling rate.
    """
    try:
        return read_sph_torchaudio(sph_path=sph_path, offset=offset, duration=duration)
    except Exception:
        # Intentionally broad so that any torchaudio failure triggers the fallback,
        # but not a bare ``except:`` -- KeyboardInterrupt and SystemExit must
        # propagate instead of silently starting a sph2pipe subprocess.
        return read_sph_sph2pipe(sph_path=sph_path, offset=offset, duration=duration)


def read_sph_torchaudio(
    sph_path: Pathlike, offset: Seconds = 0.0, duration: Optional[Seconds] = None
) -> Tuple[np.ndarray, int]:
    """
    Reads SPH files using torchaudio.

    :param sph_path: path to the SPH file; converted to ``str`` before loading.
    :param offset: forwarded to ``torchaudio_2_ffmpeg_load``.
    :param duration: forwarded to ``torchaudio_2_ffmpeg_load``; may be ``None``.
    :return: a tuple of audio samples and the sampling rate.
    :raises AudioLoadingError: when the loader fails with a ``RuntimeError``.
    """
    # Actual audio reading.
    sph_path = str(sph_path)
    try:
        samples, sampling_rate = torchaudio_2_ffmpeg_load(sph_path, offset, duration)
    except RuntimeError as e:
        # ``duration`` defaults to None; multiplying it would raise TypeError here
        # and hide the real loading error, so only format it when it is set.
        num_frames = None if duration is None else int(duration * 100)
        raise AudioLoadingError(
            f"{e}\nThe torchaudio command for which the program failed is: "
            f"torchaudio.load({sph_path}, frame_offset={int(offset * 100)}, num_frames={num_frames})"
        ) from e
    return samples, sampling_rate


def read_sph_sph2pipe(
sph_path: Pathlike, offset: Seconds = 0.0, duration: Optional[Seconds] = None
) -> Tuple[np.ndarray, int]:
"""
Reads SPH files using sph2pipe in a shell subprocess.
Unlike audioread, correctly supports offsets and durations for reading short chunks.

:return: a tuple of audio samples and the sampling rate.
"""

sph_path = Path(sph_path)

# Construct the sph2pipe command depending on the arguments passed.
Expand Down
2 changes: 1 addition & 1 deletion test/cut/test_cut_augmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ def test_mixed_cut_start01_reverb_rir_with_fast_random(
):
mixed_rvb = cut_with_supervision_start01.append(
cut_with_supervision_start01
).reverb_rir()
).reverb_rir(mix_first=False)
assert mixed_rvb.start == 0 # MixedCut always starts at 0
assert mixed_rvb.duration == cut_with_supervision_start01.duration * 2
assert mixed_rvb.end == cut_with_supervision_start01.duration * 2
Expand Down