diff --git a/GeneratingDataset.py b/GeneratingDataset.py index 0beb484e77..8bd9ec6bbb 100644 --- a/GeneratingDataset.py +++ b/GeneratingDataset.py @@ -1498,8 +1498,9 @@ def __init__(self, path, prefix, bpe, audio, partition_epoch=None, fixed_random_ self.prefix = prefix assert prefix in ["train", "dev", "eval"] assert os.path.exists(path + "/train-clean-100") - from Util import monkeyfix_glib - monkeyfix_glib() + import Util + Util.monkeyfix_glib() + Util.monkeypatch_audioread() self.bpe = BytePairEncoding(**bpe) self.labels = self.bpe.labels self._fixed_random_seed = fixed_random_seed diff --git a/Util.py b/Util.py index d72ba0febb..a42669fc84 100644 --- a/Util.py +++ b/Util.py @@ -2586,3 +2586,17 @@ def monkeyfix_glib(): # and then reraise a KeyboardInterrupt in that thread. # However, we want and expect to get the KeyboardInterrupt in the main thread. GLib.MainLoop.__init__ = lambda *args, **kwargs: None + + +def monkeypatch_audioread(): + """ + audioread does not behave optimally in some cases. + E.g. each call to _ca_available() takes quite a long time because of the ctypes.util.find_library usage. + We will patch this. + """ + try: + import audioread + except ImportError: + return + res = audioread._ca_available() + audioread._ca_available = lambda: res diff --git a/tools/dump-dataset.py b/tools/dump-dataset.py index 337a3d7839..fe0a461a4f 100755 --- a/tools/dump-dataset.py +++ b/tools/dump-dataset.py @@ -93,7 +93,8 @@ def dump_dataset(dataset, options): num_seqs_s = "~%i" % dataset.estimated_num_seqs except TypeError: # a number is required, not NoneType num_seqs_s = "?" 
- progress = "%i/%s (%.02f%%)" % (seq_idx, num_seqs_s, complete_frac * 100) + progress_prefix = "%i/%s" % (seq_idx, num_seqs_s) + progress = "%s (%.02f%%)" % (progress_prefix, complete_frac * 100) if complete_frac > 0: total_time_estimated = start_elapsed / complete_frac remaining_estimated = total_time_estimated - start_elapsed @@ -121,7 +122,7 @@ def dump_dataset(dataset, options): if stats: stats.collect(data) if options.type == "null": - Util.progress_bar_with_time(complete_frac) + Util.progress_bar_with_time(complete_frac, prefix=progress_prefix) seq_idx += 1