diff --git a/python-api-examples/generate-subtitles.py b/python-api-examples/generate-subtitles.py index b1b9eca58b..29adb22dc5 100755 --- a/python-api-examples/generate-subtitles.py +++ b/python-api-examples/generate-subtitles.py @@ -419,7 +419,7 @@ def main(): recognizer.decode_streams(streams) for seg, stream in zip(segments, streams): - seg.text = stream.result.text + seg.text = stream.result.text.decode("utf-8", "ignore") segment_list.append(seg) srt_filename = Path(args.sound_file).with_suffix(".srt") diff --git a/python-api-examples/non_streaming_server.py b/python-api-examples/non_streaming_server.py index 20e8b68a0b..6354453890 100755 --- a/python-api-examples/non_streaming_server.py +++ b/python-api-examples/non_streaming_server.py @@ -817,7 +817,7 @@ async def handle_connection_impl( stream.accept_waveform(sample_rate, samples) await self.compute_and_decode(stream) - result = stream.result.text + result = stream.result.text.decode("utf-8", "ignore") logging.info(f"result: {result}") if result: diff --git a/python-api-examples/offline-decode-files.py b/python-api-examples/offline-decode-files.py index 78a1af042b..f92201c3dc 100755 --- a/python-api-examples/offline-decode-files.py +++ b/python-api-examples/offline-decode-files.py @@ -436,7 +436,7 @@ def main(): streams.append(s) recognizer.decode_streams(streams) - results = [s.result.text for s in streams] + results = [s.result.text.decode("utf-8", "ignore") for s in streams] end_time = time.time() print("Done!") diff --git a/python-api-examples/two-pass-speech-recognition-from-microphone.py b/python-api-examples/two-pass-speech-recognition-from-microphone.py index 697e94850b..0d667c6332 100755 --- a/python-api-examples/two-pass-speech-recognition-from-microphone.py +++ b/python-api-examples/two-pass-speech-recognition-from-microphone.py @@ -344,7 +344,7 @@ def run_second_pass( recognizer.decode_stream(stream) - return stream.result.text + return stream.result.text.decode("utf-8", "ignore") def main(): diff --git a/python-api-examples/vad-with-non-streaming-asr.py b/python-api-examples/vad-with-non-streaming-asr.py index 4f49743893..e1982ea0ed 100755 --- a/python-api-examples/vad-with-non-streaming-asr.py +++ b/python-api-examples/vad-with-non-streaming-asr.py @@ -335,7 +335,7 @@ def main(): vad.pop() recognizer.decode_stream(stream) - text = stream.result.text.strip().lower() + text = stream.result.text.decode("utf-8", "ignore").strip().lower() if len(text): idx = len(texts) texts.append(text) diff --git a/sherpa-onnx/csrc/offline-recognizer-ctc-impl.h b/sherpa-onnx/csrc/offline-recognizer-ctc-impl.h index 2eb908b53e..a4398d60de 100644 --- a/sherpa-onnx/csrc/offline-recognizer-ctc-impl.h +++ b/sherpa-onnx/csrc/offline-recognizer-ctc-impl.h @@ -42,6 +42,15 @@ static OfflineRecognitionResult Convert(const OfflineCtcDecoderResult &src, } auto sym = sym_table[src.tokens[i]]; text.append(sym); + + if (sym.size() == 1 && sym[0] != ' ') { + // for byte bpe models + std::ostringstream os; + os << "<0x" << std::hex << std::uppercase << static_cast(sym[0]) + << ">"; + sym = os.str(); + } + r.tokens.push_back(std::move(sym)); } r.text = std::move(text); diff --git a/sherpa-onnx/csrc/offline-recognizer-transducer-impl.h b/sherpa-onnx/csrc/offline-recognizer-transducer-impl.h index 3f4e2b05ee..e514117646 100644 --- a/sherpa-onnx/csrc/offline-recognizer-transducer-impl.h +++ b/sherpa-onnx/csrc/offline-recognizer-transducer-impl.h @@ -44,6 +44,13 @@ static OfflineRecognitionResult Convert( auto sym = sym_table[i]; text.append(sym); + if (sym.size() == 1 && sym[0] != ' ') { + // for byte bpe models + std::ostringstream os; + os << "<0x" << std::hex << std::uppercase << static_cast(sym[0]) + << ">"; + sym = os.str(); + } r.tokens.push_back(std::move(sym)); } r.text = std::move(text); diff --git a/sherpa-onnx/csrc/online-recognizer-ctc-impl.h b/sherpa-onnx/csrc/online-recognizer-ctc-impl.h index f59dbd84bc..4c35e38891 100644 --- a/sherpa-onnx/csrc/online-recognizer-ctc-impl.h +++ b/sherpa-onnx/csrc/online-recognizer-ctc-impl.h @@ -35,6 +35,15 @@ static OnlineRecognizerResult Convert(const OnlineCtcDecoderResult &src, auto sym = sym_table[i]; r.text.append(sym); + + if (sym.size() == 1 && sym[0] != ' ') { + // for byte bpe models + std::ostringstream os; + os << "<0x" << std::hex << std::uppercase << static_cast(sym[0]) + << ">"; + sym = os.str(); + } + r.tokens.push_back(std::move(sym)); } diff --git a/sherpa-onnx/csrc/online-recognizer-transducer-impl.h b/sherpa-onnx/csrc/online-recognizer-transducer-impl.h index a9ba0a95e3..9a0c68d00f 100644 --- a/sherpa-onnx/csrc/online-recognizer-transducer-impl.h +++ b/sherpa-onnx/csrc/online-recognizer-transducer-impl.h @@ -47,6 +47,15 @@ static OnlineRecognizerResult Convert(const OnlineTransducerDecoderResult &src, auto sym = sym_table[i]; r.text.append(sym); + + if (sym.size() == 1 && sym[0] != ' ') { + // for byte bpe models + std::ostringstream os; + os << "<0x" << std::hex << std::uppercase << static_cast(sym[0]) + << ">"; + sym = os.str(); + } + r.tokens.push_back(std::move(sym)); } diff --git a/sherpa-onnx/python/csrc/offline-stream.cc b/sherpa-onnx/python/csrc/offline-stream.cc index e9eb9c57ad..1e048b0f86 100644 --- a/sherpa-onnx/python/csrc/offline-stream.cc +++ b/sherpa-onnx/python/csrc/offline-stream.cc @@ -24,7 +24,10 @@ static void PybindOfflineRecognitionResult(py::module *m) { // NOLINT using PyClass = OfflineRecognitionResult; py::class_(*m, "OfflineRecognitionResult") .def_property_readonly("text", - [](const PyClass &self) { return self.text; }) + [](const PyClass &self) -> py::bytes { + py::bytes bytes(self.text); + return bytes; + }) .def_property_readonly("tokens", [](const PyClass &self) { return self.tokens; }) .def_property_readonly( diff --git a/sherpa-onnx/python/csrc/online-recognizer.cc b/sherpa-onnx/python/csrc/online-recognizer.cc index 5a2dba0950..dc9bc7e262 100644 --- a/sherpa-onnx/python/csrc/online-recognizer.cc +++ b/sherpa-onnx/python/csrc/online-recognizer.cc @@ -14,8 +14,11 @@ namespace sherpa_onnx { static void PybindOnlineRecognizerResult(py::module *m) { using PyClass = OnlineRecognizerResult; py::class_(*m, "OnlineRecognizerResult") - .def_property_readonly( - "text", [](PyClass &self) -> std::string { return self.text; }) + .def_property_readonly("text", + [](PyClass &self) -> py::bytes { + py::bytes bytes(self.text); + return bytes; + }) .def_property_readonly( "tokens", [](PyClass &self) -> std::vector { return self.tokens; }) diff --git a/sherpa-onnx/python/sherpa_onnx/online_recognizer.py b/sherpa-onnx/python/sherpa_onnx/online_recognizer.py index 6af47e11fa..273137f132 100644 --- a/sherpa-onnx/python/sherpa_onnx/online_recognizer.py +++ b/sherpa-onnx/python/sherpa_onnx/online_recognizer.py @@ -491,7 +491,7 @@ def is_ready(self, s: OnlineStream) -> bool: return self.recognizer.is_ready(s) def get_result(self, s: OnlineStream) -> str: - return self.recognizer.get_result(s).text.strip() + return self.recognizer.get_result(s).text.decode("utf-8", "ignore").strip() def tokens(self, s: OnlineStream) -> List[str]: return self.recognizer.get_result(s).tokens