k2-fsa · csukuangfj · Dec 22, 2023 · Dec 22, 2023 · Dec 23, 2023 · Dec 23, 2023
diff --git a/python-api-examples/generate-subtitles.py b/python-api-examples/generate-subtitles.py
@@ -419,7 +419,7 @@ def main():
 
         recognizer.decode_streams(streams)
         for seg, stream in zip(segments, streams):
-            seg.text = stream.result.text
+            seg.text = stream.result.text.decode("utf-8", "ignore")
             segment_list.append(seg)
 
     srt_filename = Path(args.sound_file).with_suffix(".srt")

diff --git a/python-api-examples/non_streaming_server.py b/python-api-examples/non_streaming_server.py
@@ -817,7 +817,7 @@ async def handle_connection_impl(
             stream.accept_waveform(sample_rate, samples)
 
             await self.compute_and_decode(stream)
-            result = stream.result.text
+            result = stream.result.text.decode("utf-8", "ignore")
             logging.info(f"result: {result}")
 
             if result:

diff --git a/python-api-examples/offline-decode-files.py b/python-api-examples/offline-decode-files.py
@@ -436,7 +436,7 @@ def main():
         streams.append(s)
 
     recognizer.decode_streams(streams)
-    results = [s.result.text for s in streams]
+    results = [s.result.text.decode("utf-8", "ignore") for s in streams]
     end_time = time.time()
     print("Done!")
 

diff --git a/python-api-examples/two-pass-speech-recognition-from-microphone.py b/python-api-examples/two-pass-speech-recognition-from-microphone.py
@@ -344,7 +344,7 @@ def run_second_pass(
 
     recognizer.decode_stream(stream)
 
-    return stream.result.text
+    return stream.result.text.decode("utf-8", "ignore")
 
 
 def main():

diff --git a/python-api-examples/vad-with-non-streaming-asr.py b/python-api-examples/vad-with-non-streaming-asr.py
@@ -335,7 +335,7 @@ def main():
                 vad.pop()
                 recognizer.decode_stream(stream)
 
-                text = stream.result.text.strip().lower()
+                text = stream.result.text.decode("utf-8", "ignore").strip().lower()
                 if len(text):
                     idx = len(texts)
                     texts.append(text)

diff --git a/sherpa-onnx/csrc/offline-recognizer-ctc-impl.h b/sherpa-onnx/csrc/offline-recognizer-ctc-impl.h
@@ -42,6 +42,15 @@ static OfflineRecognitionResult Convert(const OfflineCtcDecoderResult &src,
     }
     auto sym = sym_table[src.tokens[i]];
     text.append(sym);
+
+    if (sym.size() == 1 && sym[0] != ' ') {
+      // for byte bpe models; widen the byte so it prints as hex, not a char
+      std::ostringstream os;
+      os << "<0x" << std::hex << std::uppercase
+         << static_cast<uint32_t>(static_cast<uint8_t>(sym[0])) << ">";
+      sym = os.str();
+    }
+
     r.tokens.push_back(std::move(sym));
   }
   r.text = std::move(text);

diff --git a/sherpa-onnx/csrc/offline-recognizer-transducer-impl.h b/sherpa-onnx/csrc/offline-recognizer-transducer-impl.h
@@ -44,6 +44,13 @@ static OfflineRecognitionResult Convert(
     auto sym = sym_table[i];
     text.append(sym);
 
+    if (sym.size() == 1 && sym[0] != ' ') {
+      // for byte bpe models; widen the byte so it prints as hex, not a char
+      std::ostringstream os;
+      os << "<0x" << std::hex << std::uppercase
+         << static_cast<uint32_t>(static_cast<uint8_t>(sym[0])) << ">";
+      sym = os.str();
+    }
     r.tokens.push_back(std::move(sym));
   }
   r.text = std::move(text);

diff --git a/sherpa-onnx/csrc/online-recognizer-ctc-impl.h b/sherpa-onnx/csrc/online-recognizer-ctc-impl.h
@@ -35,6 +35,15 @@ static OnlineRecognizerResult Convert(const OnlineCtcDecoderResult &src,
     auto sym = sym_table[i];
 
     r.text.append(sym);
+
+    if (sym.size() == 1 && sym[0] != ' ') {
+      // for byte bpe models; widen the byte so it prints as hex, not a char
+      std::ostringstream os;
+      os << "<0x" << std::hex << std::uppercase
+         << static_cast<uint32_t>(static_cast<uint8_t>(sym[0])) << ">";
+      sym = os.str();
+    }
+
     r.tokens.push_back(std::move(sym));
   }
 

diff --git a/sherpa-onnx/csrc/online-recognizer-transducer-impl.h b/sherpa-onnx/csrc/online-recognizer-transducer-impl.h
@@ -47,6 +47,15 @@ static OnlineRecognizerResult Convert(const OnlineTransducerDecoderResult &src,
     auto sym = sym_table[i];
 
     r.text.append(sym);
+
+    if (sym.size() == 1 && sym[0] != ' ') {
+      // for byte bpe models; widen the byte so it prints as hex, not a char
+      std::ostringstream os;
+      os << "<0x" << std::hex << std::uppercase
+         << static_cast<uint32_t>(static_cast<uint8_t>(sym[0])) << ">";
+      sym = os.str();
+    }
+
     r.tokens.push_back(std::move(sym));
   }
 

diff --git a/sherpa-onnx/python/csrc/offline-stream.cc b/sherpa-onnx/python/csrc/offline-stream.cc
@@ -24,7 +24,10 @@ static void PybindOfflineRecognitionResult(py::module *m) {  // NOLINT
   using PyClass = OfflineRecognitionResult;
   py::class_<PyClass>(*m, "OfflineRecognitionResult")
       .def_property_readonly("text",
-                             [](const PyClass &self) { return self.text; })
+                             [](const PyClass &self) -> py::bytes {
+                               py::bytes bytes(self.text);
+                               return bytes;
+                             })
       .def_property_readonly("tokens",
                              [](const PyClass &self) { return self.tokens; })
       .def_property_readonly(

diff --git a/sherpa-onnx/python/csrc/online-recognizer.cc b/sherpa-onnx/python/csrc/online-recognizer.cc
@@ -14,8 +14,11 @@ namespace sherpa_onnx {
 static void PybindOnlineRecognizerResult(py::module *m) {
   using PyClass = OnlineRecognizerResult;
   py::class_<PyClass>(*m, "OnlineRecognizerResult")
-      .def_property_readonly(
-          "text", [](PyClass &self) -> std::string { return self.text; })
+      .def_property_readonly("text",
+                             [](PyClass &self) -> py::bytes {
+                               py::bytes bytes(self.text);
+                               return bytes;
+                             })
       .def_property_readonly(
           "tokens",
           [](PyClass &self) -> std::vector<std::string> { return self.tokens; })

diff --git a/sherpa-onnx/python/sherpa_onnx/online_recognizer.py b/sherpa-onnx/python/sherpa_onnx/online_recognizer.py
@@ -491,7 +491,7 @@ def is_ready(self, s: OnlineStream) -> bool:
         return self.recognizer.is_ready(s)
 
     def get_result(self, s: OnlineStream) -> str:
-        return self.recognizer.get_result(s).text.strip()
+        return self.recognizer.get_result(s).text.decode("utf-8", "ignore").strip()
 
     def tokens(self, s: OnlineStream) -> List[str]:
         return self.recognizer.get_result(s).tokens