Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CMake option to disable/enable TTS support #500

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ option(SHERPA_ONNX_ENABLE_C_API "Whether to build C API" ON)
# User-facing build switches. Each option() records a cached boolean that the
# rest of this file tests with if(<NAME>).
option(SHERPA_ONNX_ENABLE_WEBSOCKET "Whether to build websocket server/client" ON)
option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF)
option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. Used only when BUILD_SHARED_LIBS is OFF on Linux" ON)
# When OFF, the espeak-ng / piper-phonemize dependencies are not included and
# the TTS part of the C API is left out of the build.
option(SHERPA_ONNX_ENABLE_TTS "Whether to build with TTS capability" ON)

set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
Expand Down Expand Up @@ -99,6 +100,8 @@ message(STATUS "SHERPA_ONNX_ENABLE_JNI ${SHERPA_ONNX_ENABLE_JNI}")
# Echo the resolved value of each feature switch at configure time so the
# chosen configuration is visible in the CMake output.
message(STATUS "SHERPA_ONNX_ENABLE_C_API ${SHERPA_ONNX_ENABLE_C_API}")
message(STATUS "SHERPA_ONNX_ENABLE_WEBSOCKET ${SHERPA_ONNX_ENABLE_WEBSOCKET}")
message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}")
message(STATUS "SHERPA_ONNX_ENABLE_TTS ${SHERPA_ONNX_ENABLE_TTS}")


if(NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
Expand Down Expand Up @@ -193,10 +196,12 @@ if(SHERPA_ONNX_ENABLE_WEBSOCKET)
include(asio)
endif()

# Unconditional form: the espeak-ng and piper-phonemize modules are always
# included, regardless of any TTS switch.
include(espeak-ng-for-piper)
# Expose the espeak-ng source directory under the name ESPEAK_NG_DIR and log it.
set(ESPEAK_NG_DIR ${espeak_ng_SOURCE_DIR})
message(STATUS "ESPEAK_NG_DIR: ${ESPEAK_NG_DIR}")
include(piper-phonemize)
# The espeak-ng and piper-phonemize modules are included only when TTS support
# is requested.
if(SHERPA_ONNX_ENABLE_TTS)
include(espeak-ng-for-piper)
# Expose the espeak-ng source directory under the name ESPEAK_NG_DIR and log it.
set(ESPEAK_NG_DIR ${espeak_ng_SOURCE_DIR})
message(STATUS "ESPEAK_NG_DIR: ${ESPEAK_NG_DIR}")
include(piper-phonemize)
endif()

# Descend into the main source tree.
add_subdirectory(sherpa-onnx)

Expand Down
10 changes: 9 additions & 1 deletion sherpa-onnx/c-api/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
# C API wrapper library, layered on top of sherpa-onnx-core.
include_directories(${CMAKE_SOURCE_DIR})

add_library(sherpa-onnx-c-api c-api.cc)
target_link_libraries(sherpa-onnx-c-api sherpa-onnx-core)

# Symbol export/import macros are only relevant for shared builds.
if(BUILD_SHARED_LIBS)
  target_compile_definitions(sherpa-onnx-c-api PRIVATE SHERPA_ONNX_BUILD_SHARED_LIBS=1)
  target_compile_definitions(sherpa-onnx-c-api PRIVATE SHERPA_ONNX_BUILD_MAIN_LIB=1)
endif()

# The TTS entry points live in their own translation unit; it is compiled, and
# the matching macro is published to consumers of this target, only when TTS
# support is enabled.
if(SHERPA_ONNX_ENABLE_TTS)
  target_sources(sherpa-onnx-c-api PRIVATE c-api-tts.cc)
  target_compile_definitions(sherpa-onnx-c-api PUBLIC SHERPA_ONNX_ENABLE_TTS=1)
endif()

install(TARGETS sherpa-onnx-c-api DESTINATION lib)
install(
  FILES c-api.h
  DESTINATION include/sherpa-onnx/c-api
)

98 changes: 98 additions & 0 deletions sherpa-onnx/c-api/c-api-tts.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// sherpa-onnx/c-api/c-api-tts.cc
//
// Copyright (c) 2023 Xiaomi Corporation
#include "sherpa-onnx/c-api/c-api.h"

#include <algorithm>
#include <cstdio>
#include <memory>

#include "sherpa-onnx/csrc/offline-tts.h"
#include "sherpa-onnx/csrc/wave-writer.h"

// Evaluates to `x` when `x` is truthy (non-null pointer / non-zero number) and
// to the fallback `y` otherwise.
//
// Each argument is parenthesized so that an operator inside an argument (for
// example an assignment) cannot re-associate with the conditional operator.
// `x` is still evaluated twice, so pass only side-effect-free expressions.
#define SHERPA_ONNX_OR(x, y) ((x) ? (x) : (y))

// Concrete definition of the TTS handle type used by the C API functions in
// this file. It only wraps the C++ engine.
struct SherpaOnnxOfflineTts {
// Owning pointer to the C++ engine; destroyed together with this struct.
std::unique_ptr<sherpa_onnx::OfflineTts> impl;
};

// Creates an offline (non-streaming) TTS engine from a C configuration struct.
//
// Every string field that is NULL and every numeric field that is 0 falls back
// to the default spelled out next to it below (see SHERPA_ONNX_OR).
//
// @param config  C-side configuration. A NULL pointer yields nullptr.
// @return A heap-allocated handle; release it with
//         SherpaOnnxDestroyOfflineTts().
SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
    const SherpaOnnxOfflineTtsConfig *config) {
  if (config == nullptr) {
    return nullptr;
  }

  sherpa_onnx::OfflineTtsConfig tts_config;

  tts_config.model.vits.model = SHERPA_ONNX_OR(config->model.vits.model, "");
  tts_config.model.vits.lexicon =
      SHERPA_ONNX_OR(config->model.vits.lexicon, "");
  tts_config.model.vits.tokens = SHERPA_ONNX_OR(config->model.vits.tokens, "");
  tts_config.model.vits.data_dir =
      SHERPA_ONNX_OR(config->model.vits.data_dir, "");
  tts_config.model.vits.noise_scale =
      SHERPA_ONNX_OR(config->model.vits.noise_scale, 0.667);
  tts_config.model.vits.noise_scale_w =
      SHERPA_ONNX_OR(config->model.vits.noise_scale_w, 0.8);
  tts_config.model.vits.length_scale =
      SHERPA_ONNX_OR(config->model.vits.length_scale, 1.0);

  tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
  tts_config.model.debug = config->model.debug;
  tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
  tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
  tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2);

  if (tts_config.model.debug) {
    fprintf(stderr, "%s\n", tts_config.ToString().c_str());
  }

  // Hold the wrapper in a unique_ptr while the engine is being constructed so
  // that it is not leaked if the OfflineTts constructor throws.
  auto tts = std::make_unique<SherpaOnnxOfflineTts>();
  tts->impl = std::make_unique<sherpa_onnx::OfflineTts>(tts_config);

  // Ownership passes to the caller.
  return tts.release();
}

// Frees a TTS handle. Deleting the wrapper also destroys the engine owned by
// its `impl` member.
void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts) {
  delete tts;
}

// Returns whatever SampleRate() reports for the engine wrapped by `tts`.
int32_t SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts) {
  auto &engine = *tts->impl;
  return engine.SampleRate();
}

// Convenience overload of SherpaOnnxOfflineTtsGenerateWithCallback() that
// passes a null callback.
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
    float speed) {
  SherpaOnnxGeneratedAudioCallback no_callback = nullptr;
  return SherpaOnnxOfflineTtsGenerateWithCallback(tts, text, sid, speed,
                                                  no_callback);
}

// Synthesizes `text` with the engine behind `tts` and returns the samples in a
// C struct.
//
// @param tts       Handle created by SherpaOnnxCreateOfflineTts().
// @param text      Text to synthesize.
// @param sid       Forwarded unchanged to OfflineTts::Generate().
// @param speed     Forwarded unchanged to OfflineTts::Generate().
// @param callback  Forwarded unchanged to OfflineTts::Generate(); may be null.
// @return nullptr when `tts` or `text` is NULL or when no samples were
//         produced. Otherwise a heap-allocated result that must be released
//         with SherpaOnnxDestroyOfflineTtsGeneratedAudio().
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallback(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
    SherpaOnnxGeneratedAudioCallback callback) {
  if (tts == nullptr || text == nullptr) {
    return nullptr;
  }

  sherpa_onnx::GeneratedAudio audio =
      tts->impl->Generate(text, sid, speed, callback);

  if (audio.samples.empty()) {
    return nullptr;
  }

  const auto num_samples = audio.samples.size();

  // Both allocations are held by smart pointers until everything has
  // succeeded, so neither leaks if the other allocation throws.
  std::unique_ptr<float[]> samples(new float[num_samples]);
  std::copy(audio.samples.begin(), audio.samples.end(), samples.get());

  auto ans = std::make_unique<SherpaOnnxGeneratedAudio>();
  ans->samples = samples.release();
  ans->n = static_cast<int32_t>(num_samples);
  ans->sample_rate = audio.sample_rate;

  // Ownership passes to the caller.
  return ans.release();
}

// Frees a result returned by the TTS generate functions: first the sample
// array, then the struct itself. A null pointer is ignored.
void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
    const SherpaOnnxGeneratedAudio *p) {
  if (p == nullptr) {
    return;
  }

  delete[] p->samples;
  delete p;
}

// Thin C wrapper around sherpa_onnx::WriteWave(). Note that the C++ function
// takes its arguments in a different order (filename and sample rate first).
int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
                            int32_t sample_rate, const char *filename) {
  const auto status =
      sherpa_onnx::WriteWave(filename, sample_rate, samples, n);
  return status;
}
88 changes: 0 additions & 88 deletions sherpa-onnx/c-api/c-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,8 @@
#include "sherpa-onnx/csrc/circular-buffer.h"
#include "sherpa-onnx/csrc/display.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/offline-tts.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"
#include "sherpa-onnx/csrc/wave-writer.h"

struct SherpaOnnxOnlineRecognizer {
std::unique_ptr<sherpa_onnx::OnlineRecognizer> impl;
Expand Down Expand Up @@ -534,89 +532,3 @@ void SherpaOnnxDestroySpeechSegment(const SherpaOnnxSpeechSegment *p) {
// Forwards to Reset() on the object held in the handle's `impl` member.
void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) {
  auto &detector = *p->impl;
  detector.Reset();
}

// Concrete definition of the TTS handle type used by the C API functions
// below. It only wraps the C++ engine.
struct SherpaOnnxOfflineTts {
// Owning pointer to the C++ engine; destroyed together with this struct.
std::unique_ptr<sherpa_onnx::OfflineTts> impl;
};

// Creates an offline (non-streaming) TTS engine from a C configuration struct.
//
// Every string field that is NULL and every numeric field that is 0 falls back
// to the default spelled out next to it below (see SHERPA_ONNX_OR).
//
// @param config  C-side configuration. A NULL pointer yields nullptr.
// @return A heap-allocated handle; release it with
//         SherpaOnnxDestroyOfflineTts().
SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
    const SherpaOnnxOfflineTtsConfig *config) {
  if (config == nullptr) {
    return nullptr;
  }

  sherpa_onnx::OfflineTtsConfig tts_config;

  tts_config.model.vits.model = SHERPA_ONNX_OR(config->model.vits.model, "");
  tts_config.model.vits.lexicon =
      SHERPA_ONNX_OR(config->model.vits.lexicon, "");
  tts_config.model.vits.tokens = SHERPA_ONNX_OR(config->model.vits.tokens, "");
  tts_config.model.vits.data_dir =
      SHERPA_ONNX_OR(config->model.vits.data_dir, "");
  tts_config.model.vits.noise_scale =
      SHERPA_ONNX_OR(config->model.vits.noise_scale, 0.667);
  tts_config.model.vits.noise_scale_w =
      SHERPA_ONNX_OR(config->model.vits.noise_scale_w, 0.8);
  tts_config.model.vits.length_scale =
      SHERPA_ONNX_OR(config->model.vits.length_scale, 1.0);

  tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
  tts_config.model.debug = config->model.debug;
  tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
  tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
  tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2);

  if (tts_config.model.debug) {
    fprintf(stderr, "%s\n", tts_config.ToString().c_str());
  }

  // Hold the wrapper in a unique_ptr while the engine is being constructed so
  // that it is not leaked if the OfflineTts constructor throws.
  auto tts = std::make_unique<SherpaOnnxOfflineTts>();
  tts->impl = std::make_unique<sherpa_onnx::OfflineTts>(tts_config);

  // Ownership passes to the caller.
  return tts.release();
}

// Frees a TTS handle. Deleting the wrapper also destroys the engine owned by
// its `impl` member.
void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts) {
  delete tts;
}

// Returns whatever SampleRate() reports for the engine wrapped by `tts`.
int32_t SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts) {
  auto &engine = *tts->impl;
  return engine.SampleRate();
}

// Convenience overload of SherpaOnnxOfflineTtsGenerateWithCallback() that
// passes a null callback.
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
    float speed) {
  SherpaOnnxGeneratedAudioCallback no_callback = nullptr;
  return SherpaOnnxOfflineTtsGenerateWithCallback(tts, text, sid, speed,
                                                  no_callback);
}

// Synthesizes `text` with the engine behind `tts` and returns the samples in a
// C struct.
//
// @param tts       Handle created by SherpaOnnxCreateOfflineTts().
// @param text      Text to synthesize.
// @param sid       Forwarded unchanged to OfflineTts::Generate().
// @param speed     Forwarded unchanged to OfflineTts::Generate().
// @param callback  Forwarded unchanged to OfflineTts::Generate(); may be null.
// @return nullptr when `tts` or `text` is NULL or when no samples were
//         produced. Otherwise a heap-allocated result that must be released
//         with SherpaOnnxDestroyOfflineTtsGeneratedAudio().
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallback(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
    SherpaOnnxGeneratedAudioCallback callback) {
  if (tts == nullptr || text == nullptr) {
    return nullptr;
  }

  sherpa_onnx::GeneratedAudio audio =
      tts->impl->Generate(text, sid, speed, callback);

  if (audio.samples.empty()) {
    return nullptr;
  }

  const auto num_samples = audio.samples.size();

  // Both allocations are held by smart pointers until everything has
  // succeeded, so neither leaks if the other allocation throws.
  std::unique_ptr<float[]> samples(new float[num_samples]);
  std::copy(audio.samples.begin(), audio.samples.end(), samples.get());

  auto ans = std::make_unique<SherpaOnnxGeneratedAudio>();
  ans->samples = samples.release();
  ans->n = static_cast<int32_t>(num_samples);
  ans->sample_rate = audio.sample_rate;

  // Ownership passes to the caller.
  return ans.release();
}

// Frees a result returned by the TTS generate functions: first the sample
// array, then the struct itself. A null pointer is ignored.
void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
    const SherpaOnnxGeneratedAudio *p) {
  if (p == nullptr) {
    return;
  }

  delete[] p->samples;
  delete p;
}

// Thin C wrapper around sherpa_onnx::WriteWave(). Note that the C++ function
// takes its arguments in a different order (filename and sample rate first).
int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
                            int32_t sample_rate, const char *filename) {
  const auto status =
      sherpa_onnx::WriteWave(filename, sample_rate, samples, n);
  return status;
}
4 changes: 4 additions & 0 deletions sherpa-onnx/c-api/c-api.h
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,8 @@ SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment(
SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset(
SherpaOnnxVoiceActivityDetector *p);

#if defined(SHERPA_ONNX_ENABLE_TTS)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I should probably invert this, so as to not break the examples.


// ============================================================
// For offline Text-to-Speech (i.e., non-streaming TTS)
// ============================================================
Expand Down Expand Up @@ -677,6 +679,8 @@ SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
int32_t sample_rate,
const char *filename);

#endif // SHERPA_ONNX_ENABLE_TTS

#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
Expand Down
Loading
Loading