refactor: Update model-downloader.cpp to use obs_module_config_path for retrieving the config folder path (#134)

* refactor: Update model-downloader.cpp to use obs_module_config_path for retrieving the config folder path
  - Replace the usage of obs_module_get_config_path with obs_module_config_path to retrieve the config folder path in model-downloader.cpp
  - Add a check for a null config_folder and log an info message if it is null
  - Convert the config_folder string to a wstring on Windows using MultiByteToWideChar
  - Update the log messages to provide more descriptive information about the config models folder and the model folder existence in the config folder
  - Use the updated config_folder_str in the std::filesystem::absolute function call
* Trigger Build
* refactor: Update model-downloader.cpp to use obs_module_config_path for retrieving the config folder path
* refactor: Fix bug in transcription filter callbacks
  - Add a condition to check for missing timestamps (start and end both zero) before saving the sentence to srt in the send_sentence_to_file function
  - Remove unnecessary code in the set_text_callback function that checks for empty text after suppression
  - Update the whisper_loop function to clear the current subtitle if the minimum subtitle duration has passed
locaal-ai · Jul 11, 2024 · 58f9131 · 58f9131
1 parent 234a938
commit 58f9131
Show file tree

Hide file tree

Showing 6 changed files with 60 additions and 25 deletions.
diff --git a/src/model-utils/model-downloader-ui.cpp b/src/model-utils/model-downloader-ui.cpp
@@ -125,11 +125,23 @@ std::string get_filename_from_url(const std::string &url)
 
 void ModelDownloadWorker::download_model()
 {
-	char *config_folder = obs_module_get_config_path(obs_current_module(), "models");
-	const std::filesystem::path module_config_models_folder =
-		std::filesystem::absolute(config_folder);
+	char *config_folder = obs_module_config_path("models");
+#ifdef _WIN32
+	// convert mbstring to wstring
+	int count = MultiByteToWideChar(CP_UTF8, 0, config_folder, strlen(config_folder), NULL, 0);
+	std::wstring config_folder_str(count, 0);
+	MultiByteToWideChar(CP_UTF8, 0, config_folder, strlen(config_folder), &config_folder_str[0],
+			    count);
+	obs_log(LOG_INFO, "Download: Config models folder: %S", config_folder_str.c_str());
+#else
+	std::string config_folder_str = config_folder;
+	obs_log(LOG_INFO, "Download: Config models folder: %s", config_folder_str.c_str());
+#endif
 	bfree(config_folder);
 
+	const std::filesystem::path module_config_models_folder =
+		std::filesystem::absolute(config_folder_str);
+
 	// Check if the config folder exists
 	if (!std::filesystem::exists(module_config_models_folder)) {
 		obs_log(LOG_WARNING, "Config folder does not exist: %s",

diff --git a/src/model-utils/model-downloader.cpp b/src/model-utils/model-downloader.cpp
@@ -27,9 +27,25 @@ std::string find_model_folder(const ModelInfo &model_info)
 	}
 
 	// Check if model exists in the config folder
-	char *config_folder = obs_module_get_config_path(obs_current_module(), "models");
+	char *config_folder = obs_module_config_path("models");
+	if (!config_folder) {
+		obs_log(LOG_INFO, "Config folder not set.");
+		return "";
+	}
+#ifdef _WIN32
+	// convert mbstring to wstring
+	int count = MultiByteToWideChar(CP_UTF8, 0, config_folder, strlen(config_folder), NULL, 0);
+	std::wstring config_folder_str(count, 0);
+	MultiByteToWideChar(CP_UTF8, 0, config_folder, strlen(config_folder), &config_folder_str[0],
+			    count);
+	obs_log(LOG_INFO, "Config models folder: %S", config_folder_str.c_str());
+#else
+	std::string config_folder_str = config_folder;
+	obs_log(LOG_INFO, "Config models folder: %s", config_folder_str.c_str());
+#endif
+
 	const std::filesystem::path module_config_models_folder =
-		std::filesystem::absolute(config_folder);
+		std::filesystem::absolute(config_folder_str);
 	bfree(config_folder);
 
 	obs_log(LOG_INFO, "Checking if model '%s' exists in config...",
@@ -38,9 +54,9 @@ std::string find_model_folder(const ModelInfo &model_info)
 	const std::string model_local_config_path =
 		(module_config_models_folder / model_info.local_folder_name).string();
 
-	obs_log(LOG_INFO, "Model path in config: %s", model_local_config_path.c_str());
+	obs_log(LOG_INFO, "Lookig for model in config: %s", model_local_config_path.c_str());
 	if (std::filesystem::exists(model_local_config_path)) {
-		obs_log(LOG_INFO, "Model exists in config folder: %s",
+		obs_log(LOG_INFO, "Model folder exists in config folder: %s",
 			model_local_config_path.c_str());
 		return model_local_config_path;
 	}

diff --git a/src/transcription-filter-callbacks.cpp b/src/transcription-filter-callbacks.cpp
@@ -98,6 +98,11 @@ void send_sentence_to_file(struct transcription_filter_data *gf,
 		output_file << str_copy << std::endl;
 		output_file.close();
 	} else {
+		if (result.start_timestamp_ms == 0 && result.end_timestamp_ms == 0) {
+			// No timestamps, do not save the sentence to srt
+			return;
+		}
+
 		obs_log(gf->log_level, "Saving sentence to file %s, sentence #%d",
 			gf->output_file_path.c_str(), gf->sentence_number);
 		// Append sentence to file in .srt format
@@ -147,18 +152,10 @@ void set_text_callback(struct transcription_filter_data *gf,
 		       const DetectionResultWithText &resultIn)
 {
 	DetectionResultWithText result = resultIn;
-	uint64_t now = now_ms();
-	if (result.text.empty() || result.result != DETECTION_RESULT_SPEECH) {
-		// check if we should clear the current sub depending on the minimum subtitle duration
-		if ((now - gf->last_sub_render_time) > gf->min_sub_duration) {
-			// clear the current sub, run an empty sub
-			result.text = "";
-		} else {
-			// nothing to do, the incoming sub is empty
-			return;
-		}
+	if (!result.text.empty() && result.result == DETECTION_RESULT_SPEECH) {
+		gf->last_sub_render_time = now_ms();
+		gf->cleared_last_sub = false;
 	}
-	gf->last_sub_render_time = now;
 
 	std::string str_copy = result.text;
 
@@ -186,10 +183,6 @@ void set_text_callback(struct transcription_filter_data *gf,
 			obs_log(gf->log_level, "------ Suppressed text: '%s' -> '%s'",
 				original_str_copy.c_str(), str_copy.c_str());
 		}
-		if (remove_leading_trailing_nonalpha(str_copy).empty()) {
-			// if the text is empty after suppression, return
-			return;
-		}
 	}
 
 	if (gf->buffered_output) {

diff --git a/src/transcription-filter-data.h b/src/transcription-filter-data.h
@@ -38,6 +38,7 @@ struct transcription_filter_data {
 	size_t min_sub_duration;
 	// Last time a subtitle was rendered
 	uint64_t last_sub_render_time;
+	bool cleared_last_sub;
 
 	/* PCM buffers */
 	float *copy_buffers[MAX_PREPROC_CHANNELS];

diff --git a/src/transcription-filter.cpp b/src/transcription-filter.cpp
@@ -186,7 +186,7 @@ void transcription_filter_update(void *data, obs_data_t *s)
 	gf->sentence_number = 1;
 	gf->process_while_muted = obs_data_get_bool(s, "process_while_muted");
 	gf->min_sub_duration = (int)obs_data_get_int(s, "min_sub_duration");
-	gf->last_sub_render_time = 0;
+	gf->last_sub_render_time = now_ms();
 	bool new_buffered_output = obs_data_get_bool(s, "buffered_output");
 	int new_buffer_num_lines = (int)obs_data_get_int(s, "buffer_num_lines");
 	int new_buffer_num_chars_per_line = (int)obs_data_get_int(s, "buffer_num_chars_per_line");
@@ -428,7 +428,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
 	gf->frames = (size_t)((float)gf->sample_rate / (1000.0f / MAX_MS_WORK_BUFFER));
 	gf->last_num_frames = 0;
 	gf->min_sub_duration = (int)obs_data_get_int(settings, "min_sub_duration");
-	gf->last_sub_render_time = 0;
+	gf->last_sub_render_time = now_ms();
 	gf->log_level = (int)obs_data_get_int(settings, "log_level");
 	gf->save_srt = obs_data_get_bool(settings, "subtitle_save_srt");
 	gf->truncate_output_file = obs_data_get_bool(settings, "truncate_output_file");

diff --git a/src/whisper-utils/whisper-processing.cpp b/src/whisper-utils/whisper-processing.cpp
@@ -504,7 +504,20 @@ void whisper_loop(void *data)
 			current_vad_state = vad_based_segmentation(gf, current_vad_state);
 		}
 
-		// Sleep for 10 ms using the condition variable wshiper_thread_cv
+		if (!gf->cleared_last_sub) {
+			// check if we should clear the current sub depending on the minimum subtitle duration
+			uint64_t now = now_ms();
+			if ((now - gf->last_sub_render_time) > gf->min_sub_duration) {
+				// clear the current sub, call the callback with an empty string
+				obs_log(LOG_INFO,
+					"Clearing current subtitle. now: %lu ms, last: %lu ms", now,
+					gf->last_sub_render_time);
+				set_text_callback(gf, {DETECTION_RESULT_UNKNOWN, "", 0, 0, {}});
+				gf->cleared_last_sub = true;
+			}
+		}
+
+		// Sleep using the condition variable wshiper_thread_cv
 		// This will wake up the thread if there is new data in the input buffer
 		// or if the whisper context is null
 		std::unique_lock<std::mutex> lock(gf->whisper_ctx_mutex);