Skip to content

Commit

Permalink
Do not crash transcription system if the language code is unknown
Browse files Browse the repository at this point in the history
  • Loading branch information
KillerX committed Aug 7, 2024
1 parent 554c479 commit ad166d5
Showing 1 changed file with 119 additions and 0 deletions.
119 changes: 119 additions & 0 deletions services/transcribe/transcribe.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package transcribe
import (
"context"
"fmt"
"strings"
"time"

"github.com/bcc-code/bcc-media-flows/common"
Expand Down Expand Up @@ -70,6 +71,124 @@ func DebugResponse(resp *resty.Response) {
fmt.Println(" RemoteAddr :", ti.RemoteAddr.String())
}

// whisperSupportedLanguages is the set of lower-case language codes that
// normalizeTranscriptionLanguage lets through unchanged. Every listed code
// maps to true, so a plain index expression doubles as a membership test.
var whisperSupportedLanguages = func() map[string]bool {
	codes := []string{
		"en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr",
		"pl", "ca", "nl", "ar", "sv", "it", "id", "hi", "fi", "vi",
		"he", "uk", "el", "ms", "cs", "ro", "da", "hu", "ta", "no",
		"th", "ur", "hr", "bg", "lt", "la", "mi", "ml", "cy", "sk",
		"te", "fa", "lv", "bn", "sr", "az", "sl", "kn", "et", "mk",
		"br", "eu", "is", "hy", "ne", "mn", "bs", "kk", "sq", "sw",
		"gl", "mr", "pa", "si", "km", "sn", "yo", "so", "af", "oc",
		"ka", "be", "tg", "sd", "gu", "am", "yi", "lo", "uz", "fo",
		"ht", "ps", "tk", "nn", "mt", "sa", "lb", "my", "bo", "tl",
		"mg", "as", "tt", "haw", "ln", "ha", "ba", "jw", "su", "yue",
	}

	supported := make(map[string]bool, len(codes))
	for _, code := range codes {
		supported[code] = true
	}
	return supported
}()

// normalizeTranscriptionLanguage converts a caller-supplied language hint into
// a value that is safe to hand on to the transcription step.
//
// The input is trimmed and lower-cased first. The result is then:
//   - "" or "auto": returned unchanged (whitespace-only input becomes "").
//   - a code present in whisperSupportedLanguages: returned as-is.
//   - a locale-style tag such as "en-US" or "pt_BR" whose primary subtag is
//     present in whisperSupportedLanguages: the primary subtag ("en", "pt").
//   - anything else: "auto", so an unknown code never reaches the
//     transcription step.
func normalizeTranscriptionLanguage(language string) string {
	language = strings.ToLower(strings.TrimSpace(language))

	if language == "auto" || language == "" {
		return language
	}

	// Missing keys yield the zero value false, so indexing is a membership test.
	if whisperSupportedLanguages[language] {
		return language
	}

	// Keep the explicit hint for region/script-qualified tags instead of
	// discarding it: "en-us" -> "en", "zh_hant" -> "zh".
	if sep := strings.IndexAny(language, "-_"); sep > 0 {
		if primary := language[:sep]; whisperSupportedLanguages[primary] {
			return primary
		}
	}

	// Unknown code: fall back to automatic detection.
	return "auto"
}

func DoTranscribe(
ctx context.Context,
inputFile string,
Expand Down

0 comments on commit ad166d5

Please sign in to comment.