Skip to content

Commit

Permalink
Refactor Hugging Face Whisper transcription link
Browse files: browse the repository at this point in the history
- Simplified file content decoding in `get_file_content()`
- Removed temporary file handling in transcription process
- Updated logging and error handling in transcription workflow
- Removed unnecessary dialog filtering conditions
- Simplified API request headers and content handling

The changes streamline the Whisper transcription link implementation and improve its robustness.
  • Loading branch information
howethomas committed Jan 27, 2025
1 parent 6c370de commit ec903cb
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 34 deletions.
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ services:
- redis
env_file:
- .env
stop_grace_period: 60s
stop_grace_period: 5s
networks:
- conserver

Expand Down
48 changes: 15 additions & 33 deletions server/links/hugging_face_whisper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,8 @@ def get_file_content(dialog: dict) -> bytes:
Exception: If file cannot be retrieved or verified
"""
if "body" in dialog:
# Handle inline file
if dialog.get("encoding") == "base64url":
return base64.urlsafe_b64decode(dialog["body"])
elif dialog.get("encoding") == "none":
return dialog["body"].encode('utf-8')
elif dialog.get("encoding") == "json":
return str(dialog["body"]).encode('utf-8')
else:
raise Exception(f"Unsupported encoding: {dialog.get('encoding')}")
# body contains the base64 encoded content. Decode and return
return base64.b64decode(dialog["body"])

elif "url" in dialog:
# Handle external file
Expand Down Expand Up @@ -126,18 +119,17 @@ def transcribe_hugging_face_whisper(dialog: dict, opts: dict) -> Optional[dict]:
content = get_file_content(dialog)

# Write content to temporary file
with tempfile.NamedTemporaryFile(suffix='.flac', delete=True) as temp_file:
temp_file.write(content)
temp_file.flush()

headers = {
"Accept": "application/json",
"Authorization": f"Bearer {opts['API_KEY']}",
"Content-Type": f"{opts['Content-Type']}",
}

with open(temp_file.name, "rb") as f:
response = requests.post(opts["API_URL"], headers=headers, data=f)
# with tempfile.NamedTemporaryFile(suffix='.flac', delete=True) as temp_file:
# temp_file.write(content)
# temp_file.flush()

headers = {
"Accept": "application/json",
"Authorization": "Bearer " + opts['API_KEY'],
"Content-Type": opts['Content-Type'],
}
response = requests.post(opts["API_URL"], headers=headers, data=content)
# with open(temp_file.name, "rb") as f:

return response.json()

Expand Down Expand Up @@ -184,15 +176,6 @@ def run(
)
continue

# Skip dialogs without URLs
if not dialog["url"]:
logger.info(
"whisper plugin: skipping no URL dialog %s in vCon: %s",
index,
vCon.uuid,
)
continue

# Skip short recordings
if int(dialog["duration"]) < opts["minimum_duration"]:
logger.info("Skipping short recording dialog %s in vCon: %s", index, vCon.uuid)
Expand All @@ -206,6 +189,7 @@ def run(
try:
# Attempt transcription with timing metrics
start = time.time()
logger.debug("Transcribing dialog %s in vCon: %s", index, vCon.uuid)
result = transcribe_hugging_face_whisper(dialog, opts)
stats_gauge("conserver.link.hugging_face_whisper.transcription_time", time.time() - start)
except (RetryError, Exception) as e:
Expand All @@ -218,10 +202,8 @@ def run(
stats_count("conserver.link.hugging_face_whisper.transcription_failures")
break

# Track confidence metrics
stats_gauge("conserver.link.hugging_face_whisper.confidence", result["confidence"])

logger.info("Transcribed vCon: %s", vCon.uuid)
logger.info(result)

# Prepare vendor schema without sensitive data
vendor_schema = {"opts": {k: v for k, v in opts.items() if k != "API_KEY"}}
Expand Down

0 comments on commit ec903cb

Please sign in to comment.