Skip to content

Commit

Permalink
Merge pull request #13 from mideind/nocache
Browse files Browse the repository at this point in the history
Allow audio cache size of 0 (disables caching)
  • Loading branch information
sveinbjornt authored Oct 10, 2024
2 parents 8d19e78 + 3cd63ad commit 7073cd9
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 158 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ jobs:

strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12", "pypy-3.9"]
python-version: ["3.9", "3.12", "pypy-3.10"]

steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand All @@ -36,18 +36,18 @@ jobs:
- name: Test with pytest
run: |
pytest --run-slow -vvvrP --log-level=DEBUG --capture=tee-sys
- name: Lint with pre-commit hooks
run: |
pre-commit run --all-files
# - name: Lint with pre-commit hooks
# run: |
# pre-commit run --all-files

network:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.10
uses: actions/setup-python@v4
- name: Set up Python 3.12
uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.12'
- name: Install dependencies
run: |
python -m pip install --upgrade pip wheel setuptools
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ dependencies = [
"typing-extensions>=4.7.1",
"pydantic==2.4.0",
"pydantic-settings>=2.0.3",
"cachetools>=5.3.1",
"cachetools>=5.5.0",
# For parsing Icelandic text
"islenska<1.0.0",
"islenska==1.0.3",
"reynir<4.0.0",
"tokenizer<4.0.0",
# Azure TTS
Expand Down
55 changes: 37 additions & 18 deletions src/icespeak/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
# We don't import annotations from __future__ here
# due to pydantic
from typing import Any, Optional
from typing_extensions import Literal

import json
import os
Expand Down Expand Up @@ -93,7 +92,9 @@ class Settings(BaseSettings):
extra="ignore",
)

DEFAULT_VOICE: str = Field(default="Gudrun", description="Default TTS voice if none is requested.")
DEFAULT_VOICE: str = Field(
default="Gudrun", description="Default TTS voice if none is requested."
)
DEFAULT_VOICE_SPEED: float = Field(
default=1.0,
le=MAX_SPEED,
Expand All @@ -104,7 +105,9 @@ class Settings(BaseSettings):
default=TextFormats.SSML,
description="Default format to interpret input text as.",
)
DEFAULT_AUDIO_FORMAT: AudioFormats = Field(default=AudioFormats.MP3, description="Default audio output format.")
DEFAULT_AUDIO_FORMAT: AudioFormats = Field(
default=AudioFormats.MP3, description="Default audio output format."
)

AUDIO_DIR: Optional[Path] = Field(
default=None,
Expand All @@ -113,10 +116,16 @@ class Settings(BaseSettings):
"If not set, creates a directory in the platform's temporary directory."
),
)
AUDIO_CACHE_SIZE: int = Field(default=300, gt=0, description="Max number of audio files to cache.")
AUDIO_CACHE_CLEAN: bool = Field(default=True, description="If True, cleans up generated audio files upon exit.")
AUDIO_CACHE_SIZE: int = Field(
default=300, gt=-1, description="Max number of audio files to cache."
)
AUDIO_CACHE_CLEAN: bool = Field(
default=True, description="If True, cleans up generated audio files upon exit."
)

KEYS_DIR: Path = Field(default=Path("keys"), description="Where to look for API keys.")
KEYS_DIR: Path = Field(
default=Path("keys"), description="Where to look for API keys."
)
AWSPOLLY_KEY_FILENAME: str = Field(
default="AWSPollyServerKey.json",
description="Name of the AWS Polly API key file.",
Expand Down Expand Up @@ -181,26 +190,24 @@ class Keys(BaseModel):

azure: Optional[AzureKey] = Field(default=None, description="Azure API key.")
aws: Optional[AWSPollyKey] = Field(default=None, description="AWS Polly API key.")
google: Optional[dict[str, Any]] = Field(default=None, description="Google API key.")
# TODO: Re-implement TTS with Tiro
tiro: Literal[None] = Field(default=None)
google: Optional[dict[str, Any]] = Field(
default=None, description="Google API key."
)
openai: Optional[OpenAIKey] = Field(default=None, description="OpenAI API key.")

def __hash__(self):
return hash((self.azure, self.aws, self.google, self.tiro, self.openai))
return hash((self.azure, self.aws, self.google, self.openai))

def __eq__(self, other: object):
return isinstance(other, Keys) and (
self.azure,
self.aws,
self.google,
self.tiro,
self.openai,
) == (
other.azure,
other.aws,
other.google,
other.tiro,
other.openai,
)

Expand All @@ -209,23 +216,33 @@ def __eq__(self, other: object):

_kd = SETTINGS.KEYS_DIR
if not (_kd.exists() and _kd.is_dir()):
_LOG.warning("Keys directory missing or incorrect, TTS will not work! Set to: %s", _kd)
_LOG.warning(
"Keys directory missing or incorrect, TTS will not work! Set to: %s", _kd
)
else:
# Load API keys, logging exceptions in level DEBUG so they aren't logged twice,
# as exceptions are logged as warnings when voice modules are initialized
try:
API_KEYS.aws = AWSPollyKey.model_validate_json((_kd / SETTINGS.AWSPOLLY_KEY_FILENAME).read_text().strip())
API_KEYS.aws = AWSPollyKey.model_validate_json(
(_kd / SETTINGS.AWSPOLLY_KEY_FILENAME).read_text().strip()
)
except Exception as err:
_LOG.debug(
"Could not load AWS Polly API key, ASR with AWS Polly will not work. Error: %s",
err,
)
try:
API_KEYS.azure = AzureKey.model_validate_json((_kd / SETTINGS.AZURE_KEY_FILENAME).read_text().strip())
API_KEYS.azure = AzureKey.model_validate_json(
(_kd / SETTINGS.AZURE_KEY_FILENAME).read_text().strip()
)
except Exception as err:
_LOG.debug("Could not load Azure API key, ASR with Azure will not work. Error: %s", err)
_LOG.debug(
"Could not load Azure API key, ASR with Azure will not work. Error: %s", err
)
try:
API_KEYS.google = json.loads((_kd / SETTINGS.GOOGLE_KEY_FILENAME).read_text().strip())
API_KEYS.google = json.loads(
(_kd / SETTINGS.GOOGLE_KEY_FILENAME).read_text().strip()
)
except Exception as err:
_LOG.debug(
"Could not load Google API key, ASR with Google will not work. Error: %s",
Expand All @@ -236,7 +253,9 @@ def __eq__(self, other: object):
if key := os.getenv("OPENAI_API_KEY"):
API_KEYS.openai = OpenAIKey(api_key=SecretStr(key))
else:
API_KEYS.openai = OpenAIKey.model_validate_json((_kd / SETTINGS.OPENAI_KEY_FILENAME).read_text().strip())
API_KEYS.openai = OpenAIKey.model_validate_json(
(_kd / SETTINGS.OPENAI_KEY_FILENAME).read_text().strip()
)
except Exception as err:
_LOG.debug(
"Could not load OpenAI API key, ASR with OpenAI will not work. Error: %s",
Expand Down
38 changes: 28 additions & 10 deletions src/icespeak/tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,14 @@

from .settings import SETTINGS, TRACE, Keys
from .transcribe import TranscriptionOptions

# TODO: Re implement Tiro
from .voices import BaseVoice, TTSOptions, VoiceInfoT, aws_polly, azure, openai # , google
from .voices import (
BaseVoice,
TTSOptions,
VoiceInfoT,
aws_polly,
azure,
openai,
)

if TYPE_CHECKING:
from pathlib import Path
Expand All @@ -60,7 +65,6 @@ def _setup_voices() -> tuple[VoicesT, ServicesT]:
azure.AzureVoice(),
openai.OpenAIVoice(),
# google.GoogleVoice(),
# tiro.TiroVoice(),
)
voices: VoicesT = {}
for service in services:
Expand All @@ -72,7 +76,8 @@ def _setup_voices() -> tuple[VoicesT, ServicesT]:
# Info about each voice
if voice in voices:
_LOG.warning(
"Voice named %r already exists! " + "Skipping the one defined in module %s.",
"Voice named %r already exists! "
+ "Skipping the one defined in module %s.",
voice,
service.name,
)
Expand Down Expand Up @@ -122,7 +127,9 @@ def _cleanup():
audiofile.unlink(missing_ok=True)

# Small daemon thread which deletes files sent to the expired queue
_cleanup_thread = threading.Thread(target=_cleanup, name="audio_cleanup", daemon=True)
_cleanup_thread = threading.Thread(
target=_cleanup, name="audio_cleanup", daemon=True
)
_cleanup_thread.start()

def _evict_all():
Expand Down Expand Up @@ -168,11 +175,20 @@ def tts_to_file(
"""
if _LOG.isEnabledFor(DEBUG):
_LOG.debug(
"tts_to_file, text: %r, TTS options: %s, " + "transcribe: %r, transcription options: %s",
"tts_to_file, text: %r, TTS options: %s, "
+ "transcribe: %r, transcription options: %s",
text,
tts_options.model_dump(exclude_defaults=True) or "<default>" if tts_options else "None",
(
tts_options.model_dump(exclude_defaults=True) or "<default>"
if tts_options
else "None"
),
transcribe,
transcription_options.model_dump(exclude_defaults=True) or "<default>" if transcription_options else "None",
(
transcription_options.model_dump(exclude_defaults=True) or "<default>"
if transcription_options
else "None"
),
)
tts_options = tts_options or TTSOptions()
try:
Expand All @@ -181,7 +197,9 @@ def tts_to_file(
raise ValueError(f"Voice {tts_options.voice!r} not available.") from e

if tts_options.audio_format not in service.audio_formats:
raise ValueError(f"Service {service.name} doesn't support audio format {tts_options.audio_format}.")
raise ValueError(
f"Service {service.name} doesn't support audio format {tts_options.audio_format}."
)

if transcribe:
transcription_options = transcription_options or TranscriptionOptions()
Expand Down
106 changes: 0 additions & 106 deletions src/icespeak/voices/tiro.py

This file was deleted.

14 changes: 0 additions & 14 deletions tests/test_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,20 +125,6 @@ def test_Google_speech_synthesis():
path.unlink()


@pytest.mark.skipif(condition=True, reason="Missing Tiro API Key.")
@pytest.mark.network
def test_Tiro_speech_synthesis():
# Test Tiro
tts_out = tts_to_file(
_TEXT,
TTSOptions(text_format=TextFormats.TEXT, audio_format="mp3", voice="Alfur"),
)
path = tts_out.file
assert path.is_file(), "Expected audio file to exist"
assert path.stat().st_size > _MIN_AUDIO_SIZE, "Expected longer audio data"
path.unlink()


@pytest.mark.skipif(API_KEYS.openai is None, reason="Missing OpenAI API Key.")
@pytest.mark.network
def test_OpenAI_speech_synthesis():
Expand Down

0 comments on commit 7073cd9

Please sign in to comment.