From 0089aafcfd59d9b38dd3146b33a95f2f72c123e7 Mon Sep 17 00:00:00 2001
From: agentmarketbot
Date: Sun, 26 Jan 2025 15:23:29 +0000
Subject: [PATCH] Add OpenAI Whisper API as transcription alternative

Implement dual transcription service support allowing users to choose
between AWS Transcribe and OpenAI Whisper API for voice message
transcription. Changes include:

- Add OpenAITranscriber class for Whisper API integration
- Refactor AudioTranscriber to support multiple services
- Update configuration to include OpenAI API key and service selection
- Add new environment variable TRANSCRIPTION_SERVICE
- Update documentation with setup instructions for both services
- Add openai package dependency
- Update API documentation references

The implementation maintains backward compatibility while providing more
flexibility in choosing transcription services based on user needs and
preferences.
---
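A quick usage sketch of the new AudioTranscriber facade (illustrative only, not part of the commit): it mirrors the wiring added to bot_handlers.py below and assumes the environment variables documented in the README are set; the voice-message URL is a placeholder.

```
from config import Config
from services import AWSServices, AudioTranscriber

# Build the transcriber the same way bot_handlers.py does: AWS resources are
# only created when the AWS backend is selected.
aws_services = AWSServices() if Config.TRANSCRIPTION_SERVICE == 'aws' else None
transcriber = AudioTranscriber(
    aws_services=aws_services,
    openai_api_key=Config.OPENAI_API_KEY,
    service=Config.TRANSCRIPTION_SERVICE,
)

# Placeholder URL; the bot supplies the actual voice-message file URL.
text = transcriber.transcribe_audio("https://example.com/voice-message.ogg")
print(text)
```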
 README.md       | 35 +++++++++++++++++++++++++--------
 bot_handlers.py | 12 +++++++++---
 config.py       |  2 ++
 pyproject.toml  |  1 +
 services.py     | 52 +++++++++++++++++++++++++++++++++++++++++++++++--
 5 files changed, 89 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 97e441a..f7c7d0c 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,8 @@ GroupLang-secretary-bot is a Telegram bot that transcribes voice messages, summa
 
 ## Features
 
-- Transcribes voice messages using AWS Transcribe
+- Transcribes voice messages using either AWS Transcribe or OpenAI Whisper API
+- Flexible choice of transcription service (AWS or OpenAI)
 - Summarizes transcribed text using a custom API
 - Allows users to tip for the service
 - Secures handling of API keys and tokens
@@ -25,7 +26,9 @@ GroupLang-secretary-bot is a Telegram bot that transcribes voice messages, summa
 
 ## Prerequisites
 
 - Poetry for dependency management
-- AWS account with Transcribe access
+- Either:
+  - AWS account with Transcribe access, OR
+  - OpenAI API key for Whisper API
 - Telegram Bot Token
 - MarketRouter API Key
@@ -70,13 +73,24 @@ To quickly get started with the GroupLang-secretary-bot, follow these steps:
 
 1. Set up environment variables:
    - `TELEGRAM_BOT_TOKEN`: Your Telegram Bot Token
+   - `MARKETROUTER_API_KEY`: Your MarketRouter API Key
+   - `TRANSCRIPTION_SERVICE`: Choose between 'aws' or 'openai' (default: 'aws')
+
+   For AWS Transcribe:
    - `AWS_ACCESS_KEY_ID`: Your AWS Access Key ID
    - `AWS_SECRET_ACCESS_KEY`: Your AWS Secret Access Key
-   - `MARKETROUTER_API_KEY`: Your MarketRouter API Key
+
+   For OpenAI Whisper:
+   - `OPENAI_API_KEY`: Your OpenAI API Key
+
+2. Configure credentials based on your chosen transcription service:
-2. Configure AWS credentials:
-   - Either set up the AWS CLI with `aws configure` or use environment variables as mentioned above.
-   - Ensure that your AWS IAM user has the necessary permissions for AWS Transcribe.
+   For AWS Transcribe:
+   - Either set up the AWS CLI with `aws configure` or use environment variables as mentioned above
+   - Ensure that your AWS IAM user has the necessary permissions for AWS Transcribe
+
+   For OpenAI Whisper:
+   - Ensure you have a valid OpenAI API key with access to the Whisper API
 
 1. Activate the Poetry virtual environment:
 
    ```
@@ -139,7 +153,12 @@ poetry update package_name
 
 The bot uses the following external APIs:
 
-- AWS Transcribe: For audio transcription
+- For audio transcription (configurable):
+  - AWS Transcribe: Amazon's speech-to-text service
+  - OpenAI Whisper API: OpenAI's speech recognition model
 - MarketRouter API: For text summarization and reward submission
 
-Refer to the respective documentation for more details on these APIs.
+Refer to the respective documentation for more details:
+- [AWS Transcribe Documentation](https://docs.aws.amazon.com/transcribe/)
+- [OpenAI Whisper API Documentation](https://platform.openai.com/docs/guides/speech-to-text)
+- MarketRouter API Documentation
diff --git a/bot_handlers.py b/bot_handlers.py
index 74b6986..0149074 100644
--- a/bot_handlers.py
+++ b/bot_handlers.py
@@ -8,9 +8,15 @@
 logger = logging.getLogger(__name__)
 
 # Initialize services
-aws_services = AWSServices()
-audio_transcriber = AudioTranscriber(aws_services)
-text_summarizer = TextSummarizer(os.environ.get('MARKETROUTER_API_KEY'))
+from config import Config
+
+aws_services = AWSServices() if Config.TRANSCRIPTION_SERVICE == 'aws' else None
+audio_transcriber = AudioTranscriber(
+    aws_services=aws_services,
+    openai_api_key=Config.OPENAI_API_KEY,
+    service=Config.TRANSCRIPTION_SERVICE
+)
+text_summarizer = TextSummarizer(Config.MARKETROUTER_API_KEY)
 
 def handle_update(update: Dict[str, Any]) -> None:
     if 'message' in update:
diff --git a/config.py b/config.py
index 4b02d69..38526c3 100644
--- a/config.py
+++ b/config.py
@@ -6,3 +6,5 @@ class Config:
     MARKETROUTER_API_KEY = os.environ.get('MARKETROUTER_API_KEY')
     AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID')
     AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')
+    OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
+    TRANSCRIPTION_SERVICE = os.environ.get('TRANSCRIPTION_SERVICE', 'aws') # 'aws' or 'openai'
diff --git a/pyproject.toml b/pyproject.toml
index 39d0ada..690d7ed 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,7 @@ requests = "^2.32.3"
 nltk = "^3.9.1"
 langdetect = "^1.0.9"
 mangum = "^0.18.0"
+openai = "^1.12.0"
 
 [tool.poetry.dev-dependencies]
 # Add any development dependencies here
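Because TRANSCRIPTION_SERVICE and OPENAI_API_KEY are read straight from the environment, a start-up check can catch misconfiguration before the first voice message arrives. The sketch below is illustrative only and not part of this patch; the helper name validate_transcription_config is hypothetical and assumes only the Config attributes added above.

```
# Hypothetical start-up check, not included in this patch.
from config import Config

def validate_transcription_config() -> None:
    service = (Config.TRANSCRIPTION_SERVICE or 'aws').lower()
    if service not in ('aws', 'openai'):
        raise ValueError(f"Unsupported TRANSCRIPTION_SERVICE: {service}")
    if service == 'openai' and not Config.OPENAI_API_KEY:
        raise ValueError("OPENAI_API_KEY is required when TRANSCRIPTION_SERVICE='openai'")
    # AWS credentials may come from the environment or from `aws configure`,
    # so they are deliberately not checked here.
```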
diff --git a/services.py b/services.py
index 8d81328..bb57d6e 100644
--- a/services.py
+++ b/services.py
@@ -50,7 +50,38 @@ def start_transcription_job(self, job_name, media_uri, media_format='ogg', langu
     def get_transcription_job_status(self, job_name):
         return self.transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
 
-class AudioTranscriber:
+class OpenAITranscriber:
+    def __init__(self, api_key: str):
+        self.api_key = api_key
+        self.api_url = "https://api.openai.com/v1/audio/transcriptions"
+
+    def transcribe_audio(self, file_url: str) -> str:
+        try:
+            audio_content = self._download_audio(file_url)
+
+            headers = {
+                "Authorization": f"Bearer {self.api_key}"
+            }
+
+            files = {
+                'file': ('audio.ogg', audio_content, 'audio/ogg'),
+                'model': (None, 'whisper-1'),
+            }
+
+            response = requests.post(self.api_url, headers=headers, files=files)
+            response.raise_for_status()
+
+            return response.json()['text']
+        except Exception as e:
+            logger.error(f"An error occurred with OpenAI transcription: {e}")
+            raise
+
+    def _download_audio(self, file_url: str) -> bytes:
+        response = requests.get(file_url)
+        response.raise_for_status()
+        return response.content
+
+class AWSTranscriber:
     def __init__(self, aws_services: AWSServices):
         self.aws_services = aws_services
         self.bucket_name = 'audio-transcribe-temp'
@@ -74,7 +105,7 @@ def transcribe_audio(self, file_url: str) -> str:
 
             return transcription
         except Exception as e:
-            logger.error(f"An error occurred: {e}")
+            logger.error(f"An error occurred with AWS transcription: {e}")
             raise
 
     def _download_audio(self, file_url: str) -> bytes:
@@ -95,6 +126,23 @@ def _wait_for_transcription(self, job_name: str) -> str:
         else:
             raise Exception("Transcription failed")
 
+class AudioTranscriber:
+    def __init__(self, aws_services: Optional[AWSServices] = None, openai_api_key: Optional[str] = None, service: str = 'aws'):
+        self.service = service.lower()
+        if self.service == 'aws':
+            if not aws_services:
+                raise ValueError("AWS services required for AWS transcription")
+            self.transcriber = AWSTranscriber(aws_services)
+        elif self.service == 'openai':
+            if not openai_api_key:
+                raise ValueError("OpenAI API key required for OpenAI transcription")
+            self.transcriber = OpenAITranscriber(openai_api_key)
+        else:
+            raise ValueError(f"Unsupported transcription service: {service}")
+
+    def transcribe_audio(self, file_url: str) -> str:
+        return self.transcriber.transcribe_audio(file_url)
+
 class TextSummarizer:
     def __init__(self, api_key: str):
         self.api_key = api_key
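The request shape used by OpenAITranscriber can also be exercised directly against the Whisper endpoint, for example to verify an API key before wiring it into the bot. The snippet below is a minimal sketch mirroring the multipart form built in services.py; sample.ogg is a placeholder path.

```
import os
import requests

# Mirrors the multipart form built by OpenAITranscriber: the audio file plus a
# plain 'model' field selecting whisper-1. 'sample.ogg' is a placeholder path.
with open("sample.ogg", "rb") as audio_file:
    response = requests.post(
        "https://api.openai.com/v1/audio/transcriptions",
        headers={"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"},
        files={
            "file": ("sample.ogg", audio_file, "audio/ogg"),
            "model": (None, "whisper-1"),
        },
    )
response.raise_for_status()
print(response.json()["text"])
```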