# asrbot_local.py
"""
Bot to transcribe Telegram voice messages using OpenAI's whisper-large-v3
model, loaded through Hugging Face Transformers.

Usage:

```bash
python asrbot_local.py
```

Press Ctrl-C on the command line to stop the bot.
"""
import asyncio
import functools
import logging
import tempfile
from pathlib import Path

import audiofile
import librosa
from telegram import Update
from telegram.ext import Application, ContextTypes, MessageHandler, filters
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq

from keys import TELEGRAM_KEY
# Configure the root logger: INFO and above, with timestamp, logger name,
# level and message on each line.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)

# Raise httpx to WARNING so its per-request GET/POST lines are not logged.
logging.getLogger("httpx").setLevel(logging.WARNING)

# Module-level logger for this script.
logger = logging.getLogger(__name__)
@functools.lru_cache(maxsize=1)
def _load_asr():
    """Load the Whisper processor and model once and reuse them afterwards.

    Returns:
        A ``(processor, model)`` tuple for ``openai/whisper-large-v3``.
    """
    model_id = "openai/whisper-large-v3"
    processor = AutoProcessor.from_pretrained(model_id)
    model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
    return processor, model


def _transcribe(audio_path) -> str:
    """Return the Whisper transcription of the audio file at *audio_path*.

    This is synchronous and CPU/GPU heavy; call it from a worker thread, not
    directly from the event loop.
    """
    processor, model = _load_asr()
    target_sr = processor.feature_extractor.sampling_rate

    signal, sampling_rate = audiofile.read(str(audio_path))
    # The feature extractor expects audio at its own sampling rate.
    if sampling_rate != target_sr:
        signal = librosa.resample(signal, orig_sr=sampling_rate, target_sr=target_sr)

    # NOTE(review): the processor is called with default settings, so audio
    # longer than Whisper's input window is presumably truncated -- confirm
    # the behavior for long voice notes.
    inputs = processor(signal, sampling_rate=target_sr, return_tensors="pt")
    generated_ids = model.generate(input_features=inputs.input_features)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


async def voice(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Transcribe an incoming voice message and reply with the text.

    Args:
        update: Incoming Telegram update; expected to carry a voice message.
        context: Handler context (unused).
    """
    # ``update.message`` is None for edited messages and channel posts, so use
    # the effective message and bail out if there is nothing to transcribe.
    message = update.effective_message
    if message is None or message.voice is None:
        return

    audio_file = await message.voice.get_file()

    # Download into a private temporary directory so nothing is left behind
    # in the working directory and invocations do not share a file.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_file = Path(tmp_dir) / "voice_note.ogg"
        await audio_file.download_to_drive(tmp_file)

        # Decoding and inference are blocking; run them off the event loop.
        loop = asyncio.get_running_loop()
        output = await loop.run_in_executor(None, _transcribe, tmp_file)

    # Telegram rejects empty message text, so fall back to a placeholder.
    reply = output if output.strip() else "(no speech detected)"
    await message.reply_text(reply)
def main() -> None:
    """Build the Telegram application, register the voice handler, and poll."""
    # Authenticate with the bot token imported from the local ``keys`` module.
    app = Application.builder().token(TELEGRAM_KEY).build()

    # Route voice messages that are not commands to the ``voice`` callback.
    voice_filter = filters.VOICE & ~filters.COMMAND
    voice_handler = MessageHandler(voice_filter, voice, block=True)
    app.add_handler(voice_handler)

    # Poll for every update type until the user presses Ctrl-C.
    app.run_polling(allowed_updates=Update.ALL_TYPES)


if __name__ == "__main__":
    main()