-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
143 lines (109 loc) · 3.91 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import pyperclip
import pyaudio
import wave
import whisper
import os
import threading
import torch
import warnings
import time
from pynput import keyboard
from playsound import playsound # Add this for playing beep sounds
# Files written by each recording session.
WAVE_OUTPUT_FILENAME = "generated/output.wav"
TRANSCRIPTION_OUTPUT_FILENAME = "generated/transcription.txt"

# Feedback sounds played at each stage of the workflow.
START_BEEP_FILENAME = "sounds/beep-06.wav"  # Audio file for start beep
STOP_BEEP_FILENAME = "sounds/beep-08b.wav"  # Audio file for stop beep
COPY_BEEP_FILENAME = "sounds/beep-24.wav"  # Audio file for copy-to-clipboard beep

# Best-effort removal of a WAV file left over from a previous run. Only
# filesystem errors (missing file, permissions, ...) are ignored; anything
# else, such as KeyboardInterrupt, is allowed to propagate.
try:
    os.remove(WAVE_OUTPUT_FILENAME)
except OSError:
    pass
# Suppress every FutureWarning for the rest of the process.
warnings.filterwarnings("ignore", category=FutureWarning)

# Announce at startup whether CUDA is available.
if not torch.cuda.is_available():
    print("Using CPU")
else:
    print("Using GPU:", torch.cuda.get_device_name(0))
# Shared state, written by the hotkey handler and the recording thread.
#   recording      -- set when a record/transcribe run is started
#   stop_recording -- set to ask the capture loop to finish
recording = stop_recording = False
key_presses = []  # Timestamps of recent ';' key presses

# Audio capture settings passed to PyAudio when the input stream is opened.
CHUNK = 1024  # frames_per_buffer, and the size of each read
RATE = 44100  # sample rate
CHANNELS = 1
FORMAT = pyaudio.paInt16

# Single PyAudio instance used for the lifetime of the script.
audio = pyaudio.PyAudio()
# Function to handle recording and transcription
def record_and_transcribe():
    """Record from the microphone until asked to stop, then transcribe.

    Runs on a worker thread started by ``toggle_recording``. The steps are:

    1. Play the start beep and capture audio chunks until the global
       ``stop_recording`` flag becomes true.
    2. Play the stop beep and save the audio to ``WAVE_OUTPUT_FILENAME``.
    3. Transcribe the WAV file with the Whisper "base" model (on CUDA when
       available, otherwise on the CPU).
    4. Write the text to ``TRANSCRIPTION_OUTPUT_FILENAME`` (UTF-8), copy it
       to the clipboard and play the copy beep.

    The ``recording`` and ``stop_recording`` flags are always reset on the
    way out, even when a step raises, so the hotkey keeps working after a
    failed run. Exceptions are not swallowed; they propagate to the thread.
    """
    global recording, stop_recording

    try:
        # Play start beep
        playsound(START_BEEP_FILENAME)

        # No device index is given, so PyAudio chooses the input device.
        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE, input=True,
                            frames_per_buffer=CHUNK)
        frames = []
        print("Recording...")
        try:
            while not stop_recording:
                frames.append(stream.read(CHUNK))
        finally:
            # Release the input device even if a read fails part-way.
            stream.stop_stream()
            stream.close()

        # Play stop beep
        playsound(STOP_BEEP_FILENAME)

        # Make sure the output directories exist before writing into them.
        for output_path in (WAVE_OUTPUT_FILENAME, TRANSCRIPTION_OUTPUT_FILENAME):
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

        # Save the recorded data as a WAV file
        with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(audio.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(b''.join(frames))
        print("Recording stopped. Transcribing...")

        # Load Whisper model and transcribe audio
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = whisper.load_model("base", device=device)
        transcription_text = model.transcribe(WAVE_OUTPUT_FILENAME)["text"]

        # Explicit UTF-8 so non-ASCII text does not depend on the
        # platform's default encoding.
        with open(TRANSCRIPTION_OUTPUT_FILENAME, 'w', encoding="utf-8") as f:
            f.write(transcription_text)
        print(f"Transcription saved to {TRANSCRIPTION_OUTPUT_FILENAME}")

        # Copy the in-memory text directly; no need to re-read the file.
        pyperclip.copy(transcription_text)
        print("Transcription copied to clipboard!")

        # Play copy-to-clipboard beep
        playsound(COPY_BEEP_FILENAME)
    finally:
        # Reset flags for the next round, whether or not this one succeeded.
        recording = False
        stop_recording = False
def toggle_recording():
    """Start a recording if none is running, otherwise ask it to stop.

    Starting sets ``recording`` and launches ``record_and_transcribe`` on a
    new thread. Stopping only raises the ``stop_recording`` flag, which the
    capture loop polls.
    """
    global recording, stop_recording

    if recording:
        # A run is already in flight: signal it to finish.
        stop_recording = True
        return

    # Nothing running yet: mark the state and spawn the worker thread.
    recording = True
    stop_recording = False
    worker = threading.Thread(target=record_and_transcribe)
    worker.start()
def on_press(key):
    """Keyboard callback: toggle recording on two quick ';' presses.

    Every ';' press is timestamped in the global ``key_presses`` list.
    Once the list holds at least two presses less than one second old,
    ``toggle_recording`` is called and the list is cleared.
    """
    global key_presses

    # Only the ';' key takes part in the hotkey pattern.
    is_hotkey = key == keyboard.KeyCode(char=';')
    if not is_hotkey:
        return

    # Remember this press, then drop anything that is 1 second old or more.
    now = time.time()
    key_presses.append(now)
    key_presses = list(filter(lambda stamp: now - stamp < 1, key_presses))

    # Fewer than 2 recent ';' presses: keep waiting.
    if len(key_presses) < 2:
        return

    toggle_recording()
    key_presses = []  # Reset after triggering
# Start listening to the hotkey pattern
print("Listening for ';' pressed 2 times in rapid succession to start/stop recording...")
try:
    with keyboard.Listener(on_press=on_press) as listener:
        listener.join()  # Block the main thread until the listener stops
finally:
    # Cleanup: release PyAudio even when the wait above is interrupted
    # (for example by Ctrl+C), which previously skipped this call.
    audio.terminate()