File size: 2,477 Bytes
"""
Buddy CLI — Headless Push-to-Talk AI Assistant

Run without a GUI, for testing or as a daemon.

    python cli.py
"""
import sys
import threading
import time
from pynput import keyboard
from audio_io import PushToTalkRecorder, TTSEngine
from screen_capture import capture_primary_monitor
from vision_llm import MultimodalAssistant
def main():
    """Run the headless push-to-talk assistant until Esc is pressed.

    Prints the key bindings, constructs the recorder, the TTS engine and the
    multimodal assistant, then blocks on a global keyboard listener:

    * Pressing F9 starts the recorder; releasing it stops the recorder and
      hands the captured audio to a background thread that transcribes it,
      captures the primary monitor, queries the assistant and speaks the
      answer.
    * Pressing Esc stops the listener and ends the program.

    Exits with status 1 if the assistant cannot be constructed.
    """
    print("=" * 60)
    print(" Buddy — Push-to-Talk AI Assistant (CLI)")
    print("=" * 60)
    print("\nTaster:")
    print(" F9 — Hold for aa snakke, slipp for aa sende")
    print(" Esc — Avslutt\n")
    print("Laster modeller (dette tar et minutt...)...")

    recorder = PushToTalkRecorder()
    tts = TTSEngine(voice="nb-NO-FinnNeural")
    try:
        assistant = MultimodalAssistant()
    except Exception as e:
        print("Feil ved modelllasting: " + str(e))
        sys.exit(1)
    print("Klar!\n")

    # State shared between the two keyboard callbacks, guarded by ``lock``.
    f9_held = False    # F9 is currently down (whether or not recording works)
    recording = False  # recorder.start() succeeded and stop() is still owed
    lock = threading.Lock()

    def process_turn(audio_bytes):
        """Handle one utterance: transcribe, look at the screen, answer, speak."""
        try:
            print("[STT] Transkriberer...")
            transcript = assistant.transcribe_audio(audio_bytes)
            print("Du: " + transcript)
            print("[VLM] Ser paa skjermbildet og tenker...")
            screenshot = capture_primary_monitor()
            response = assistant.ask_with_image(screenshot, transcript)
            print("Buddy: " + response)
            print("-" * 60)
            tts.speak(response, blocking=False)
        except Exception as e:
            # Worker-thread boundary: report the failure and keep the
            # assistant alive for the next turn.
            print("[feil] " + str(e))

    def on_press(key):
        """Start recording on F9; return False on Esc to stop the listener."""
        nonlocal f9_held, recording
        if key == keyboard.Key.f9:
            with lock:
                # Only the first press event of a hold is acted upon, so
                # repeated press events neither restart the recorder nor
                # retry (and re-report) a start that already failed.
                if not f9_held:
                    f9_held = True
                    try:
                        recorder.start()
                    except Exception as e:
                        # An exception escaping a pynput callback stops the
                        # listener; report it the same way process_turn does.
                        print("[feil] " + str(e))
                    else:
                        # Flag is raised only once the recorder is running,
                        # so on_release never stops a recorder that never
                        # started.
                        recording = True
                        print("\n[Lytter... slipp F9 for aa sende]")
        if key == keyboard.Key.esc:
            print("\nAvslutter...")
            return False

    def on_release(key):
        """Stop recording on F9 release and dispatch the captured audio."""
        nonlocal f9_held, recording
        if key != keyboard.Key.f9:
            return
        with lock:
            f9_held = False
            if not recording:
                return
            recording = False
            try:
                audio = recorder.stop()
            except Exception as e:
                print("[feil] " + str(e))
                return
        # Dispatch outside the lock so the key-state lock is not held while
        # the worker thread is being created.
        if audio:
            threading.Thread(target=process_turn, args=(audio,), daemon=True).start()
        else:
            print("[Ingen lyd fanget opp]\n")

    with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
        listener.join()
    print("Ha det!")
# Script entry point: start the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|