# Source: buddy-desktop / cli.py — commit 887bb38 ("Add cli.py") by carbonx, verified
"""
Buddy CLI — Headless Push-to-Talk AI Assistant
Run without a GUI, for testing or as a daemon.

Usage:
    python cli.py
"""
import sys
import threading
import time
from pynput import keyboard
from audio_io import PushToTalkRecorder, TTSEngine
from screen_capture import capture_primary_monitor
from vision_llm import MultimodalAssistant
def main():
    """Run the headless push-to-talk assistant until Esc or Ctrl+C.

    Holding F9 starts the recorder; releasing it stops the recorder and, if
    any audio was captured, hands it to a background thread that transcribes
    it, takes a screenshot via ``capture_primary_monitor()``, asks the
    multimodal assistant, prints the reply and speaks it through the TTS
    engine.

    Exits the process with status 1 if ``MultimodalAssistant()`` raises
    while loading.
    """
    print("=" * 60)
    print(" Buddy — Push-to-Talk AI Assistant (CLI)")
    print("=" * 60)
    print("\nTaster:")
    print(" F9 — Hold for aa snakke, slipp for aa sende")
    print(" Esc — Avslutt\n")
    print("Laster modeller (dette tar et minutt...)...")

    recorder = PushToTalkRecorder()
    tts = TTSEngine(voice="nb-NO-FinnNeural")
    try:
        assistant = MultimodalAssistant()
    except Exception as e:
        print("Feil ved modelllasting: " + str(e))
        sys.exit(1)
    print("Klar!\n")

    # `recording` is shared between the listener callbacks and the shutdown
    # path below; every read/write of it happens while holding `lock`.
    recording = False
    lock = threading.Lock()

    def process_turn(audio_bytes):
        """Handle one utterance: transcribe, look at the screen, answer, speak."""
        try:
            print("[STT] Transkriberer...")
            transcript = assistant.transcribe_audio(audio_bytes)
            print("Du: " + transcript)
            print("[VLM] Ser paa skjermbildet og tenker...")
            screenshot = capture_primary_monitor()
            response = assistant.ask_with_image(screenshot, transcript)
            print("Buddy: " + response)
            print("-" * 60)
            tts.speak(response, blocking=False)
        except Exception as e:
            # Worker-thread boundary: report the failure and keep the
            # assistant alive for the next turn.
            print("[feil] " + str(e))

    def on_press(key):
        """Start recording on F9; stop the listener on Esc."""
        nonlocal recording
        if key == keyboard.Key.f9:
            with lock:
                # Ignore further F9 press events while already recording.
                if not recording:
                    recording = True
                    recorder.start()
                    print("\n[Lytter... slipp F9 for aa sende]")
        if key == keyboard.Key.esc:
            print("\nAvslutter...")
            # Returning False from a pynput callback stops the listener.
            return False

    def on_release(key):
        """Stop recording on F9 release and dispatch the captured audio."""
        nonlocal recording
        if key == keyboard.Key.f9:
            with lock:
                if recording:
                    recording = False
                    audio = recorder.stop()
                    if audio:
                        # Daemon thread: a turn still in progress must not
                        # keep the process alive after the user quits.
                        threading.Thread(
                            target=process_turn, args=(audio,), daemon=True
                        ).start()
                    else:
                        print("[Ingen lyd fanget opp]\n")

    try:
        with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
            listener.join()
    except KeyboardInterrupt:
        # Ctrl+C is treated like Esc instead of ending in a traceback.
        print("\nAvslutter...")
    finally:
        # Esc or Ctrl+C can arrive while F9 is still held. Once the listener
        # is gone on_release never fires, so stop the recorder here rather
        # than leaving it running; the partial audio is discarded.
        with lock:
            if recording:
                recording = False
                recorder.stop()

    print("Ha det!")
# Start the assistant only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()