| """ |
| Buddy CLI — Headless Push-to-Talk AI Assistant |
| |
| Bruk uten GUI for testing eller som daemon. |
| python cli.py |
| """ |
| import sys |
| import threading |
| import time |
| from pynput import keyboard |
|
|
| from audio_io import PushToTalkRecorder, TTSEngine |
| from screen_capture import capture_primary_monitor |
| from vision_llm import MultimodalAssistant |
|
|
|
|
def main():
    """Run the headless push-to-talk assistant until Esc or Ctrl+C.

    Pressing F9 starts the recorder and releasing it stops the recorder.
    Any captured audio is handed to a background thread that transcribes
    it, calls ``capture_primary_monitor()``, asks the assistant about the
    screenshot and speaks the reply.

    Exits the process with status 1 if ``MultimodalAssistant()`` raises.
    """
    banner = "=" * 60
    print(banner)
    print(" Buddy — Push-to-Talk AI Assistant (CLI)")
    print(banner)
    print("\nTaster:")
    print(" F9 — Hold for aa snakke, slipp for aa sende")
    print(" Esc — Avslutt\n")

    print("Laster modeller (dette tar et minutt...)...")
    recorder = PushToTalkRecorder()
    tts = TTSEngine(voice="nb-NO-FinnNeural")

    try:
        assistant = MultimodalAssistant()
    except Exception as e:
        print(f"Feil ved modelllasting: {e}")
        sys.exit(1)

    print("Klar!\n")
    recording = False
    # Guards `recording` and the recorder start/stop pairing; it is taken by
    # the listener callbacks and by the shutdown cleanup at the end of main().
    lock = threading.Lock()

    def process_turn(audio_bytes):
        """Handle one turn: transcribe, capture the screen, ask, speak.

        Runs on a worker thread. Any exception is reported on stdout
        instead of propagating, so a failed turn does not end the session.
        """
        try:
            print("[STT] Transkriberer...")
            transcript = assistant.transcribe_audio(audio_bytes)
            # Nothing was recognised: do not send an empty prompt (plus a
            # screenshot) to the vision model.
            if not transcript or not transcript.strip():
                print("[Ingen tale oppfattet]\n")
                return
            print(f"Du: {transcript}")

            print("[VLM] Ser paa skjermbildet og tenker...")
            screenshot = capture_primary_monitor()
            response = assistant.ask_with_image(screenshot, transcript)

            print(f"Buddy: {response}")
            print("-" * 60)
            tts.speak(response, blocking=False)
        except Exception as e:
            print(f"[feil] {e}")

    def on_press(key):
        """Start recording on F9; return False on Esc to stop the listener."""
        nonlocal recording
        if key == keyboard.Key.esc:
            print("\nAvslutter...")
            return False
        if key != keyboard.Key.f9:
            return None
        with lock:
            # The guard ignores repeated press events while F9 is held down.
            if not recording:
                recording = True
                recorder.start()
                print("\n[Lytter... slipp F9 for aa sende]")
        return None

    def on_release(key):
        """Stop recording on F9 release and process the audio in a thread."""
        nonlocal recording
        if key != keyboard.Key.f9:
            return
        with lock:
            if not recording:
                return
            recording = False
            audio = recorder.stop()
            if audio:
                # Process off the listener thread so key events keep flowing.
                worker = threading.Thread(
                    target=process_turn, args=(audio,), daemon=True
                )
                worker.start()
            else:
                print("[Ingen lyd fanget opp]\n")

    try:
        with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
            listener.join()
    except KeyboardInterrupt:
        # Treat Ctrl+C like Esc instead of dying with a traceback.
        print("\nAvslutter...")
    finally:
        # Esc/Ctrl+C can arrive while F9 is still held; pair the pending
        # recorder.start() with a stop() and discard the partial audio.
        with lock:
            if recording:
                recording = False
                recorder.stop()

    print("Ha det!")
|
|
|
|
# Script entry point: start the assistant only when this file is executed
# directly, so that importing the module does not launch it.
if __name__ == "__main__":
    main()
|
|