carbonx committed on
Commit
887bb38
·
verified ·
1 Parent(s): 4585960

Add cli.py

Browse files
Files changed (1) hide show
  1. cli.py +87 -0
cli.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Buddy CLI — Headless Push-to-Talk AI Assistant
3
+
4
+ Bruk uten GUI for testing eller som daemon.
5
+ python cli.py
6
+ """
7
+ import sys
8
+ import threading
9
+ import time
10
+ from pynput import keyboard
11
+
12
+ from audio_io import PushToTalkRecorder, TTSEngine
13
+ from screen_capture import capture_primary_monitor
14
+ from vision_llm import MultimodalAssistant
15
+
16
+
17
def main():
    """Run the headless push-to-talk assistant until Esc is pressed.

    Prints a banner, constructs the recorder, the TTS engine and the
    multimodal assistant, then blocks on a global keyboard listener:

    * pressing F9 starts a recording; releasing it stops the recording and
      hands the captured audio to a background worker thread;
    * pressing Esc stops the listener and ends the program.

    On the way out (Esc, Ctrl+C, or an error re-raised by the listener) a
    recording that is still in progress is stopped so the recorder is not
    left running.

    Exits the process with status 1 if ``MultimodalAssistant()`` raises.
    """
    print("=" * 60)
    print(" Buddy — Push-to-Talk AI Assistant (CLI)")
    print("=" * 60)
    print("\nTaster:")
    print(" F9 — Hold for aa snakke, slipp for aa sende")
    print(" Esc — Avslutt\n")

    print("Laster modeller (dette tar et minutt...)...")
    recorder = PushToTalkRecorder()
    tts = TTSEngine(voice="nb-NO-FinnNeural")

    try:
        assistant = MultimodalAssistant()
    except Exception as e:
        # Start-up boundary: without the assistant nothing else can work.
        print("Feil ved modelllasting: " + str(e))
        sys.exit(1)

    print("Klar!\n")
    # True while a recording started by F9 is in progress; guarded by `lock`
    # because the listener callbacks and the shutdown path both touch it.
    recording = False
    lock = threading.Lock()

    def process_turn(audio_bytes):
        """Transcribe one utterance, ask the assistant about the screen, speak the reply."""
        try:
            print("[STT] Transkriberer...")
            transcript = assistant.transcribe_audio(audio_bytes)
            print("Du: " + transcript)

            print("[VLM] Ser paa skjermbildet og tenker...")
            screenshot = capture_primary_monitor()
            response = assistant.ask_with_image(screenshot, transcript)

            print("Buddy: " + response)
            print("-" * 60)
            tts.speak(response, blocking=False)
        except Exception as e:
            # Worker-thread boundary: report the failure instead of letting the
            # thread die with a traceback; the listener keeps running.
            print("[feil] " + str(e))

    def on_press(key):
        """Start recording on F9; return False on Esc to stop the listener."""
        nonlocal recording
        if key == keyboard.Key.f9:
            with lock:
                # The flag makes repeated press events (e.g. key auto-repeat
                # while F9 is held) no-ops.
                if not recording:
                    # Set the flag only after start() succeeded, so a failed
                    # start does not leave us believing a recording is active.
                    recorder.start()
                    recording = True
                    print("\n[Lytter... slipp F9 for aa sende]")

        if key == keyboard.Key.esc:
            print("\nAvslutter...")
            return False

    def on_release(key):
        """Stop recording when F9 is released and process the audio in the background."""
        nonlocal recording
        if key == keyboard.Key.f9:
            with lock:
                if recording:
                    recording = False
                    audio = recorder.stop()
                    if audio:
                        # Run the slow STT/VLM/TTS pipeline off the listener
                        # thread so key events keep being handled.
                        threading.Thread(
                            target=process_turn, args=(audio,), daemon=True
                        ).start()
                    else:
                        print("[Ingen lyd fanget opp]\n")

    try:
        with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
            listener.join()
    except KeyboardInterrupt:
        # Ctrl+C is treated like Esc instead of ending in a traceback.
        print("\nAvslutter...")
    finally:
        # Esc pressed while F9 is still held stops the listener before the
        # release event arrives, so the recording must be stopped here.
        with lock:
            if recording:
                recording = False
                try:
                    recorder.stop()
                except Exception as e:
                    print("[feil] " + str(e))

    print("Ha det!")
84
+
85
+
86
+ if __name__ == "__main__":  # Script entry point: start the CLI only when run directly, not on import.
87
+ main()