carbonx committed on
Commit
45ee2cb
·
verified ·
1 Parent(s): 96b68f8

Oppdaterer main.py med graceful imports og OOM fallback

Browse files
Files changed (1) hide show
  1. main.py +43 -8
main.py CHANGED
@@ -1,3 +1,4 @@
 
1
  """
2
  Buddy — Desktop Push-to-Talk AI Assistant
3
 
@@ -29,6 +30,33 @@ from audio_io import PushToTalkRecorder, TTSEngine
29
  from screen_capture import capture_primary_monitor
30
  from vision_llm import MultimodalAssistant
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  class BuddyApp:
34
  def __init__(self):
@@ -144,23 +172,35 @@ class BuddyApp:
144
  self.screenshot_label.config(text="Kunne ikke ta skjermbilde: " + str(e))
145
 
146
  def _load_model(self):
 
147
  try:
148
  self.assistant = MultimodalAssistant()
149
  self.model_loaded = True
150
  self.msg_queue.put(("status", "Klar! Hold F9 for å snakke"))
 
 
 
 
 
 
 
 
 
 
 
151
  except Exception as e:
152
  self.msg_queue.put(("status", "Feil ved lasting: " + str(e)))
153
  print("[init] Model load error: " + str(e))
154
 
155
  def _start_recording(self):
156
  if not self.model_loaded:
157
- self.msg_queue.put(("status", "Vent, modell laster ennaa..."))
158
  return
159
  if self.recorder.is_recording():
160
  return
161
 
162
  self.recorder.start()
163
- self.msg_queue.put(("status_recording", "Lytter... slipp for aa sende"))
164
  self._update_screenshot()
165
 
166
  def _stop_recording(self):
@@ -174,7 +214,6 @@ class BuddyApp:
174
  self.msg_queue.put(("status", "Ingen lyd fanget opp"))
175
  return
176
 
177
- # Process in background thread
178
  threading.Thread(
179
  target=self._process_turn,
180
  args=(audio_bytes,),
@@ -183,20 +222,16 @@ class BuddyApp:
183
 
184
  def _process_turn(self, audio_bytes):
185
  try:
186
- # 1. STT
187
  transcript = self.assistant.transcribe_audio(audio_bytes)
188
  self.msg_queue.put(("chat_user", transcript))
189
 
190
- # 2. Capture fresh screenshot
191
- self.msg_queue.put(("status", "Ser paa skjermbildet..."))
192
  screenshot = capture_primary_monitor()
193
  self.msg_queue.put(("screenshot", None))
194
 
195
- # 3. VLM
196
  self.msg_queue.put(("status", "Tenker..."))
197
  response = self.assistant.ask_with_image(screenshot, transcript)
198
 
199
- # 4. Update UI + TTS
200
  self.msg_queue.put(("chat_buddy", response))
201
  self.msg_queue.put(("status", "Klar!"))
202
  self.tts.speak(response, blocking=False)
 
1
+
2
  """
3
  Buddy — Desktop Push-to-Talk AI Assistant
4
 
 
30
  from screen_capture import capture_primary_monitor
31
  from vision_llm import MultimodalAssistant
32
 
33
# Help for Windows users: probe the audio/input packages up front and, if any
# are absent, print install instructions and exit with status 1.
# NOTE(review): this check sits below `from audio_io import ...`; if that
# module imports these packages at import time, the check is never reached
# -- confirm and consider moving it above the project imports.
_MISSING_DEPS = []
try:
    import sounddevice
except ImportError:
    _MISSING_DEPS.append("sounddevice")
try:
    import soundfile
except ImportError:
    _MISSING_DEPS.append("soundfile")
try:
    import pynput
except ImportError:
    _MISSING_DEPS.append("pynput")

if _MISSING_DEPS:
    _banner = "=" * 60
    # One entry per output line; an empty string prints a blank line.
    _report = (
        _banner,
        "FEIL: Mangler Python-pakker: %s" % ", ".join(_MISSING_DEPS),
        "",
        "Kj\u00f8r f\u00f8rst:",
        " pip install -r requirements.txt",
        "",
        "Windows-brukere: sounddevice kan kreve wheel fra:",
        " https://www.lfd.uci.edu/~gohlke/pythonlibs/#sounddevice",
        _banner,
    )
    for _line in _report:
        print(_line)
    sys.exit(1)
59
+
60
 
61
  class BuddyApp:
62
  def __init__(self):
 
172
  self.screenshot_label.config(text="Kunne ikke ta skjermbilde: " + str(e))
173
 
174
  def _load_model(self):
175
+ import torch
176
  try:
177
  self.assistant = MultimodalAssistant()
178
  self.model_loaded = True
179
  self.msg_queue.put(("status", "Klar! Hold F9 for å snakke"))
180
+ except torch.cuda.OutOfMemoryError:
181
+ self.msg_queue.put(("status", "OOM: Vil tvinge qwen2-vl-2b..."))
182
+ print("[init] OOM! Prøver fallback til qwen2-vl-2b...")
183
+ try:
184
+ import os
185
+ os.environ["BUDDY_VLM_MODEL"] = "qwen2-vl-2b"
186
+ self.assistant = MultimodalAssistant()
187
+ self.model_loaded = True
188
+ self.msg_queue.put(("status", "Klar! (bruker lett modell) Hold F9 for å snakke"))
189
+ except Exception as e2:
190
+ self.msg_queue.put(("status", "Også OOM med 2B: " + str(e2)))
191
  except Exception as e:
192
  self.msg_queue.put(("status", "Feil ved lasting: " + str(e)))
193
  print("[init] Model load error: " + str(e))
194
 
195
  def _start_recording(self):
196
  if not self.model_loaded:
197
+ self.msg_queue.put(("status", "Vent, modell laster..."))
198
  return
199
  if self.recorder.is_recording():
200
  return
201
 
202
  self.recorder.start()
203
+ self.msg_queue.put(("status_recording", "Lytter... slipp for å sende"))
204
  self._update_screenshot()
205
 
206
  def _stop_recording(self):
 
214
  self.msg_queue.put(("status", "Ingen lyd fanget opp"))
215
  return
216
 
 
217
  threading.Thread(
218
  target=self._process_turn,
219
  args=(audio_bytes,),
 
222
 
223
  def _process_turn(self, audio_bytes):
224
  try:
 
225
  transcript = self.assistant.transcribe_audio(audio_bytes)
226
  self.msg_queue.put(("chat_user", transcript))
227
 
228
+ self.msg_queue.put(("status", "Ser skjermbildet..."))
 
229
  screenshot = capture_primary_monitor()
230
  self.msg_queue.put(("screenshot", None))
231
 
 
232
  self.msg_queue.put(("status", "Tenker..."))
233
  response = self.assistant.ask_with_image(screenshot, transcript)
234
 
 
235
  self.msg_queue.put(("chat_buddy", response))
236
  self.msg_queue.put(("status", "Klar!"))
237
  self.tts.speak(response, blocking=False)