carbonx committed on
Commit
b9cf533
·
verified ·
1 Parent(s): 8159c23

Add HF Space app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -0
app.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Buddy Web — Gradio-based multimodal assistant (HF Space version)
3
+ """
4
+ import gradio as gr
5
+ import numpy as np
6
+ from PIL import Image
7
+ import tempfile
8
+ import os
9
+ import scipy.io.wavfile as wavfile
10
+
11
+ from huggingface_hub import InferenceClient
12
+
13
+
14
def _image_to_data_url(image):
    """Encode a PIL image as a base64 PNG ``data:`` URL.

    Chat messages are sent as JSON, so the image must be embedded as text
    rather than passed as a PIL object.
    """
    import base64
    import io

    # PNG cannot store every PIL mode (e.g. CMYK); normalise the odd ones.
    if image.mode not in ("RGB", "RGBA", "L"):
        image = image.convert("RGB")
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    return "data:image/png;base64," + encoded


def _to_int16_pcm(samples):
    """Return ``samples`` as 16-bit PCM.

    A plain ``astype(np.int16)`` on floating-point audio truncates every
    sample to 0, so float input is scaled first.
    NOTE(review): assumes float samples are normalised to [-1, 1] -- confirm
    against the Gradio version in use.
    """
    if np.issubdtype(samples.dtype, np.floating):
        return (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
    return samples.astype(np.int16)


def _transcribe(client, audio):
    """Transcribe a ``(sample_rate, samples)`` tuple with Whisper.

    Returns the transcript text, or a bracketed error message if the
    transcription request fails. The temporary WAV file is always removed.
    """
    sample_rate, samples = audio

    # Reserve a path and close the handle before writing: writing by name
    # while the handle is still open fails on Windows.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_path = wav_file.name

    try:
        wavfile.write(wav_path, sample_rate, _to_int16_pcm(samples))
        try:
            transcript = client.automatic_speech_recognition(
                wav_path,
                model="openai/whisper-large-v3",
            )
            return transcript.text
        except Exception as e:
            # Best effort: surface the failure as the text of the turn.
            return "[Kunne ikke transkribere: " + str(e) + "]"
    finally:
        os.unlink(wav_path)


def create_app():
    """Build the Buddy Gradio Blocks app and return it (not yet launched).

    The UI takes an optional screenshot and an optional microphone
    recording. Audio is transcribed with Whisper; the transcript (or a
    default "describe the screenshot" prompt) plus the screenshot is sent
    to a vision-language chat model, and the reply is shown in a textbox.
    """
    client = InferenceClient()

    def process_turn(audio, screenshot):
        """Handle one submit: return the assistant reply (or an error text)."""
        if audio is None and screenshot is None:
            return "Vennligst snakk inn noe eller last opp et skjermbilde."

        content = []
        if screenshot is not None:
            # Images must be sent as an ``image_url`` content part; a raw
            # PIL object is not JSON-serializable.
            content.append(
                {
                    "type": "image_url",
                    "image_url": {"url": _image_to_data_url(screenshot)},
                }
            )

        if audio is not None:
            text = _transcribe(client, audio)
        else:
            text = "Beskriv det du ser paa skjermbildet."
        content.append({"type": "text", "text": text})

        messages = [{"role": "user", "content": content}]

        try:
            response = client.chat_completion(
                model="Qwen/Qwen2.5-VL-7B-Instruct",
                messages=messages,
                max_tokens=512,
            )
            reply = response.choices[0].message.content
        except Exception as e:
            reply = "Feil: " + str(e)

        return reply

    with gr.Blocks(title="Buddy — Push-to-Talk AI Assistant", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Buddy AI Assistent")
        gr.Markdown("Snakk inn eller skriv. Assistenten ser skjermbildet ditt.")

        with gr.Row():
            with gr.Column():
                screenshot_input = gr.Image(
                    label="Skjermbilde / Dokument",
                    type="pil",
                    sources=["upload", "clipboard"],
                )
                audio_input = gr.Audio(
                    label="Push-to-Talk (spill inn stemme)",
                    sources=["microphone"],
                    type="numpy",
                )
                submit_btn = gr.Button("Send til Buddy", variant="primary")

            with gr.Column():
                output = gr.Textbox(
                    label="Buddy svarer",
                    lines=10,
                    interactive=False,
                )
                clear_btn = gr.Button("Tom")

        submit_btn.click(
            fn=process_turn,
            inputs=[audio_input, screenshot_input],
            outputs=output,
        )
        # Reset both inputs and the reply box.
        clear_btn.click(lambda: (None, None, ""), outputs=[audio_input, screenshot_input, output])

    return demo
93
+
94
+
95
if __name__ == "__main__":
    # Script entry point: build the Blocks UI, then start the Gradio server.
    demo = create_app()
    demo.launch()