carbonx commited on
Commit
2ba74a1
·
verified ·
1 Parent(s): 887bb38

Add webapp.py

Browse files
Files changed (1) hide show
  1. webapp.py +105 -0
webapp.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Buddy Web — Gradio-basert multimodal assistent
3
+
4
+ Enkel å demonstrere uten desktop-avhengigheter.
5
+ Krever bare HuggingFace token for inference.
6
+
7
+ python webapp.py
8
+
9
+ Åpner på http://localhost:7860
10
+ """
11
+ import gradio as gr
12
+ import numpy as np
13
+ from PIL import Image
14
+ import tempfile
15
+ import os
16
+ import scipy.io.wavfile as wavfile
17
+
18
+ from huggingface_hub import InferenceClient
19
+
20
+
21
def _image_to_data_url(image):
    """Serialize a PIL image into a base64-encoded PNG ``data:`` URL.

    Chat-completion requests are sent as JSON, so the image has to travel
    as text rather than as an in-memory PIL object.
    """
    # Local imports keep this block self-contained; both are stdlib.
    import base64
    import io

    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    return "data:image/png;base64," + encoded


def _transcribe_audio(client, audio):
    """Transcribe a ``(sample_rate, samples)`` recording to text.

    The samples are written to a temporary WAV file which is handed to the
    speech-recognition endpoint. If the remote call fails, a bracketed
    error string is returned instead of raising, so the turn can continue.
    The temporary file is removed in every case.
    """
    sample_rate, samples = audio

    # Reserve a path and close the handle *before* writing: reopening a
    # still-open NamedTemporaryFile by name fails on Windows.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        wav_path = handle.name

    try:
        wavfile.write(wav_path, sample_rate, samples.astype(np.int16))
        try:
            transcript = client.automatic_speech_recognition(
                wav_path,
                model="openai/whisper-large-v3",
            )
            return transcript.text
        except Exception as e:
            return "[Kunne ikke transkribere: " + str(e) + "]"
    finally:
        # Runs even when writing the WAV fails, so the file never leaks.
        os.unlink(wav_path)


def create_app():
    """Build the Gradio Blocks UI for the Buddy assistant.

    The UI takes an optional screenshot and an optional microphone
    recording, transcribes the recording, and sends the result together
    with the screenshot to a vision-language chat model.

    Returns:
        The ``gr.Blocks`` instance; the caller is responsible for launching it.
    """
    client = InferenceClient()

    def process_turn(audio, screenshot):
        """Handle one request and return the assistant's reply as text.

        Args:
            audio: ``(sample_rate, samples)`` tuple from the audio
                component, or ``None`` when nothing was recorded.
            screenshot: PIL image from the image component, or ``None``.

        Returns:
            The model's reply, a prompt asking for input when both
            arguments are ``None``, or an error string prefixed with
            ``"Feil: "`` when the chat request fails.
        """
        if audio is None and screenshot is None:
            return "Vennligst snakk inn noe eller last opp et skjermbilde."

        content = []
        if screenshot is not None:
            # The chat-completion API takes images as OpenAI-style
            # ``image_url`` parts; a raw PIL object is not serializable.
            content.append(
                {
                    "type": "image_url",
                    "image_url": {"url": _image_to_data_url(screenshot)},
                }
            )

        if audio is not None:
            text = _transcribe_audio(client, audio)
        else:
            # No spoken prompt: fall back to a generic description request.
            text = "Beskriv det du ser paa skjermbildet."
        content.append({"type": "text", "text": text})

        messages = [{"role": "user", "content": content}]

        try:
            response = client.chat_completion(
                model="Qwen/Qwen2.5-VL-7B-Instruct",
                messages=messages,
                max_tokens=512,
            )
            reply = response.choices[0].message.content
        except Exception as e:
            # Surface the failure in the output box instead of crashing the UI.
            reply = "Feil: " + str(e)

        return reply

    with gr.Blocks(title="Buddy — Push-to-Talk AI Assistant", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Buddy AI Assistent")
        gr.Markdown("Snakk inn eller skriv. Assistenten ser skjermbildet ditt.")

        with gr.Row():
            with gr.Column():
                screenshot_input = gr.Image(
                    label="Skjermbilde / Dokument",
                    type="pil",
                    sources=["upload", "clipboard"],
                )
                audio_input = gr.Audio(
                    label="Push-to-Talk (spill inn stemme)",
                    sources=["microphone"],
                    type="numpy",
                )
                submit_btn = gr.Button("Send til Buddy", variant="primary")

            with gr.Column():
                output = gr.Textbox(
                    label="Buddy svarer",
                    lines=10,
                    interactive=False,
                )
                clear_btn = gr.Button("Tom")

        submit_btn.click(
            fn=process_turn,
            inputs=[audio_input, screenshot_input],
            outputs=output,
        )
        # Reset both inputs and the output box in one step.
        clear_btn.click(lambda: (None, None, ""), outputs=[audio_input, screenshot_input, output])

    return demo
101
+
102
+
103
if __name__ == "__main__":
    # Script entry point: build the UI and serve it on all interfaces,
    # port 7860, without creating a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo = create_app()
    demo.launch(**launch_options)