Spaces:
Sleeping
Sleeping
| """ | |
| Buddy Web — Gradio-basert multimodal assistent (HF Space versjon) | |
| """ | |
| import gradio as gr | |
| import numpy as np | |
| from PIL import Image | |
| import tempfile | |
| import os | |
| import scipy.io.wavfile as wavfile | |
| from huggingface_hub import InferenceClient | |
def _to_int16_pcm(samples: np.ndarray) -> np.ndarray:
    """Convert an audio sample array to 16-bit PCM without destroying it.

    A plain ``astype(np.int16)`` truncates float audio in [-1.0, 1.0] to
    all zeros and wraps 32-bit integer samples, so each dtype family is
    handled explicitly:

    * ``int16``  -> returned unchanged.
    * floating   -> clipped to [-1.0, 1.0] and scaled to the int16 range.
    * ``int32``  -> treated as full-scale 32-bit PCM; the top 16 bits are kept.
    * other      -> plain cast (same as the previous behaviour).
    """
    pcm = np.asarray(samples)
    if pcm.dtype == np.int16:
        return pcm
    if np.issubdtype(pcm.dtype, np.floating):
        return (np.clip(pcm, -1.0, 1.0) * 32767).astype(np.int16)
    if pcm.dtype == np.int32:
        return (pcm >> 16).astype(np.int16)
    return pcm.astype(np.int16)


def _image_to_data_url(image) -> str:
    """Encode an image object as a base64 PNG ``data:`` URL.

    ``image`` only needs a PIL-style ``save(fp, format=...)`` method.
    """
    # Local imports keep this block self-contained; both are stdlib.
    import base64
    import io

    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    return "data:image/png;base64," + encoded


def create_app():
    """Build and return the Gradio Blocks UI for the Buddy assistant.

    The returned object is not launched here; the caller is expected to
    call ``.launch()`` on it.
    """
    # The API token is read from the HF_TOKEN environment variable (None if
    # unset). The original author notes it is provided by the HF Spaces
    # environment -- confirm it is configured for this Space.
    client = InferenceClient(token=os.environ.get("HF_TOKEN"))

    def process_turn(audio, screenshot):
        """Handle one user turn and return the assistant's reply as text.

        Args:
            audio: ``(sample_rate, samples)`` tuple as produced by
                ``gr.Audio(type="numpy")``, or ``None``.
            screenshot: PIL image as produced by ``gr.Image(type="pil")``,
                or ``None``.

        Returns:
            The model reply, or a user-facing message when there is no
            input or a remote call fails.
        """
        if audio is None and screenshot is None:
            return "Vennligst snakk inn noe eller last opp et skjermbilde."

        content = []

        if screenshot is not None:
            # The chat-completion API takes JSON messages, so the image must
            # be sent as an ``image_url`` part (here: an inline data URL),
            # not as a raw PIL object.
            try:
                image_url = _image_to_data_url(screenshot)
            except (OSError, ValueError) as e:
                return "Feil: " + str(e)
            content.append({"type": "image_url", "image_url": {"url": image_url}})

        if audio is not None:
            sr, data = audio
            # Reserve a path and close the handle before writing: the file
            # must not be open twice (fails on Windows), and everything that
            # can raise happens inside the try so the file is always removed.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                tmp = f.name
            try:
                wavfile.write(tmp, sr, _to_int16_pcm(data))
                transcript = client.automatic_speech_recognition(
                    tmp,
                    model="openai/whisper-large-v3",
                )
                text = transcript.text
            except Exception as e:
                # Best effort: the failure note is forwarded as the user text
                # so a screenshot-only answer is still possible.
                text = "[Kunne ikke transkribere: " + str(e) + "]"
            finally:
                os.unlink(tmp)
        else:
            # No speech: fall back to a default prompt about the screenshot.
            text = "Beskriv det du ser paa skjermbildet."
        content.append({"type": "text", "text": text})

        messages = [{"role": "user", "content": content}]
        try:
            response = client.chat_completion(
                model="Qwen/Qwen2.5-VL-7B-Instruct",
                messages=messages,
                max_tokens=512,
            )
            reply = response.choices[0].message.content
        except Exception as e:
            # Surface remote/API failures in the output box instead of crashing.
            reply = "Feil: " + str(e)
        return reply

    with gr.Blocks(title="Buddy — Push-to-Talk AI Assistant", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Buddy AI Assistent")
        gr.Markdown("Snakk inn eller skriv. Assistenten ser skjermbildet ditt.")
        with gr.Row():
            # Left column: inputs.
            with gr.Column():
                screenshot_input = gr.Image(
                    label="Skjermbilde / Dokument",
                    type="pil",
                    sources=["upload", "clipboard"],
                )
                audio_input = gr.Audio(
                    label="Push-to-Talk (spill inn stemme)",
                    sources=["microphone"],
                    type="numpy",
                )
                submit_btn = gr.Button("Send til Buddy", variant="primary")
            # Right column: reply and reset.
            with gr.Column():
                output = gr.Textbox(
                    label="Buddy svarer",
                    lines=10,
                    interactive=False,
                )
                clear_btn = gr.Button("Tom")

        submit_btn.click(
            fn=process_turn,
            inputs=[audio_input, screenshot_input],
            outputs=output,
        )
        # Reset both inputs and the reply box.
        clear_btn.click(lambda: (None, None, ""), outputs=[audio_input, screenshot_input, output])

    return demo
if __name__ == "__main__":
    # Script entry point: build the Blocks UI and start serving it.
    # The module-level name ``demo`` is kept as in the original.
    demo = create_app()
    demo.launch()