Upload app.py with huggingface_hub
app.py
CHANGED
@@ -1,26 +1,39 @@
 import gradio as gr
 from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
 import os
 
-(7 lines removed)
+print("--- Starting Space ---")
+# Download from the Model Repository instead of local Space storage
+print("Downloading model from Model Hub (9.6GB)...")
+try:
+    model_path = hf_hub_download(
+        repo_id="Arabic250/gemma-4-gguf-export",
+        filename="gemma-4-medical.gguf"
+    )
+    print(f"Model downloaded to: {model_path}")
 
-(5 lines removed)
+    print("Loading model into Llama-CPP...")
+    llm = Llama(
+        model_path=model_path,
+        n_ctx=2048,
+        n_threads=2
+    )
 
-(5 lines removed)
+    def generate_response(message, history):
+        prompt = f"USER: {message}\nASSISTANT: "
+        response = llm(prompt, max_tokens=512, stop=["USER:"], echo=False)
+        return response["choices"][0]["text"]
+
+    demo = gr.ChatInterface(
+        fn=generate_response,
+        title="Gemma 4 Medical - GGUF Hub Edition",
+        description="This Space pulls the model from Arabic250/gemma-4-gguf-export"
+    )
+except Exception as e:
+    print(f"Error loading model: {e}")
+    with gr.Blocks() as demo:
+        gr.Markdown(f"### ⚠️ Error: {e}")
 
 if __name__ == "__main__":
     demo.launch()
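One practical note on the download step: hf_hub_download caches the file (under ~/.cache/huggingface/hub, or wherever HF_HOME points), so a Space restart reuses the cached copy rather than re-fetching all 9.6GB. A minimal sketch for checking the download locally before pushing, reusing the repo_id and filename from the commit above:

import os
from huggingface_hub import hf_hub_download

# Resolve (and cache) the GGUF file; repeated calls return the cached path.
path = hf_hub_download(
    repo_id="Arabic250/gemma-4-gguf-export",
    filename="gemma-4-medical.gguf"
)
print(f"Cached at: {path}")
print(f"Size: {os.path.getsize(path) / 1e9:.1f} GB")  # expect roughly 9.6 GB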
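Also worth noting: the committed generate_response ignores its history argument, so every turn is answered without conversational context. If multi-turn memory is wanted, the prompt can fold in prior turns. A sketch, assuming Gradio's default tuple-style history of (user, assistant) pairs and the llm instance defined in app.py:

# Multi-turn variant: prepends earlier exchanges to the prompt so the
# model sees the conversation so far (a sketch, not the committed code).
def generate_response(message, history):
    prompt = ""
    for user_msg, bot_msg in history:
        prompt += f"USER: {user_msg}\nASSISTANT: {bot_msg}\n"
    prompt += f"USER: {message}\nASSISTANT: "
    response = llm(prompt, max_tokens=512, stop=["USER:"], echo=False)
    return response["choices"][0]["text"]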
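Finally, since generate_response returns the full completion in one shot, the chat window stays empty until generation finishes, which can be slow on 2 CPU threads. llama-cpp-python supports stream=True and gr.ChatInterface accepts generator functions, so a streaming variant is possible; a sketch under the same assumptions as above:

# Streaming drop-in for generate_response: each yield replaces the
# partial reply shown in the chat window, so text appears token by token.
def generate_response(message, history):
    prompt = f"USER: {message}\nASSISTANT: "
    text = ""
    for chunk in llm(prompt, max_tokens=512, stop=["USER:"], stream=True):
        text += chunk["choices"][0]["text"]
        yield text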