| import gradio as gr |
| from llama_cpp import Llama |
| from huggingface_hub import hf_hub_download |
| import os |
|
|
| |
# Hugging Face Spaces persistent volume; contents survive container restarts.
PERSISTENT_DIR = "/data"
MODEL_DIR = os.path.join(PERSISTENT_DIR, "models")
os.makedirs(MODEL_DIR, exist_ok=True)

# Point the Hugging Face cache at persistent storage as well.
# NOTE(review): huggingface_hub resolves these variables when it is first
# imported, which happens at the top of this file — setting them here may be
# too late to take effect; confirm, or pass cache_dir explicitly instead.
_HF_CACHE_DIR = os.path.join(PERSISTENT_DIR, "hf_cache")
os.environ["HF_HOME"] = _HF_CACHE_DIR
os.environ["HF_HUB_CACHE"] = _HF_CACHE_DIR
|
|
| |
filename = "gemma-3-1b-it-Q4_K_M.gguf"

# Download the quantized model into the persistent volume. hf_hub_download
# skips the transfer when the file is already present in local_dir, so this
# runs once per Space lifetime rather than on every restart.
# `resume_download` was dropped: it is deprecated (and a no-op) in
# huggingface_hub >= 0.23 — interrupted downloads always resume now.
# `force_download=False` was dropped: it is the default.
model_path = hf_hub_download(
    repo_id="unsloth/gemma-3-1b-it-GGUF",
    filename=filename,
    local_dir=MODEL_DIR,
)

print(f"✅ النموذج محمل بنجاح: {model_path}")
|
|
# CPU-only llama.cpp instance loaded once at startup and shared by all
# requests.
_LLAMA_SETTINGS = {
    "model_path": model_path,
    "n_ctx": 8192,           # context window in tokens
    "n_threads": 4,          # CPU worker threads
    "n_gpu_layers": 0,       # keep every layer on the CPU
    "n_batch": 512,          # prompt-processing batch size
    "verbose": False,        # silence llama.cpp's load-time logging
    "chat_format": "gemma",  # use Gemma's chat prompt template
}
llm = Llama(**_LLAMA_SETTINGS)
|
|
def chat(message, history):
    """Generate one assistant reply from the shared llama.cpp model.

    Args:
        message: The new user message (str).
        history: Prior conversation as supplied by Gradio — either a list
            of {"role", "content"} dicts (ChatInterface ``type="messages"``)
            or the legacy list of (user, assistant) pairs. The original
            code assumed dicts only, which breaks under the legacy default.

    Returns:
        The assistant's reply text (str).
    """
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # Keep only the keys llama.cpp expects; Gradio may attach
            # extras such as "metadata".
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Legacy pair format: (user_message, assistant_message);
            # either side may be empty/None on a partial turn.
            user_text, assistant_text = turn
            if user_text:
                messages.append({"role": "user", "content": user_text})
            if assistant_text:
                messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    response = llm.create_chat_completion(
        messages=messages,
        temperature=0.7,
        max_tokens=1024,
        stop=["<end_of_turn>"],  # Gemma's end-of-turn marker
    )
    return response["choices"][0]["message"]["content"]
|
|
# type="messages" makes Gradio deliver history as OpenAI-style role/content
# dicts — the format chat() forwards to llama.cpp. The legacy default
# (tuple pairs) is deprecated and does not match that format.
demo = gr.ChatInterface(
    fn=chat,
    type="messages",
    title="🧠 Gemma 3 1B IT (GGUF) - Docker",
    description="أحدث إصدار • Persistent Storage • يُحمّل مرة واحدة فقط",
    examples=["مرحبا، كيف حالك؟", "اكتب قصة قصيرة بالعربية عن تونس"],
    theme=gr.themes.Soft(),
)


if __name__ == "__main__":
    # Bind to all interfaces on the conventional Spaces/Docker port.
    demo.launch(server_name="0.0.0.0", server_port=7860)