Dav66 commited on
Commit
3408972
·
verified ·
1 Parent(s): 098e673

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# Persistent storage: keep downloaded models and the HF cache across restarts.
PERSISTENT_DIR = "/data"
MODEL_DIR = os.path.join(PERSISTENT_DIR, "models")
os.makedirs(MODEL_DIR, exist_ok=True)

# NOTE: huggingface_hub resolves HF_HOME / HF_HUB_CACHE at import time
# (in huggingface_hub.constants), so these environment variables MUST be
# set before the library is imported — the original set them afterwards,
# which can silently leave the cache in the default, non-persistent path.
os.environ["HF_HOME"] = os.path.join(PERSISTENT_DIR, "hf_cache")
os.environ["HF_HUB_CACHE"] = os.path.join(PERSISTENT_DIR, "hf_cache")

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
13
+
14
# Balanced quantization for Gemma 3 1B instruct (Q4_K_M ≈ 806 MB).
filename = "gemma-3-1b-it-Q4_K_M.gguf"

# Download once into the persistent model directory; later runs reuse the file.
# `resume_download` is deprecated (downloads always resume since
# huggingface_hub 0.23) and `force_download=False` is already the default,
# so both arguments are dropped.
model_path = hf_hub_download(
    repo_id="unsloth/gemma-3-1b-it-GGUF",
    filename=filename,
    local_dir=MODEL_DIR,
)

print(f"✅ النموذج محمل بنجاح: {model_path}")
26
+
27
# Load the GGUF model through the llama.cpp bindings (CPU-only inference).
_LLAMA_OPTS = dict(
    n_ctx=8192,           # context window size
    n_threads=4,          # increase to 6-8 if you want faster CPU inference
    n_gpu_layers=0,       # keep everything on the CPU
    n_batch=512,
    verbose=False,
    chat_format="gemma",  # apply Gemma's chat template
)
llm = Llama(model_path=model_path, **_LLAMA_OPTS)
36
+
37
def chat(message, history):
    """Generate one assistant reply from the local Gemma model.

    Parameters
    ----------
    message : str
        The new user message.
    history : list
        Prior turns from ``gr.ChatInterface``. Depending on Gradio
        version/configuration this is either a list of
        ``{"role", "content"}`` dicts (``type="messages"``) or a list of
        ``(user, assistant)`` pairs (legacy default). The original code
        assumed dicts only, which breaks with pair-format history; both
        formats are now normalized before calling llama-cpp.

    Returns
    -------
    str
        The assistant's reply text.
    """
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # Already OpenAI-style message format.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Legacy pair format: (user_message, assistant_message).
            user_msg, assistant_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = llm.create_chat_completion(
        messages=messages,
        temperature=0.7,
        max_tokens=1024,
        stop=["<end_of_turn>"],  # Gemma's end-of-turn marker
    )
    return response["choices"][0]["message"]["content"]
46
+
47
# Chat UI. `type="messages"` makes Gradio deliver `history` as
# {"role", "content"} dicts — the format chat() forwards to llama-cpp.
# Without it, legacy Gradio passes (user, assistant) pairs and the
# request to the model would be malformed.
demo = gr.ChatInterface(
    fn=chat,
    type="messages",
    title="🧠 Gemma 3 1B IT (GGUF) - Docker",
    description="أحدث إصدار • Persistent Storage • يُحمّل مرة واحدة فقط",
    examples=["مرحبا، كيف حالك؟", "اكتب قصة قصيرة بالعربية عن تونس"],
    theme=gr.themes.Soft(),
)

if __name__ == "__main__":
    # Bind to all interfaces for Docker; 7860 is the HF Spaces default port.
    demo.launch(server_name="0.0.0.0", server_port=7860)