i04n4 committed on
Commit
a754a3f
·
verified ·
1 Parent(s): cf6da5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -16
app.py CHANGED
@@ -1,33 +1,52 @@
1
  import os
2
- os.system("pip install --force-reinstall --no-cache-dir https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.90/llama_cpp_python-0.2.90-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl")
 
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import gradio as gr
5
  from llama_cpp import Llama
6
  from huggingface_hub import hf_hub_download
7
- REPO_ID = "i04n4/llama3.2-3b-math-gguf"
8
- FILENAME = "model.gguf"
9
 
10
- model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
 
 
 
 
 
 
 
 
 
11
 
12
  print("🚀 Încarc motorul Llama...")
 
13
  llm = Llama(model_path=model_path, n_ctx=2048, n_threads=2)
14
 
15
  def generate_response(message, history):
16
- system_prompt = """You are a specialized mathematical assistant. Your goal is to be precise and efficient.
17
-
18
- RULES:
19
- - RESPONSE TYPE A (Basic/Fact-based): If the question is simple arithmetic (e.g., 2+2) or a basic fact, provide ONLY the final result. Be as concise as possible.
20
- - RESPONSE TYPE B (Complex/Logic): If the question requires multiple steps, algebra, or reasoning, provide a clear 'Step-by-step reasoning' followed by 'Final Answer'.
21
- - FORMATTING: Never use internal scratchpad tags like <<...>>. Use plain text only. Do not use your inner thougths, only the explanation. Do not use any symbols that are not needed
22
- - LANGUAGE: Always answer in English."""
23
-
24
  full_prompt = f"<|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
25
 
26
  response = ""
27
  output = llm(
28
  full_prompt,
29
  max_tokens=512,
30
- stop=["<|eot_id|>", "User:"],
31
  echo=False,
32
  stream=True
33
  )
@@ -39,9 +58,8 @@ RULES:
39
 
40
  demo = gr.ChatInterface(
41
  generate_response,
42
- title="Super Awesome Math Tutor",
43
- description="Ask me a math question!!!!!!",
44
- examples=["Calculate 25 * 14", "Solve 2x + 5 = 15", "Derivative of x^2"]
45
  )
46
 
47
  if __name__ == "__main__":
 
1
  import os
2
+ import subprocess
3
+ import sys
4
 
5
+ # --- ZONA MAGICĂ DE INSTALARE ---
6
+ # Verificăm dacă llama_cpp e instalat. Dacă nu, îl instalăm acum, la pornire.
7
+ try:
8
+ import llama_cpp
9
+ print("✅ Llama-cpp-python este deja instalat.")
10
+ except ImportError:
11
+ print("⏳ Se instalează llama-cpp-python pentru CPU (poate dura 30-60 secunde)...")
12
+ # Această comandă folosește index-ul oficial abetlen, care găsește singur versiunea corectă
13
+ subprocess.check_call([
14
+ sys.executable, "-m", "pip", "install",
15
+ "llama-cpp-python",
16
+ "--no-cache-dir",
17
+ "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu"
18
+ ])
19
+ print("✅ Instalare completă!")
20
+
21
+ # --- ACUM IMPORTĂM BIBLIOTECILE ---
22
  import gradio as gr
23
  from llama_cpp import Llama
24
  from huggingface_hub import hf_hub_download
 
 
25
 
26
+ # --- CONFIGURARE ---
27
+ REPO_ID = "IoanaUser/Llama-3.2-3B-Math-GGUF" # <--- Verifică să fie numele tău corect
28
+ FILENAME = "llama-3.2-3b-math-q4_k_m.gguf" # <--- Verifică numele fișierului
29
+
30
+ print(f"⏳ Descarc modelul {FILENAME}...")
31
+ try:
32
+ model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
33
+ except Exception as e:
34
+ print(f"❌ Eroare la descărcare: {e}")
35
+ raise e
36
 
37
  print("🚀 Încarc motorul Llama...")
38
+ # n_threads=2 este optim pentru Free Tier
39
  llm = Llama(model_path=model_path, n_ctx=2048, n_threads=2)
40
 
41
  def generate_response(message, history):
42
+ system_prompt = "You are a helpful mathematical assistant. Answer directly and precisely."
 
 
 
 
 
 
 
43
  full_prompt = f"<|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
44
 
45
  response = ""
46
  output = llm(
47
  full_prompt,
48
  max_tokens=512,
49
+ stop=["<|eot_id|>"],
50
  echo=False,
51
  stream=True
52
  )
 
58
 
59
  demo = gr.ChatInterface(
60
  generate_response,
61
+ title="Math tutor",
62
+ description="Ask me a question vro<3",
 
63
  )
64
 
65
  if __name__ == "__main__":