Update app.py
Browse files
app.py
CHANGED
|
@@ -1,33 +1,52 @@
|
|
| 1 |
import os
|
| 2 |
-
|
|
|
|
| 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import gradio as gr
|
| 5 |
from llama_cpp import Llama
|
| 6 |
from huggingface_hub import hf_hub_download
|
| 7 |
-
REPO_ID = "i04n4/llama3.2-3b-math-gguf"
|
| 8 |
-
FILENAME = "model.gguf"
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
print("🚀 Încarc motorul Llama...")
|
|
|
|
| 13 |
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=2)
|
| 14 |
|
| 15 |
def generate_response(message, history):
|
| 16 |
-
system_prompt = "
|
| 17 |
-
|
| 18 |
-
RULES:
|
| 19 |
-
- RESPONSE TYPE A (Basic/Fact-based): If the question is simple arithmetic (e.g., 2+2) or a basic fact, provide ONLY the final result. Be as concise as possible.
|
| 20 |
-
- RESPONSE TYPE B (Complex/Logic): If the question requires multiple steps, algebra, or reasoning, provide a clear 'Step-by-step reasoning' followed by 'Final Answer'.
|
| 21 |
-
- FORMATTING: Never use internal scratchpad tags like <<...>>. Use plain text only. Do not use your inner thougths, only the explanation. Do not use any symbols that are not needed
|
| 22 |
-
- LANGUAGE: Always answer in English."""
|
| 23 |
-
|
| 24 |
full_prompt = f"<|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
|
| 25 |
|
| 26 |
response = ""
|
| 27 |
output = llm(
|
| 28 |
full_prompt,
|
| 29 |
max_tokens=512,
|
| 30 |
-
stop=["<|eot_id|>"
|
| 31 |
echo=False,
|
| 32 |
stream=True
|
| 33 |
)
|
|
@@ -39,9 +58,8 @@ RULES:
|
|
| 39 |
|
| 40 |
demo = gr.ChatInterface(
|
| 41 |
generate_response,
|
| 42 |
-
title="
|
| 43 |
-
description="Ask me a
|
| 44 |
-
examples=["Calculate 25 * 14", "Solve 2x + 5 = 15", "Derivative of x^2"]
|
| 45 |
)
|
| 46 |
|
| 47 |
if __name__ == "__main__":
|
|
|
|
| 1 |
import os
|
| 2 |
+
import subprocess
|
| 3 |
+
import sys
|
| 4 |
|
| 5 |
+
# --- Runtime install bootstrap ---
# Probe for llama-cpp-python before the `from llama_cpp import Llama` import
# further down. If the package is missing, install it with pip at startup.
try:
    import llama_cpp  # noqa: F401  (imported only to check availability)
    print("✅ Llama-cpp-python este deja instalat.")
except ImportError:
    import importlib

    print("⏳ Se instalează llama-cpp-python pentru CPU (poate dura 30-60 secunde)...")
    # Point pip at the llama-cpp-python wheel index (CPU variant) in addition
    # to PyPI; per the original author's note, this lets pip pick a matching
    # build on its own.
    subprocess.check_call([
        sys.executable, "-m", "pip", "install",
        "llama-cpp-python",
        "--no-cache-dir",
        "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu",
    ])
    # The package appeared on disk after the interpreter started, so refresh
    # the import system's finder caches before the later import runs.
    importlib.invalidate_caches()
    print("✅ Instalare completă!")
|
| 20 |
+
|
| 21 |
+
# --- ACUM IMPORTĂM BIBLIOTECILE ---
|
| 22 |
import gradio as gr
|
| 23 |
from llama_cpp import Llama
|
| 24 |
from huggingface_hub import hf_hub_download
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
# --- Configuration ---
# Hugging Face Hub repository and GGUF file to download. Both values must
# match what is actually published on the Hub; verify them before deploying.
REPO_ID = "IoanaUser/Llama-3.2-3B-Math-GGUF"
FILENAME = "llama-3.2-3b-math-q4_k_m.gguf"

print(f"⏳ Descarc modelul {FILENAME}...")
try:
    model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
except Exception as e:
    # Print a readable message for the startup log, then re-raise the same
    # exception unchanged so startup still fails loudly.
    print(f"❌ Eroare la descărcare: {e}")
    raise

print("🚀 Încarc motorul Llama...")
# NOTE(review): the original author states n_threads=2 is the best fit for the
# free hosting tier; confirm against the actual CPU allocation.
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=2)
|
| 40 |
|
| 41 |
def generate_response(message, history):
|
| 42 |
+
system_prompt = "You are a helpful mathematical assistant. Answer directly and precisely."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
full_prompt = f"<|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
|
| 44 |
|
| 45 |
response = ""
|
| 46 |
output = llm(
|
| 47 |
full_prompt,
|
| 48 |
max_tokens=512,
|
| 49 |
+
stop=["<|eot_id|>"],
|
| 50 |
echo=False,
|
| 51 |
stream=True
|
| 52 |
)
|
|
|
|
| 58 |
|
| 59 |
# Build the chat UI; generate_response is passed in as the chat handler.
demo = gr.ChatInterface(
    generate_response,
    title="Math tutor",
    description="Ask me a question vro<3",
)
|
| 64 |
|
| 65 |
if __name__ == "__main__":
|