| import os |
| import subprocess |
| import sys |
|
|
| |
| |
# Make sure llama-cpp-python is importable. If it is not, install it with pip
# (using the extra CPU wheel index) before the rest of the script imports it.
try:
    import llama_cpp  # noqa: F401  (imported only to probe availability)
    print("✅ Llama-cpp-python este deja instalat.")
except ImportError:
    import importlib

    print("⏳ Se instalează llama-cpp-python pentru CPU (poate dura 30-60 secunde)...")

    # Run pip through the current interpreter so the package is installed into
    # the environment this script is running in. check_call raises
    # subprocess.CalledProcessError if pip exits with a non-zero status.
    subprocess.check_call([
        sys.executable, "-m", "pip", "install",
        "llama-cpp-python",
        "--no-cache-dir",
        "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu",
    ])

    # The import system caches what it has seen on sys.path. After installing a
    # package while the program is running, the caches must be invalidated so
    # the later `from llama_cpp import Llama` is guaranteed to find the files.
    importlib.invalidate_caches()
    print("✅ Instalare completă!")
|
|
| |
| import gradio as gr |
| from llama_cpp import Llama |
| from huggingface_hub import hf_hub_download |
|
|
| |
# Hugging Face Hub coordinates of the GGUF model file used by the app.
REPO_ID = "i04n4/llama3.2-3b-math-gguf"
FILENAME = "model.gguf"

# Fetch the model file; the value returned by hf_hub_download is passed to
# Llama below as the model path.
print(f"⏳ Descarc modelul {FILENAME}...")
try:
    model_path = hf_hub_download(filename=FILENAME, repo_id=REPO_ID)
except Exception as download_error:
    # Report the failure on stdout, then propagate it so startup aborts.
    print(f"❌ Eroare la descărcare: {download_error}")
    raise download_error

print("🚀 Încarc motorul Llama...")

# Single shared model instance used by the chat handler.
llm = Llama(
    n_threads=2,
    n_ctx=2048,
    model_path=model_path,
)
|
|
def _content_to_text(content):
    """Return the plain text carried by one history ``content`` value.

    A string is returned unchanged. A list/tuple of dict parts contributes the
    ``"text"`` of every part whose ``"type"`` is ``"text"``. Anything else
    (``None``, file payloads, ...) yields an empty string.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        texts = (
            part.get("text", "")
            for part in content
            if isinstance(part, dict) and part.get("type") == "text"
        )
        return "".join(text for text in texts if isinstance(text, str))
    return ""


def _history_to_turns(history):
    """Flatten a chat history into a list of ``(role, text)`` tuples.

    Two entry shapes are accepted:
    * a dict with ``"role"`` and ``"content"`` keys;
    * a two-item list/tuple, read as ``(user_text, assistant_text)``.

    Entries of any other shape, roles other than ``user``/``assistant`` and
    empty texts are skipped.
    NOTE(review): these are the history shapes this code was written against;
    confirm against the Gradio version actually installed.
    """
    turns = []
    for entry in history or []:
        if isinstance(entry, dict):
            pairs = [(entry.get("role"), entry.get("content"))]
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            pairs = list(zip(("user", "assistant"), entry))
        else:
            continue
        for role, content in pairs:
            text = _content_to_text(content)
            if role in ("user", "assistant") and text:
                turns.append((role, text))
    return turns


def _build_prompt(system_prompt, turns, message):
    """Render system prompt, earlier turns and the new message as one prompt.

    Every segment is wrapped in the ``<|start_header_id|>`` / ``<|eot_id|>``
    markup, and the result ends with an open assistant header for the model to
    continue. With no earlier turns, the output is the single-turn prompt.
    """
    segments = [("system", system_prompt), *turns, ("user", message)]
    rendered = "".join(
        f"<|start_header_id|>{role}<|end_header_id|>\n\n{text}<|eot_id|>"
        for role, text in segments
    )
    return rendered + "<|start_header_id|>assistant<|end_header_id|>\n\n"


def _fits_context(prompt, reply_budget):
    """Return True if ``prompt`` plus ``reply_budget`` tokens fit in ``llm``'s context."""
    prompt_tokens = llm.tokenize(prompt.encode("utf-8"), special=True)
    return len(prompt_tokens) + reply_budget <= llm.n_ctx()


def generate_response(message, history):
    """Stream the assistant's reply to ``message``, taking ``history`` into account.

    Args:
        message: The text the user just submitted.
        history: Earlier turns of the conversation (see ``_history_to_turns``
            for the accepted shapes). May be empty or ``None``.

    Yields:
        The reply accumulated so far, once per streamed chunk, so each yielded
        value is a progressively longer prefix of the final answer.
    """
    system_prompt = "You are a helpful mathematical assistant. Answer directly and precisely."
    max_tokens = 512

    turns = _history_to_turns(history)
    full_prompt = _build_prompt(system_prompt, turns, message)

    # Drop the oldest exchanges until the prompt leaves room for the reply.
    # After removing one turn, also remove turns up to the next user turn so
    # the kept history never starts with an orphaned assistant message.
    while turns and not _fits_context(full_prompt, max_tokens):
        turns = turns[1:]
        while turns and turns[0][0] != "user":
            turns = turns[1:]
        full_prompt = _build_prompt(system_prompt, turns, message)

    response = ""
    output = llm(
        full_prompt,
        max_tokens=max_tokens,
        stop=["<|eot_id|>"],
        echo=False,
        stream=True,
    )

    for chunk in output:
        response += chunk["choices"][0]["text"]
        yield response
|
|
# Chat UI definition; generate_response is passed in as the chat handler.
demo = gr.ChatInterface(
    fn=generate_response,
    title="Math tutor",
    description="Ask me a question vro<3",
)


# Start the interface only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()