nova committed on
Commit
1c4aa85
·
verified ·
1 Parent(s): 428206d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -20
app.py CHANGED
@@ -1,51 +1,50 @@
1
- mport gradio as gr
2
  from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
- # PRO: Qwen 2.5 14B Instruct (GGUF) - Q4_K_M (Best Balance)
5
- # Fits in 16GB RAM (Free Tier T4 GPU or CPU Upgrade)
6
- # Size: ~9GB Model + Context
7
- REPO_ID = "Qwen/Qwen2.5-14B-Instruct-GGUF"
8
- FILENAME = "qwen2.5-14b-instruct-q4_k_m.gguf"
9
- print(f"Downloading {FILENAME} from {REPO_ID}...")
10
- # Use 'main' or specific quantization repo if needed
11
- # Actually Qwen official usually puts GGUF in separate repo or subfolder.
12
- # Let's use a reliable GGUF mirror/quantizer if official is confusing, but Bartowski is standard.
13
- # REPO: bartowski/Qwen2.5-14B-Instruct-GGUF
14
  REPO_ID = "bartowski/Qwen2.5-14B-Instruct-GGUF"
15
  FILENAME = "Qwen2.5-14B-Instruct-Q4_K_M.gguf"
 
 
16
  model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
 
17
  print("Loading model...")
18
  llm = Llama(
19
  model_path=model_path,
20
- n_ctx=4096,
21
- n_threads=2, # Keep low for shared CPU
22
  verbose=False
23
  )
 
24
  def generate_pro(message, history):
25
- # Qwen 2.5 Prompt Format
26
- # <|im_start|>system\n...<|im_end|>\n<|im_start|>user\n...
27
-
28
- prompt = "<|im_start|>system\nYou are Lumin Pro, an intelligent AI assistant built by NovaPixel.<|im_end|>\n"
29
  for user_msg, bot_msg in history:
30
  prompt += f"<|im_start|>user\n{user_msg}<|im_end|>\n<|im_start|>assistant\n{bot_msg}<|im_end|>\n"
31
  prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
 
32
  stream = llm.create_completion(
33
  prompt,
34
  max_tokens=2048,
35
  stop=["<|im_end|>"],
36
  stream=True,
37
- temperature=0.7
 
38
  )
 
39
  partial_text = ""
40
  for output in stream:
41
  delta = output['choices'][0]['text']
42
  partial_text += delta
43
  yield partial_text
 
44
  chat_interface = gr.ChatInterface(
45
  fn=generate_pro,
46
  title="🌟 Lumin Pro (Qwen 14B)",
47
- description="Running Qwen 2.5 14B Instruct (GGUF) locally.",
48
- examples=["Explain Quantum Physics", "Write a Python script for scraping"],
49
  )
 
50
  if __name__ == "__main__":
51
  chat_interface.launch(server_name="0.0.0.0", server_port=7860)
 
1
+ import gradio as gr
2
  from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
+
5
# PRO: Qwen 2.5 14B Instruct (GGUF), Q4_K_M quantization.
# Fits in 16 GB of RAM (tight, but it works).
REPO_ID = "bartowski/Qwen2.5-14B-Instruct-GGUF"
FILENAME = "Qwen2.5-14B-Instruct-Q4_K_M.gguf"

# Fetch the quantized weights from the Hub (cached after first run).
print(f"Downloading {FILENAME} from {REPO_ID}...")
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

# Load the GGUF model for CPU inference.
print("Loading model...")
llm = Llama(
    model_path=model_path,
    n_ctx=8192,    # moderate context window to stay within available RAM
    n_threads=2,   # keep thread count low to be friendly on shared CPUs
    verbose=False,
)
20
+
21
def generate_pro(message, history):
    """Stream a response from the local Qwen model for a Gradio chat turn.

    Args:
        message: The latest user message.
        history: Prior turns as (user_message, assistant_message) pairs.

    Yields:
        The accumulated assistant reply after each streamed token, so the
        UI can render the response incrementally.
    """
    # Assemble the prompt in ChatML format (Qwen's native chat template),
    # replaying the conversation history before the new message.
    segments = []
    for past_user, past_assistant in history:
        segments.append(
            f"<|im_start|>user\n{past_user}<|im_end|>\n<|im_start|>assistant\n{past_assistant}<|im_end|>\n"
        )
    segments.append(
        f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    )
    prompt = "".join(segments)

    stream = llm.create_completion(
        prompt,
        max_tokens=2048,
        stop=["<|im_end|>"],
        stream=True,
        temperature=0.7,
        top_p=0.9,
    )

    # Accumulate streamed deltas and yield the running text each time.
    accumulated = ""
    for chunk in stream:
        accumulated += chunk['choices'][0]['text']
        yield accumulated
42
+
43
# Wire the streaming generator into a Gradio chat UI.
chat_interface = gr.ChatInterface(
    fn=generate_pro,
    title="🌟 Lumin Pro (Qwen 14B)",
    description="Running Qwen2.5-14B-Instruct (GGUF). Balanced Power.",
)

if __name__ == "__main__":
    # Bind to all interfaces on port 7860 (the standard HF Spaces port).
    chat_interface.launch(server_name="0.0.0.0", server_port=7860)