# Chatbot_test / app.py — Hugging Face Space by Neon-AI (commit 54b468c, verified)
import streamlit as st
import torch
import threading
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
TextIteratorStreamer,
)
# ================= CONFIG =================
MODEL_ID = "Neon-AI/Kushina"  # Hugging Face model repo to load
MAX_NEW_TOKENS = 16384  # hard cap on tokens generated per reply
TEMPERATURE = 0.7  # sampling temperature (higher = more random)
TOP_P = 0.9  # nucleus-sampling cumulative-probability cutoff
# ==========================================
st.set_page_config(page_title="Ureola", layout="centered")
st.title("🧠 Ureola")
st.caption("HF Free Space · CPU · Streaming")
# ================= LOAD MODEL =================
@st.cache_resource
def load_model():
    """Load the tokenizer and model once per process (Streamlit-cached).

    Returns:
        tuple: ``(tokenizer, model)`` — the tokenizer and a causal-LM in
        eval mode with float32 weights (CPU inference on a free Space).
    """
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        trust_remote_code=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float32,
        # Consistency fix: the tokenizer is loaded with trust_remote_code,
        # so the model must be too — otherwise a repo shipping custom
        # modeling code fails to load.
        trust_remote_code=True,
    )
    model.eval()  # disable dropout etc. for deterministic inference
    return tokenizer, model
tokenizer, model = load_model()
# ================= SESSION STATE =================
# Chat transcript as (speaker, text) tuples; survives Streamlit reruns.
st.session_state.setdefault("history", [])
# ================= SYSTEM PROMPT =================
# Instruction block sent as the "system" message on every request; the
# trailing .strip() removes the literal's leading/trailing newlines.
SYSTEM_PROMPT = """
You are Ureola.
You operate in exactly ONE of three modes, but you never talk to users about them.
MODE: CHAT
- Mirror the user's tone.
- Replies are short (1–3 sentences).
- No emojis unless user uses them first.
- No explanations unless asked.
MODE: CODE
- Output ONLY code unless asked to explain.
- No personality or commentary.
MODE: ACADEMIC
- Neutral, formal tone.
- Clear structure.
- Fully answer the task.
MODE SELECTION:
- CODE → code, script, program, app, api, algorithm
- ACADEMIC → essay, explanation, homework, analysis
- Otherwise → CHAT
IDENTITY:
Name: Ureola
Creator: Neon
Mention Neon ONLY if explicitly asked.
""".strip()
# ================= INPUT =================
prompt = st.text_input("You", placeholder="Say something…")
if st.button("Send") and prompt.strip():
    # Fix: store the trimmed text we gate on, not the raw input.
    user_msg = prompt.strip()
    st.session_state.history.append(("You", user_msg))

    # Fix: build the full conversation (system + every prior turn) so the
    # model has context across messages — the original sent only the
    # latest prompt even though a history was kept.
    chat = [{"role": "system", "content": SYSTEM_PROMPT}]
    for speaker, text in st.session_state.history:
        chat.append({
            "role": "user" if speaker == "You" else "assistant",
            "content": text,
        })

    # IMPORTANT: return_dict=True so generate() receives input_ids AND
    # attention_mask as keyword args (this avoids the earlier crash).
    inputs = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    )

    # Streams decoded text as it is generated; skip_prompt hides the
    # echoed input, skip_special_tokens drops EOS/pad markers.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    gen_kwargs = dict(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        eos_token_id=tokenizer.eos_token_id,
        # Use EOS as the pad token: many causal-LM tokenizers define none.
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )

    # generate() blocks, so run it on a worker thread and consume the
    # streamer on the main (Streamlit) thread.
    thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()

    placeholder = st.empty()
    output_text = ""
    for token in streamer:
        output_text += token
        placeholder.markdown(f"**Ureola:** {output_text}")

    # Fix: join the worker so generation has fully finished before the
    # reply is recorded (the streamer ends at EOS, but join() is cheap).
    thread.join()
    st.session_state.history.append(("Ureola", output_text))
# ================= DISPLAY HISTORY =================
# Render the transcript; labels mirror the speaker tags stored above.
for who, msg in st.session_state.history:
    label = "You" if who == "You" else "Ureola"
    st.markdown(f"**{label}:** {msg}")