"""Ureola — a minimal streaming chat UI for a HF-hosted causal LM.

Runs on CPU (free Hugging Face Space tier). The model reply is streamed
token-by-token into the page while a background thread drives generation.
"""

import threading

import streamlit as st
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
)

# ================= CONFIG =================
MODEL_ID = "Neon-AI/Kushina"
MAX_NEW_TOKENS = 16384
TEMPERATURE = 0.7
TOP_P = 0.9
# ==========================================

st.set_page_config(page_title="Ureola", layout="centered")
st.title("🧠 Ureola")
st.caption("HF Free Space · CPU · Streaming")


# ================= LOAD MODEL =================
@st.cache_resource
def load_model():
    """Load tokenizer and model once per Space process.

    ``st.cache_resource`` keeps the (large) model out of Streamlit's
    per-rerun lifecycle. float32 is used because the Space is CPU-only.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        trust_remote_code=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float32,
    )
    model.eval()
    return tokenizer, model


tokenizer, model = load_model()

# ================= SESSION STATE =================
# history holds (speaker, text) tuples; speaker is "You" or "Ureola".
if "history" not in st.session_state:
    st.session_state.history = []

# ================= SYSTEM PROMPT =================
SYSTEM_PROMPT = """
You are Ureola. You operate in exactly ONE of three modes, but you never talk to users about them. MODE: CHAT - Mirror the user's tone. - Replies are short (1–3 sentences). - No emojis unless user uses them first. - No explanations unless asked. MODE: CODE - Output ONLY code unless asked to explain. - No personality or commentary. MODE: ACADEMIC - Neutral, formal tone. - Clear structure. - Fully answer the task. MODE SELECTION: - CODE → code, script, program, app, api, algorithm - ACADEMIC → essay, explanation, homework, analysis - Otherwise → CHAT IDENTITY: Name: Ureola Creator: Neon Mention Neon ONLY if explicitly asked. 
""".strip()

# ================= INPUT =================
prompt = st.text_input("You", placeholder="Say something…")

if st.button("Send") and prompt.strip():
    st.session_state.history.append(("You", prompt))

    # Build the full multi-turn conversation, not just the latest message —
    # the original only sent the last user turn, so the model had no memory.
    # The current prompt is already in history at this point.
    chat = [{"role": "system", "content": SYSTEM_PROMPT}]
    for speaker, text in st.session_state.history:
        role = "user" if speaker == "You" else "assistant"
        chat.append({"role": role, "content": text})

    # IMPORTANT: return_dict=True so we get a mapping we can ** into
    # generate() (avoids the tensor-vs-dict crash with some templates).
    inputs = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    )

    # skip_prompt=True: only newly generated tokens are yielded.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    gen_kwargs = dict(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        eos_token_id=tokenizer.eos_token_id,
        # Model may lack a pad token; reuse EOS to silence the warning.
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )

    # Generation runs in a background thread so the main thread can consume
    # the streamer and update the UI incrementally. daemon=True keeps a
    # long-running generation from blocking interpreter shutdown.
    thread = threading.Thread(
        target=model.generate,
        kwargs=gen_kwargs,
        daemon=True,
    )
    thread.start()

    placeholder = st.empty()
    output_text = ""
    for token in streamer:
        output_text += token
        placeholder.markdown(f"**Ureola:** {output_text}")

    # Ensure generate() has fully finished before we touch session state.
    thread.join()

    st.session_state.history.append(("Ureola", output_text))

    # Clear the streaming placeholder so the reply is rendered exactly once
    # by the history loop below (the original showed it twice per run).
    placeholder.empty()

# ================= DISPLAY HISTORY =================
for speaker, text in st.session_state.history:
    if speaker == "You":
        st.markdown(f"**You:** {text}")
    else:
        st.markdown(f"**Ureola:** {text}")