# NOTE: Hugging Face Spaces page residue captured with the source
# ("Spaces: Paused" status banner) — not part of the application code.
| import streamlit as st | |
| import torch | |
| import threading | |
| from transformers import ( | |
| AutoModelForCausalLM, | |
| AutoTokenizer, | |
| TextIteratorStreamer, | |
| ) | |
# ================= CONFIG =================
MODEL_ID = "Neon-AI/Kushina"  # Hugging Face Hub repo loaded below
MAX_NEW_TOKENS = 16384        # hard cap on generated tokens per reply
TEMPERATURE = 0.7             # sampling temperature passed to model.generate
TOP_P = 0.9                   # nucleus-sampling cutoff passed to model.generate
# ==========================================
# Page chrome — set_page_config must be the first Streamlit call in the script.
st.set_page_config(page_title="Ureola", layout="centered")
st.title("🧠 Ureola")
st.caption("HF Free Space · CPU · Streaming")
# ================= LOAD MODEL =================
@st.cache_resource
def load_model():
    """Load the tokenizer and model once per server process.

    Streamlit re-executes this script top-to-bottom on every interaction;
    without ``st.cache_resource`` the full model was re-loaded from disk on
    every button click, which is prohibitively slow on a CPU Space. The
    decorator caches the (tokenizer, model) pair across reruns and sessions.

    Returns:
        tuple: ``(tokenizer, model)`` — an ``AutoTokenizer`` and an
        eval-mode ``AutoModelForCausalLM``.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        trust_remote_code=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float32,  # CPU-only Space: keep full fp32
    )
    model.eval()  # inference only — disable dropout etc.
    return tokenizer, model

tokenizer, model = load_model()
# ================= SESSION STATE =================
# Lazily create the per-session chat transcript: a list of (speaker, text) pairs.
st.session_state.setdefault("history", [])
# ================= SYSTEM PROMPT =================
# System message sent as the first chat turn on every request. It instructs
# the model to pick one of three response modes (CHAT / CODE / ACADEMIC) by
# keyword, without ever mentioning the modes to the user. Runtime string —
# do not edit casually; the model's behavior depends on this exact text.
SYSTEM_PROMPT = """
You are Ureola.
You operate in exactly ONE of three modes, but you never talk to users about them.
MODE: CHAT
- Mirror the user's tone.
- Replies are short (1–3 sentences).
- No emojis unless user uses them first.
- No explanations unless asked.
MODE: CODE
- Output ONLY code unless asked to explain.
- No personality or commentary.
MODE: ACADEMIC
- Neutral, formal tone.
- Clear structure.
- Fully answer the task.
MODE SELECTION:
- CODE → code, script, program, app, api, algorithm
- ACADEMIC → essay, explanation, homework, analysis
- Otherwise → CHAT
IDENTITY:
Name: Ureola
Creator: Neon
Mention Neon ONLY if explicitly asked.
""".strip()
# ================= INPUT =================
prompt = st.text_input("You", placeholder="Say something…")
if st.button("Send") and prompt.strip():
    user_text = prompt.strip()  # store the same text we validated on
    st.session_state.history.append(("You", user_text))
    # Rebuild the FULL conversation so the model has multi-turn context.
    # (Previously only the latest prompt was sent, so the bot had no memory
    # even though the transcript was kept in session state.)
    chat = [{"role": "system", "content": SYSTEM_PROMPT}]
    for speaker, text in st.session_state.history:
        role = "user" if speaker == "You" else "assistant"
        chat.append({"role": role, "content": text})
    # return_dict=True makes apply_chat_template return a mapping
    # (input_ids + attention_mask) that can be splatted into generate();
    # without it a bare tensor comes back and **inputs crashes.
    inputs = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    )
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,          # don't echo the prompt tokens back
        skip_special_tokens=True,
    )
    gen_kwargs = dict(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,  # silences the missing-pad-token warning
        streamer=streamer,
    )
    # generate() blocks until completion, so run it on a worker thread and
    # consume the streamer incrementally on the main (Streamlit) thread.
    thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()
    placeholder = st.empty()
    output_text = ""
    for token in streamer:
        output_text += token
        placeholder.markdown(f"**Ureola:** {output_text}")
    # Streamer exhaustion means generation finished; join for clean teardown.
    thread.join()
    st.session_state.history.append(("Ureola", output_text))
    # Clear the live-stream widget: the history loop below re-renders the
    # reply, so leaving the placeholder populated would show it twice.
    placeholder.empty()
# ================= DISPLAY HISTORY =================
# Render the whole transcript, oldest first, one markdown line per turn.
for speaker, text in st.session_state.history:
    label = "You" if speaker == "You" else "Ureola"
    st.markdown(f"**{label}:** {text}")