Neon-AI committed on
Commit
54b468c
·
verified ·
1 Parent(s): 8b77b38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -78
app.py CHANGED
import threading

import streamlit as st
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    Qwen3ForCausalLM,
    TextIteratorStreamer,
)
9
 
10
# ================= CONFIG =================
MODEL_ID = "Neon-AI/Kushina"  # Hugging Face repo
MAX_NEW_TOKENS = 16384  # CPU-friendly
TEMPERATURE = 0.7  # sampling temperature for replies
TOP_P = 0.9  # nucleus-sampling cutoff

# ==========================================
st.set_page_config(page_title="Ureola", layout="centered")
st.title("🧏🏾‍♀️ Ureola")
st.caption("HF Free Space · CPU · Streaming · Memory")
20
 
21
# ================= LOAD MODEL =================
@st.cache_resource
def load_model():
    """Load the tokenizer and model once per server process.

    ``st.cache_resource`` keeps the pair alive across Streamlit reruns so the
    multi-GB weights are only downloaded/loaded on the first request.

    Returns:
        tuple: (tokenizer, model) ready for CPU inference.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    # BUG FIX: this function called AutoModelForCausalLM, but the file only
    # imported Qwen3ForCausalLM — a NameError on first use. AutoModelForCausalLM
    # (now imported at the top of the file) resolves the architecture from the
    # repo config, so it also generalizes to future model swaps.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float32,  # full precision: CPU has no fp16 kernels
    )
    model.eval()  # inference only: disable dropout etc.
    return tokenizer, model
31
 
 
32
tokenizer, model = load_model()

# ================= SESSION STATE =================
# history: list of (speaker, text) tuples rendered at the bottom of the page.
if "history" not in st.session_state:
    st.session_state.history = []
# memory: newline-joined long-term facts extracted by maybe_update_memory().
if "memory" not in st.session_state:
    st.session_state.memory = ""
39
 
40
# ================= SYSTEM PROMPT =================
# Base persona/mode instructions. build_system_prompt() appends the stored
# memory section to this at request time; the string itself must not change
# shape, since the model is prompted with it verbatim.
BASE_SYSTEM_PROMPT = """You are Ureola.
You operate in exactly ONE of three modes, but you never talk to users about them.

====================MODE: CHAT====================
Rules:
- Mirror the user's tone precisely.
- Replies must be short (1–3 sentences).
- No emojis unless user uses them first.
- No explanations unless explicitly asked.

====================MODE: CODE====================
Rules:
- Output ONLY code unless explicitly asked to explain.
- No personality, no commentary.

====================MODE: ACADEMIC====================
Rules:
- Neutral, formal tone.
- Clear structure.
- Fully answer the task.

====================MODE SELECTION====================
CODE → if user asks for code, script, app, api, algorithm
ACADEMIC → essay, explanation, homework, analysis
Otherwise → CHAT

====================IDENTITY====================
Name: Ureola
Creator: Neon
Mention Neon ONLY if explicitly asked.
""".strip()
72
 
73
-
74
def build_system_prompt():
    """Return the base system prompt, extended with long-term memory if any."""
    memory = st.session_state.memory
    if not memory.strip():
        return BASE_SYSTEM_PROMPT
    return (
        f"{BASE_SYSTEM_PROMPT}\n"
        f"====================MEMORY====================\n"
        f"{memory}"
    )
79
-
80
-
81
# ================= MEMORY UPDATE =================
def maybe_update_memory(user_text: str, assistant_text: str) -> None:
    """Extract stable long-term facts from the last exchange into session memory.

    Runs a small deterministic generation over an extraction prompt; appends
    the result to ``st.session_state.memory`` unless the model answers "NONE".

    Args:
        user_text: the user's last message.
        assistant_text: the assistant's reply to it.
    """
    memory_prompt = f"""Extract LONG-TERM memory.
Rules:
- Max 5 bullet points
- Each bullet ≤ 15 words
- Only stable preferences/facts
- Ignore jokes, emotions, temporary info
- If nothing important, return EXACTLY: NONE

Current memory:{st.session_state.memory or "None"}
Conversation:
User: {user_text}
Assistant: {assistant_text}"""

    inputs = tokenizer(memory_prompt, return_tensors="pt")
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=120,  # small budget: CPU-friendly
            do_sample=False,     # deterministic extraction
        )
    # BUG FIX: output[0] contains the prompt tokens followed by the newly
    # generated ones. Decoding the whole sequence meant `text` always started
    # with the extraction prompt itself — it could never equal "NONE", and the
    # full prompt was appended to memory on every turn, growing it unboundedly.
    # Decode only the tokens generated after the input.
    new_tokens = output[0][inputs["input_ids"].shape[1]:]
    text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    if text and text != "NONE":
        if st.session_state.memory:
            st.session_state.memory += "\n" + text
        else:
            st.session_state.memory = text
110
-
111
-
112
# ================= INPUT =================
prompt = st.text_input("You", placeholder="Say something…")

if st.button("Send") and prompt.strip():
    # Record the user turn before generating.
    st.session_state.history.append(("You", prompt))
    system_prompt = build_system_prompt()

    # Single-turn prompt: only system prompt + current message are sent;
    # prior history is NOT replayed to the model.
    chat = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    # Tokenizer helper
    inputs = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        return_tensors="pt",  # NOTE(review): this line is folded out of the diff view — confirm
        return_dict=True
    )

    # Streamer: yields decoded text fragments as generate() produces tokens.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,          # do not re-emit the prompt text
        skip_special_tokens=True
    )

    # Generation arguments
    gen_kwargs = dict(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,            # NOTE(review): folded out of the diff view — confirm
        temperature=TEMPERATURE,   # NOTE(review): folded out of the diff view — confirm
        top_p=TOP_P,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,  # reuse EOS; model may lack a pad token
        streamer=streamer
    )

    # Run in separate thread: generate() blocks, so it runs on a worker
    # thread while this (main) thread drains the streamer and paints the UI.
    thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()

    placeholder = st.empty()
    output_text = ""
    for token in streamer:  # loop ends when generation finishes
        output_text += token
        placeholder.markdown(f"**Ureola:** {output_text}")

    # Append to history
    st.session_state.history.append(("Ureola", output_text))

    # Update memory immediately
    maybe_update_memory(prompt, output_text)
-
167
  # ================= DISPLAY HISTORY =================
168
  for speaker, text in st.session_state.history:
169
  if speaker == "You":
 
2
  import torch
3
  import threading
4
  from transformers import (
5
+ AutoModelForCausalLM,
6
  AutoTokenizer,
7
  TextIteratorStreamer,
8
  )
9
 
10
# ================= CONFIG =================
MODEL_ID = "Neon-AI/Kushina"  # Hugging Face model repo id
MAX_NEW_TOKENS = 16384  # per-reply generation cap
TEMPERATURE = 0.7  # sampling temperature
TOP_P = 0.9  # nucleus-sampling cutoff
# ==========================================

st.set_page_config(page_title="Ureola", layout="centered")
st.title("🧠 Ureola")
st.caption("HF Free Space · CPU · Streaming")
20
 
21
# ================= LOAD MODEL =================
@st.cache_resource
def load_model():
    """Load and cache the tokenizer/model pair for the whole server process."""
    tok = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

    # float32: full precision, since CPU inference has no fp16 kernels.
    lm = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float32)
    lm.eval()

    return tok, lm
36
 
37
+
38
tokenizer, model = load_model()

# ================= SESSION STATE =================
# history: list of (speaker, text) tuples rendered at the bottom of the page.
if "history" not in st.session_state:
    st.session_state.history = []
 
 
43
 
44
# ================= SYSTEM PROMPT =================
# Persona/mode instructions sent verbatim as the system message on every turn.
SYSTEM_PROMPT = """
You are Ureola.
You operate in exactly ONE of three modes, but you never talk to users about them.

MODE: CHAT
- Mirror the user's tone.
- Replies are short (1–3 sentences).
- No emojis unless user uses them first.
- No explanations unless asked.

MODE: CODE
- Output ONLY code unless asked to explain.
- No personality or commentary.

MODE: ACADEMIC
- Neutral, formal tone.
- Clear structure.
- Fully answer the task.

MODE SELECTION:
- CODE → code, script, program, app, api, algorithm
- ACADEMIC → essay, explanation, homework, analysis
- Otherwise → CHAT

IDENTITY:
Name: Ureola
Creator: Neon
Mention Neon ONLY if explicitly asked.
""".strip()
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
# ================= INPUT =================
prompt = st.text_input("You", placeholder="Say something…")

if st.button("Send") and prompt.strip():
    # Record the user turn before generating.
    st.session_state.history.append(("You", prompt))

    # Single-turn prompt: only system prompt + current message are sent;
    # prior history is NOT replayed to the model.
    chat = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]

    # IMPORTANT: return_dict=True (this avoids your crash)
    inputs = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        return_tensors="pt",  # NOTE(review): this line is folded out of the diff view — confirm
        return_dict=True
    )

    # Streamer: yields decoded text fragments as generate() produces tokens.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,          # do not re-emit the prompt text
        skip_special_tokens=True
    )

    gen_kwargs = dict(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,            # NOTE(review): folded out of the diff view — confirm
        temperature=TEMPERATURE,   # NOTE(review): folded out of the diff view — confirm
        top_p=TOP_P,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,  # reuse EOS; model may lack a pad token
        streamer=streamer,
    )

    # generate() blocks, so it runs on a worker thread while this (main)
    # thread drains the streamer and paints the UI.
    thread = threading.Thread(
        target=model.generate,
        kwargs=gen_kwargs
    )
    thread.start()

    placeholder = st.empty()
    output_text = ""

    for token in streamer:  # loop ends when generation finishes
        output_text += token
        placeholder.markdown(f"**Ureola:** {output_text}")

    st.session_state.history.append(("Ureola", output_text))
125
 
 
 
 
126
  # ================= DISPLAY HISTORY =================
127
  for speaker, text in st.session_state.history:
128
  if speaker == "You":