Update context_parser.py
Browse files- context_parser.py +75 -0
context_parser.py
CHANGED
|
@@ -1,6 +1,81 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
|
| 6 |
def detect_intent(text: str) -> str:
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
import re
|
| 4 |
+
from typing import Any, Dict, Tuple
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def split_unity_message(text: str) -> Tuple[str, str]:
    """
    Split a Unity-style message into a context prefix and a user message.

    The split happens at the first ``user:``, ``question:`` or ``prompt:``
    marker (case-insensitive, optional whitespace before the colon).
    Everything before the marker is the prefix; everything after it,
    including any further lines, is the message.

    Args:
        text: Raw incoming message. ``None`` or empty input is tolerated.

    Returns:
        ``(prefix, message)``, both stripped of surrounding whitespace.
        ``("", "")`` for empty input, and ``("", stripped_text)`` when no
        marker followed by at least one character is found.
    """
    raw = (text or "").strip()
    if not raw:
        return "", ""

    # The marker must start on a word boundary; otherwise a word that merely
    # ends in a marker name (e.g. "superuser:") would be split in the middle.
    m = re.search(
        r"(?is)^(.*?)\b(?:user|question|prompt)\s*:\s*(.+)$",
        raw,
    )
    if m is None:
        return "", raw

    # Both groups always participate in a successful match, so no None guard
    # is needed before stripping.
    return m.group(1).strip(), m.group(2).strip()
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def extract_game_context_fields(text: str) -> Dict[str, Any]:
    """
    Lightweight parser for structured context coming from Unity / study messages.

    Recognised ``key: value`` / ``key = value`` pairs (keys are
    case-insensitive):

    - ``money`` / ``balance`` / ``bank`` -> ``fields["money"]`` (float)
    - ``difficulty``                     -> ``fields["difficulty"]`` (str)
    - ``category`` / ``topic``           -> ``fields["category"]`` (str)
    - ``question``                       -> ``fields["question_text"]``
      (rest of the line after the key)

    Additionally sets ``has_choices`` to ``True`` when both an ``A`` and a
    ``B`` option marker (``A)``, ``A.``, ``A:``) are present, and always sets
    the boolean ``looks_like_quant``.

    Args:
        text: Raw incoming message. ``None`` or empty input is tolerated.

    Returns:
        A dict with whatever fields could be detected. Missing fields are
        simply absent; only ``looks_like_quant`` is always present.
    """
    raw = (text or "").strip()
    lower = raw.lower()

    fields: Dict[str, Any] = {}

    # Money / balance. The pattern only admits an optionally negative decimal
    # literal, so float() cannot fail on the captured text.
    money_match = re.search(
        r"\b(?:money|balance|bank)\s*[:=]\s*(-?\d+(?:\.\d+)?)",
        raw,
        flags=re.I,
    )
    if money_match:
        fields["money"] = float(money_match.group(1))

    # Difficulty. Values may contain spaces, so on single-line input the
    # capture must stop before the next "key:" / "key=" instead of swallowing
    # the following key name ("hard category").
    difficulty_match = re.search(
        r"\bdifficulty\s*[:=]\s*([A-Za-z0-9_\- ]+?)"
        r"(?=\s+\w+\s*[:=]|[^A-Za-z0-9_\- ]|$)",
        raw,
        flags=re.I,
    )
    if difficulty_match:
        fields["difficulty"] = difficulty_match.group(1).strip()

    # Category / topic, with the same stop-before-next-key rule as difficulty.
    category_match = re.search(
        r"\b(?:category|topic)\s*[:=]\s*([A-Za-z0-9_\- /]+?)"
        r"(?=\s+\w+\s*[:=]|[^A-Za-z0-9_\- /]|$)",
        raw,
        flags=re.I,
    )
    if category_match:
        fields["category"] = category_match.group(1).strip()

    # Question text: everything up to the end of the line after the key.
    question_match = re.search(r"\bquestion\s*[:=]\s*(.+)", raw, flags=re.I)
    if question_match:
        q = question_match.group(1).strip()
        if q:
            fields["question_text"] = q

    # Answer choices in a rough form (case-sensitive "A" and "B" markers).
    if re.search(r"\bA[\)\.\:]", raw) and re.search(r"\bB[\)\.\:]", raw):
        fields["has_choices"] = True

    # Cheap flag: plain substring test on the lower-cased text, so a token
    # embedded in a longer word (e.g. "mean" in "meaning") also sets it.
    quant_tokens = ("solve", "equation", "percent", "%", "ratio", "probability", "mean", "median")
    fields["looks_like_quant"] = any(token in lower for token in quant_tokens)

    return fields
|
| 79 |
|
| 80 |
|
| 81 |
def detect_intent(text: str) -> str:
|