j-js committed on
Commit
1460fe5
·
verified ·
1 Parent(s): ea41352

Update context_parser.py

Browse files
Files changed (1) hide show
  1. context_parser.py +112 -46
context_parser.py CHANGED
@@ -1,84 +1,150 @@
1
  from __future__ import annotations
2
 
3
  import re
4
- from typing import Any, Dict, Tuple
5
 
6
 
7
  def split_unity_message(text: str) -> Tuple[str, str]:
8
  """
9
  Splits a Unity-style message into:
10
- - game/context prefix
11
- - user-facing message/question
12
 
13
- If no structured prefix is found, returns ("", original_text).
14
  """
15
  raw = (text or "").strip()
16
  if not raw:
17
  return "", ""
18
 
19
- # Common pattern: "QUESTION: ... USER: ..."
20
- m = re.search(
21
- r"(?is)^(.*?)(?:user|question|prompt)\s*:\s*(.+)$",
22
- raw,
23
- )
24
  if m:
25
- prefix = (m.group(1) or "").strip()
26
- message = (m.group(2) or "").strip()
27
- return prefix, message
28
 
29
  return "", raw
30
 
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  def extract_game_context_fields(text: str) -> Dict[str, Any]:
33
  """
34
- Lightweight parser for structured context coming from Unity / study messages.
35
-
36
- Returns a dict with whatever fields can be detected.
37
- Safe fallback: missing fields just remain absent.
38
  """
39
  raw = (text or "").strip()
40
- lower = raw.lower()
41
 
42
- fields: Dict[str, Any] = {}
 
 
 
 
 
 
 
 
43
 
44
- # Money / balance
45
- money_match = re.search(r"\b(?:money|balance|bank)\s*[:=]\s*([\-]?\d+(?:\.\d+)?)", raw, flags=re.I)
46
- if money_match:
47
- try:
48
- fields["money"] = float(money_match.group(1))
49
- except Exception:
50
- pass
51
 
52
- # Difficulty
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  difficulty_match = re.search(r"\bdifficulty\s*[:=]\s*([A-Za-z0-9_\- ]+)", raw, flags=re.I)
54
  if difficulty_match:
55
- fields["difficulty"] = difficulty_match.group(1).strip()
56
 
57
- # Category / topic
58
  category_match = re.search(r"\b(?:category|topic)\s*[:=]\s*([A-Za-z0-9_\- /]+)", raw, flags=re.I)
59
  if category_match:
60
- fields["category"] = category_match.group(1).strip()
61
-
62
- # Question text
63
- question_match = re.search(r"\bquestion\s*[:=]\s*(.+)", raw, flags=re.I)
64
- if question_match:
65
- q = question_match.group(1).strip()
66
- if q:
67
- fields["question_text"] = q
68
-
69
- # Answer choices in a rough form
70
- if re.search(r"\bA[\)\.\:]", raw) and re.search(r"\bB[\)\.\:]", raw):
71
- fields["has_choices"] = True
72
-
73
- # Cheap flags
74
- fields["looks_like_quant"] = any(
75
- token in lower for token in ["solve", "equation", "percent", "%", "ratio", "probability", "mean", "median"]
 
 
 
 
 
 
 
 
 
 
 
76
  )
77
 
78
- return fields
79
 
80
 
81
- def detect_intent(text: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  t = (text or "").strip().lower()
83
 
84
  if not t:
 
1
  from __future__ import annotations
2
 
3
  import re
4
+ from typing import Any, Dict, List, Optional, Tuple
5
 
6
 
def split_unity_message(text: str) -> Tuple[str, str]:
    """
    Separate a Unity-style message into its two halves.

    The first element is the hidden/system/game context that precedes a
    ``user:``, ``prompt:`` or ``message:`` marker (matched case-insensitively,
    as a whole word); the second element is everything after that marker,
    i.e. the text the user actually typed. Both halves are whitespace-stripped.

    Returns ``("", "")`` for empty or ``None`` input, and
    ``("", stripped_text)`` when no marker is present.
    """
    cleaned = (text or "").strip()
    if not cleaned:
        return "", ""

    # Expected layout:
    #   CONTEXT: ...
    #   USER: ...
    # The lazy first group makes the split happen at the earliest marker.
    marker = re.search(
        r"(?is)^(.*?)(?:\buser\b|\bprompt\b|\bmessage\b)\s*:\s*(.+)$",
        cleaned,
    )
    if marker is None:
        return "", cleaned

    context_part, user_part = marker.groups()
    return (context_part or "").strip(), (user_part or "").strip()
29
 
30
 
31
+ def _extract_options(text: str) -> List[str]:
32
+ if not text:
33
+ return []
34
+
35
+ lines = [line.strip() for line in text.splitlines() if line.strip()]
36
+ options: List[str] = []
37
+
38
+ for line in lines:
39
+ if re.match(r"^[A-E][\)\.\:]\s*", line, flags=re.I):
40
+ options.append(re.sub(r"^[A-E][\)\.\:]\s*", "", line, flags=re.I).strip())
41
+
42
+ if options:
43
+ return options
44
+
45
+ # fallback: inline A) ... B) ...
46
+ matches = re.findall(r"(?:^|\s)([A-E])[\)\.\:]\s*(.*?)(?=(?:\s+[A-E][\)\.\:])|$)", text, flags=re.I | re.S)
47
+ if matches:
48
+ return [m[1].strip() for m in matches if m[1].strip()]
49
+
50
+ return []
51
+
52
+
def extract_game_context_fields(text: str) -> Dict[str, Any]:
    """
    Extract lightweight structured fields from hidden Unity/game context.

    The context is scanned for ``key: value`` / ``key = value`` pairs
    (keys are matched case-insensitively). The returned dict always contains
    the same keys, which the original docstring notes are expected by app.py:

    * ``question`` (str) - text after ``question:``, up to the next field
      header or the first answer-choice line; ``""`` if absent.
    * ``options`` (list of str) - answer choices from an
      ``options:``/``choices:``/``answers:`` block, or, failing that, from a
      scan of the whole context; ``[]`` if none.
    * ``difficulty`` (str or None) - value after ``difficulty:``.
    * ``category`` (str or None) - value after ``category:`` or ``topic:``.
    * ``money`` (float or None) - number after ``money:``, ``balance:`` or
      ``bank:``.
    * ``has_choices`` (bool) - whether ``options`` is non-empty.
    * ``looks_like_quant`` (bool) - whether any of a fixed list of
      maths-related substrings occurs in the lower-cased context.

    Empty or ``None`` input yields the defaults above.
    """
    raw = (text or "").strip()

    result: Dict[str, Any] = {
        "question": "",
        "options": [],
        "difficulty": None,
        "category": None,
        "money": None,
        "has_choices": False,
        "looks_like_quant": False,
    }

    if not raw:
        return result

    # Start of the next "Name:" / "Name =" field on a new line. Colon-style
    # option labels ("B:") are excluded, otherwise an options block written
    # as "A: ...\nB: ..." would be cut off after its first entry.
    next_field = r"\n(?![ \t]*[A-E]:)[A-Za-z_ ]+\s*[:=]"
    # A new line that opens an answer choice ("A)", "b.", "C:").
    option_line = r"\n[ \t]*[A-E][\)\.\:]"

    # question: stop at the first answer-choice line as well as at the next
    # field, so the choices are not folded into the question text.
    q_match = re.search(
        r"\bquestion\s*[:=]\s*(.+?)(?=" + option_line + "|" + next_field + r"|\Z)",
        raw,
        flags=re.I | re.S,
    )
    if q_match:
        result["question"] = q_match.group(1).strip()

    # options block
    opt_match = re.search(
        r"\b(?:options|choices|answers)\s*[:=]\s*(.+?)(?=" + next_field + r"|\Z)",
        raw,
        flags=re.I | re.S,
    )
    if opt_match:
        result["options"] = _extract_options(opt_match.group(1))

    # if no explicit options block, scan whole context
    if not result["options"]:
        result["options"] = _extract_options(raw)

    result["has_choices"] = bool(result["options"])

    # difficulty
    difficulty_match = re.search(r"\bdifficulty\s*[:=]\s*([A-Za-z0-9_\- ]+)", raw, flags=re.I)
    if difficulty_match:
        result["difficulty"] = difficulty_match.group(1).strip()

    # category/topic
    category_match = re.search(r"\b(?:category|topic)\s*[:=]\s*([A-Za-z0-9_\- /]+)", raw, flags=re.I)
    if category_match:
        result["category"] = category_match.group(1).strip()

    # money/balance
    money_match = re.search(r"\b(?:money|balance|bank)\s*[:=]\s*([\-]?\d+(?:\.\d+)?)", raw, flags=re.I)
    if money_match:
        try:
            result["money"] = float(money_match.group(1))
        except ValueError:
            # The pattern only admits numeric text, so this is a safety net:
            # an unparsable amount simply leaves "money" as None.
            pass

    lower = raw.lower()
    # Plain substring test, so e.g. "percent" also matches "percentage".
    result["looks_like_quant"] = any(
        token in lower
        for token in [
            "solve",
            "equation",
            "percent",
            "%",
            "ratio",
            "probability",
            "mean",
            "median",
            "algebra",
            "integer",
            "triangle",
            "circle",
        ]
    )

    return result
127
 
128
 
129
+ def detect_intent(text: str, incoming_help_mode: Optional[str] = None) -> str:
130
+ """
131
+ Returns one of:
132
+ answer, hint, instruction, walkthrough, explain, method, definition, concept
133
+ """
134
+ forced = (incoming_help_mode or "").strip().lower()
135
+ if forced in {
136
+ "answer",
137
+ "hint",
138
+ "instruction",
139
+ "walkthrough",
140
+ "step_by_step",
141
+ "explain",
142
+ "method",
143
+ "definition",
144
+ "concept",
145
+ }:
146
+ return forced
147
+
148
  t = (text or "").strip().lower()
149
 
150
  if not t: