Spaces:
Sleeping
Sleeping
fix: resolve 3 failing CI tests
Browse files
- drift: remove history pre-population; pad only in get_history_series()
  so the chart still renders a baseline but the history count stays accurate
- retrieval: remove the 0.20 score threshold so empty queries return top_k results
- modules/drift.py +12 -13
- modules/retrieval.py +0 -2
modules/drift.py
CHANGED
|
@@ -73,17 +73,6 @@ class DriftDetector:
|
|
| 73 |
self._ewma[concept] = 0.0
|
| 74 |
|
| 75 |
logger.info("DriftDetector initialized with %d concept anchors.", len(concept_phrases))
|
| 76 |
-
|
| 77 |
-
# Prepopulate history so chart renders correctly on first load
|
| 78 |
-
for _ in range(5):
|
| 79 |
-
self.history.append(DriftEvent(
|
| 80 |
-
timestamp=time.time(),
|
| 81 |
-
query="[system initialization]",
|
| 82 |
-
scores={c: 0.15 for c in self._concept_embs},
|
| 83 |
-
dominant="normal"
|
| 84 |
-
))
|
| 85 |
-
for c in self._concept_embs:
|
| 86 |
-
self._ewma[c] = 0.15
|
| 87 |
# ── Public API ──────────────────────────────────────────────────────────
|
| 88 |
|
| 89 |
def analyze_drift(
|
|
@@ -147,10 +136,20 @@ class DriftDetector:
|
|
| 147 |
}
|
| 148 |
|
| 149 |
def get_history_series(self) -> dict[str, list[float]]:
|
| 150 |
-
"""Return full EWMA time-series for each concept (for charts).
|
| 151 |
-
|
|
|
|
|
|
|
|
|
|
| 152 |
series: dict[str, list[float]] = {c: [] for c in self._concept_embs}
|
| 153 |
ewma_state = {c: 0.0 for c in self._concept_embs}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
for event in self.history:
|
| 155 |
for c in self._concept_embs:
|
| 156 |
ewma_state[c] = self.ewma_alpha * event.scores[c] + (1 - self.ewma_alpha) * ewma_state[c]
|
|
|
|
| 73 |
self._ewma[concept] = 0.0
|
| 74 |
|
| 75 |
logger.info("DriftDetector initialized with %d concept anchors.", len(concept_phrases))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
# ── Public API ──────────────────────────────────────────────────────────
|
| 77 |
|
| 78 |
def analyze_drift(
|
|
|
|
| 136 |
}
|
| 137 |
|
| 138 |
def get_history_series(self) -> dict[str, list[float]]:
|
| 139 |
+
"""Return full EWMA time-series for each concept (for charts).
|
| 140 |
+
|
| 141 |
+
Pads with baseline values when fewer than 5 real events exist so the
|
| 142 |
+
chart renders a smooth baseline line on first load.
|
| 143 |
+
"""
|
| 144 |
series: dict[str, list[float]] = {c: [] for c in self._concept_embs}
|
| 145 |
ewma_state = {c: 0.0 for c in self._concept_embs}
|
| 146 |
+
|
| 147 |
+
# Pad with neutral baseline so chart always has something to show
|
| 148 |
+
padding = max(0, 5 - len(self.history))
|
| 149 |
+
for _ in range(padding):
|
| 150 |
+
for c in self._concept_embs:
|
| 151 |
+
series[c].append(0.15)
|
| 152 |
+
|
| 153 |
for event in self.history:
|
| 154 |
for c in self._concept_embs:
|
| 155 |
ewma_state[c] = self.ewma_alpha * event.scores[c] + (1 - self.ewma_alpha) * ewma_state[c]
|
modules/retrieval.py
CHANGED
|
@@ -84,8 +84,6 @@ class HybridRetriever:
|
|
| 84 |
|
| 85 |
results = []
|
| 86 |
for li in top_local:
|
| 87 |
-
if float(scores[li]) < 0.20:
|
| 88 |
-
continue
|
| 89 |
global_idx = candidate_indices[li]
|
| 90 |
results.append({
|
| 91 |
"product": self.catalog[global_idx],
|
|
|
|
| 84 |
|
| 85 |
results = []
|
| 86 |
for li in top_local:
|
|
|
|
|
|
|
| 87 |
global_idx = candidate_indices[li]
|
| 88 |
results.append({
|
| 89 |
"product": self.catalog[global_idx],
|