Spaces:
Sleeping
Sleeping
Decrease quality threshold from 7 to 6
Browse files
- Weighted score threshold: 7.0 → 6.0
- Evidence grounding hard floor: 7 → 6
- Updated conditions.py, graph_cyclic.py, critic.py
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
- src/graph_cyclic.py +1 -1
- src/nodes/critic.py +7 -7
- src/utils/conditions.py +3 -3
src/graph_cyclic.py
CHANGED
|
@@ -115,7 +115,7 @@ if __name__ == "__main__":
|
|
| 115 |
print(f" - Tracing: Enhanced LangSmith traces available")
|
| 116 |
|
| 117 |
# Quality assessment
|
| 118 |
-
if isinstance(final_score, (int, float)) and final_score >= 7:
|
| 119 |
print(f" - Quality Assessment: ✅ PASSED ({final_score}/10)")
|
| 120 |
else:
|
| 121 |
print(f" - Quality Assessment: ⚠️ ACCEPTABLE ({final_score} - max revisions reached)")
|
|
|
|
| 115 |
print(f" - Tracing: Enhanced LangSmith traces available")
|
| 116 |
|
| 117 |
# Quality assessment
|
| 118 |
+
if isinstance(final_score, (int, float)) and final_score >= 6:
|
| 119 |
print(f" - Quality Assessment: ✅ PASSED ({final_score}/10)")
|
| 120 |
else:
|
| 121 |
print(f" - Quality Assessment: ⚠️ ACCEPTABLE ({final_score} - max revisions reached)")
|
src/nodes/critic.py
CHANGED
|
@@ -89,8 +89,8 @@ Required sections:
|
|
| 89 |
- 9-10: Impeccable; 7-8: Well-structured; 5-6: Readable but dense; 3-4: Hard to follow; 1-2: Poorly organized
|
| 90 |
|
| 91 |
## PASS CONDITIONS (ALL must be met)
|
| 92 |
-
1. Weighted average >= 7.0
|
| 93 |
-
2. Evidence Grounding >= 7
|
| 94 |
3. Constraint Compliance >= 6
|
| 95 |
4. No individual criterion below 5
|
| 96 |
|
|
@@ -127,7 +127,7 @@ CRITERION_WEIGHTS = {
|
|
| 127 |
|
| 128 |
# Hard floor requirements
|
| 129 |
HARD_FLOORS = {
|
| 130 |
-
"evidence_grounding": 7,
|
| 131 |
"constraint_compliance": 6,
|
| 132 |
}
|
| 133 |
|
|
@@ -152,8 +152,8 @@ def check_pass_conditions(scores: dict, weighted_score: float) -> tuple:
|
|
| 152 |
violations = []
|
| 153 |
|
| 154 |
# Check weighted average threshold
|
| 155 |
-
if weighted_score < 7.0:
|
| 156 |
-
violations.append(f"Weighted score {weighted_score:.1f} < 7.0 threshold")
|
| 157 |
|
| 158 |
# Check hard floors
|
| 159 |
for criterion, floor in HARD_FLOORS.items():
|
|
@@ -286,14 +286,14 @@ def critic_node(state, workflow_id=None, progress_store=None):
|
|
| 286 |
Critic node with LLM-only weighted rubric evaluation.
|
| 287 |
|
| 288 |
Evaluates SWOT output on 6 criteria with weighted scoring:
|
| 289 |
-
- Evidence Grounding (25%) - hard floor >= 7
|
| 290 |
- Constraint Compliance (20%) - hard floor >= 6
|
| 291 |
- Specificity & Actionability (20%)
|
| 292 |
- Strategic Insight (15%)
|
| 293 |
- Completeness & Balance (10%)
|
| 294 |
- Clarity & Structure (10%)
|
| 295 |
|
| 296 |
-
Pass requires: weighted avg >= 7.0, hard floors met, no score < 5
|
| 297 |
"""
|
| 298 |
# Extract workflow_id and progress_store from state
|
| 299 |
if workflow_id is None:
|
|
|
|
| 89 |
- 9-10: Impeccable; 7-8: Well-structured; 5-6: Readable but dense; 3-4: Hard to follow; 1-2: Poorly organized
|
| 90 |
|
| 91 |
## PASS CONDITIONS (ALL must be met)
|
| 92 |
+
1. Weighted average >= 6.0
|
| 93 |
+
2. Evidence Grounding >= 6
|
| 94 |
3. Constraint Compliance >= 6
|
| 95 |
4. No individual criterion below 5
|
| 96 |
|
|
|
|
| 127 |
|
| 128 |
# Hard floor requirements
|
| 129 |
HARD_FLOORS = {
|
| 130 |
+
"evidence_grounding": 6,
|
| 131 |
"constraint_compliance": 6,
|
| 132 |
}
|
| 133 |
|
|
|
|
| 152 |
violations = []
|
| 153 |
|
| 154 |
# Check weighted average threshold
|
| 155 |
+
if weighted_score < 6.0:
|
| 156 |
+
violations.append(f"Weighted score {weighted_score:.1f} < 6.0 threshold")
|
| 157 |
|
| 158 |
# Check hard floors
|
| 159 |
for criterion, floor in HARD_FLOORS.items():
|
|
|
|
| 286 |
Critic node with LLM-only weighted rubric evaluation.
|
| 287 |
|
| 288 |
Evaluates SWOT output on 6 criteria with weighted scoring:
|
| 289 |
+
- Evidence Grounding (25%) - hard floor >= 6
|
| 290 |
- Constraint Compliance (20%) - hard floor >= 6
|
| 291 |
- Specificity & Actionability (20%)
|
| 292 |
- Strategic Insight (15%)
|
| 293 |
- Completeness & Balance (10%)
|
| 294 |
- Clarity & Structure (10%)
|
| 295 |
|
| 296 |
+
Pass requires: weighted avg >= 6.0, hard floors met, no score < 5
|
| 297 |
"""
|
| 298 |
# Extract workflow_id and progress_store from state
|
| 299 |
if workflow_id is None:
|
src/utils/conditions.py
CHANGED
|
@@ -9,11 +9,11 @@ def should_continue(state) -> Literal["exit", "retry"]:
|
|
| 9 |
Exit conditions:
|
| 10 |
- Error set (LLM providers failed - abort immediately)
|
| 11 |
- Analyzer revision skipped (LLM failed but using fallback draft - exit gracefully)
|
| 12 |
-
- Score >= 7 (good quality)
|
| 13 |
- Revision count > 3 (max attempts reached)
|
| 14 |
|
| 15 |
Continue conditions:
|
| 16 |
-
- No error AND No revision skip AND Score < 7 AND Revisions <= 3
|
| 17 |
"""
|
| 18 |
# Abort immediately if error is set (critical failure)
|
| 19 |
if state.get("error"):
|
|
@@ -27,7 +27,7 @@ def should_continue(state) -> Literal["exit", "retry"]:
|
|
| 27 |
revision_count = state.get("revision_count", 0)
|
| 28 |
|
| 29 |
# Exit if quality is good enough or max revisions exceeded
|
| 30 |
-
if current_score >= 7 or revision_count > 3:
|
| 31 |
return "exit"
|
| 32 |
|
| 33 |
# Continue the loop for improvement
|
|
|
|
| 9 |
Exit conditions:
|
| 10 |
- Error set (LLM providers failed - abort immediately)
|
| 11 |
- Analyzer revision skipped (LLM failed but using fallback draft - exit gracefully)
|
| 12 |
+
- Score >= 6 (good quality)
|
| 13 |
- Revision count > 3 (max attempts reached)
|
| 14 |
|
| 15 |
Continue conditions:
|
| 16 |
+
- No error AND No revision skip AND Score < 6 AND Revisions <= 3
|
| 17 |
"""
|
| 18 |
# Abort immediately if error is set (critical failure)
|
| 19 |
if state.get("error"):
|
|
|
|
| 27 |
revision_count = state.get("revision_count", 0)
|
| 28 |
|
| 29 |
# Exit if quality is good enough or max revisions exceeded
|
| 30 |
+
if current_score >= 6 or revision_count > 3:
|
| 31 |
return "exit"
|
| 32 |
|
| 33 |
# Continue the loop for improvement
|