Spaces:
Running
Running
nothex committed on
Commit ·
ca5537d
1
Parent(s): ee9c75b
feat: add dual eval review and selectable URL crawling
Browse files- .env.example +13 -0
- backend/api/admin.py +65 -10
- backend/core/config.py +29 -0
- backend/core/url_ingestion.py +245 -27
- backend/eval/draft_reviewer.py +138 -6
- docs/operations_playbook.md +3 -0
- docs/release_checklist.md +4 -0
- frontend/index.html +8 -0
- frontend/js/admin.js +37 -5
- frontend/js/graph.js +17 -2
- frontend/js/state.js +1 -0
- tests/test_pipeline_regressions.py +76 -0
- tests/test_url_ingestion.py +83 -2
.env.example
CHANGED
|
@@ -13,8 +13,21 @@ NVIDIA_API_BASE_URL=https://integrate.api.nvidia.com/v1
|
|
| 13 |
# Admin review backend
|
| 14 |
ADMIN_REVIEW_PROVIDER=auto
|
| 15 |
ADMIN_REVIEW_MODEL=gemma4:latest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
URL_INGEST_ADMIN_ENABLED=true
|
| 17 |
URL_INGEST_ALLOWED_HOSTS=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
URL_INGEST_TIMEOUT_S=12
|
| 19 |
URL_INGEST_MAX_BYTES=1500000
|
| 20 |
|
|
|
|
| 13 |
# Admin review backend
|
| 14 |
ADMIN_REVIEW_PROVIDER=auto
|
| 15 |
ADMIN_REVIEW_MODEL=gemma4:latest
|
| 16 |
+
ADMIN_REVIEW_PRIMARY_PROVIDER=nvidia
|
| 17 |
+
ADMIN_REVIEW_PRIMARY_MODEL=google/gemma-4-31b-it
|
| 18 |
+
ADMIN_REVIEW_SECONDARY_PROVIDER=ollama
|
| 19 |
+
ADMIN_REVIEW_SECONDARY_MODEL=gemma4:26b
|
| 20 |
+
ADMIN_REVIEW_CONSENSUS_ENABLED=true
|
| 21 |
+
ADMIN_REVIEW_CONSENSUS_MIN_CONFIDENCE=0.75
|
| 22 |
URL_INGEST_ADMIN_ENABLED=true
|
| 23 |
URL_INGEST_ALLOWED_HOSTS=
|
| 24 |
+
URL_INGEST_EXTRACTOR=basic
|
| 25 |
+
URL_INGEST_CRAWL4AI_ENABLED=false
|
| 26 |
+
URL_INGEST_CRAWL4AI_FALLBACK=false
|
| 27 |
+
URL_INGEST_DEFAULT_MODE=single_page
|
| 28 |
+
URL_INGEST_MAX_DEPTH=1
|
| 29 |
+
URL_INGEST_MAX_PAGES=5
|
| 30 |
+
URL_INGEST_CRAWL4AI_TIMEOUT_S=20
|
| 31 |
URL_INGEST_TIMEOUT_S=12
|
| 32 |
URL_INGEST_MAX_BYTES=1500000
|
| 33 |
|
backend/api/admin.py
CHANGED
|
@@ -54,12 +54,18 @@ class SeedEvalDatasetPayload(BaseModel):
|
|
| 54 |
|
| 55 |
class DraftReviewPayload(BaseModel):
|
| 56 |
model: Optional[str] = None
|
|
|
|
|
|
|
|
|
|
| 57 |
force: bool = False
|
| 58 |
|
| 59 |
|
| 60 |
class BatchDraftReviewPayload(BaseModel):
|
| 61 |
limit: int = 10
|
| 62 |
model: Optional[str] = None
|
|
|
|
|
|
|
|
|
|
| 63 |
force: bool = False
|
| 64 |
reviewed_only: bool = False
|
| 65 |
|
|
@@ -72,6 +78,9 @@ class CodeGraphIndexPayload(BaseModel):
|
|
| 72 |
class UrlIngestPayload(BaseModel):
|
| 73 |
url: str
|
| 74 |
label: Optional[str] = None
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
|
| 77 |
def _admin_client():
|
|
@@ -815,21 +824,42 @@ def generate_eval_dataset_draft_review(
|
|
| 815 |
"skipped": True,
|
| 816 |
}
|
| 817 |
|
| 818 |
-
from backend.eval.draft_reviewer import
|
|
|
|
|
|
|
|
|
|
| 819 |
|
| 820 |
trace_row = _load_trace_for_dataset(dataset_row.get("trace_id"))
|
| 821 |
feedback_rows = _load_feedback_for_dataset(dataset_row.get("trace_id"))
|
| 822 |
-
|
| 823 |
-
|
| 824 |
-
|
| 825 |
-
|
| 826 |
-
|
| 827 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 828 |
now_iso = datetime.now(timezone.utc).isoformat()
|
| 829 |
_admin_client().table("evaluation_datasets").update(
|
| 830 |
{
|
| 831 |
"ai_review_draft": draft,
|
| 832 |
-
"ai_review_model":
|
| 833 |
"ai_review_generated_at": now_iso,
|
| 834 |
}
|
| 835 |
).eq("id", dataset_id).execute()
|
|
@@ -857,7 +887,13 @@ def batch_generate_eval_dataset_draft_reviews(
|
|
| 857 |
for row in candidates[:batch_limit]:
|
| 858 |
result = generate_eval_dataset_draft_review(
|
| 859 |
int(row["id"]),
|
| 860 |
-
DraftReviewPayload(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 861 |
x_admin_key=x_admin_key,
|
| 862 |
)
|
| 863 |
generated.append(
|
|
@@ -880,6 +916,13 @@ def apply_eval_dataset_draft_review(
|
|
| 880 |
draft = row.get("ai_review_draft") or {}
|
| 881 |
if not draft:
|
| 882 |
raise HTTPException(status_code=404, detail="No AI draft review found for this row.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 883 |
|
| 884 |
now_iso = datetime.now(timezone.utc).isoformat()
|
| 885 |
update = {
|
|
@@ -947,6 +990,11 @@ def get_url_ingest_options(
|
|
| 947 |
"ok": True,
|
| 948 |
"enabled": bool(config.URL_INGEST_ADMIN_ENABLED),
|
| 949 |
"allowed_hosts": list(config.URL_INGEST_ALLOWED_HOSTS),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 950 |
"timeout_s": config.URL_INGEST_TIMEOUT_S,
|
| 951 |
"max_bytes": config.URL_INGEST_MAX_BYTES,
|
| 952 |
"user_id": user_id,
|
|
@@ -964,7 +1012,13 @@ def run_url_ingest(
|
|
| 964 |
_check_admin(x_admin_key)
|
| 965 |
_ensure_url_ingest_enabled()
|
| 966 |
try:
|
| 967 |
-
fetched = fetch_url_to_tempfile(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 968 |
except UrlIngestionError as exc:
|
| 969 |
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
| 970 |
except Exception as exc:
|
|
@@ -996,4 +1050,5 @@ def run_url_ingest(
|
|
| 996 |
"content_type": fetched.content_type,
|
| 997 |
"content_bytes": fetched.content_bytes,
|
| 998 |
"title": fetched.title,
|
|
|
|
| 999 |
}
|
|
|
|
| 54 |
|
| 55 |
class DraftReviewPayload(BaseModel):
|
| 56 |
model: Optional[str] = None
|
| 57 |
+
primary_model: Optional[str] = None
|
| 58 |
+
secondary_model: Optional[str] = None
|
| 59 |
+
dual_review: bool = False
|
| 60 |
force: bool = False
|
| 61 |
|
| 62 |
|
| 63 |
class BatchDraftReviewPayload(BaseModel):
|
| 64 |
limit: int = 10
|
| 65 |
model: Optional[str] = None
|
| 66 |
+
primary_model: Optional[str] = None
|
| 67 |
+
secondary_model: Optional[str] = None
|
| 68 |
+
dual_review: bool = False
|
| 69 |
force: bool = False
|
| 70 |
reviewed_only: bool = False
|
| 71 |
|
|
|
|
| 78 |
class UrlIngestPayload(BaseModel):
|
| 79 |
url: str
|
| 80 |
label: Optional[str] = None
|
| 81 |
+
mode: Optional[str] = None
|
| 82 |
+
max_depth: Optional[int] = None
|
| 83 |
+
max_pages: Optional[int] = None
|
| 84 |
|
| 85 |
|
| 86 |
def _admin_client():
|
|
|
|
| 824 |
"skipped": True,
|
| 825 |
}
|
| 826 |
|
| 827 |
+
from backend.eval.draft_reviewer import (
|
| 828 |
+
generate_eval_dataset_draft_review as _generate,
|
| 829 |
+
generate_eval_dataset_dual_draft_review as _generate_dual,
|
| 830 |
+
)
|
| 831 |
|
| 832 |
trace_row = _load_trace_for_dataset(dataset_row.get("trace_id"))
|
| 833 |
feedback_rows = _load_feedback_for_dataset(dataset_row.get("trace_id"))
|
| 834 |
+
if payload.dual_review and config.ADMIN_REVIEW_CONSENSUS_ENABLED:
|
| 835 |
+
draft = _generate_dual(
|
| 836 |
+
dataset_row,
|
| 837 |
+
trace_row=trace_row,
|
| 838 |
+
feedback_rows=feedback_rows,
|
| 839 |
+
primary_model=payload.primary_model,
|
| 840 |
+
secondary_model=payload.secondary_model,
|
| 841 |
+
)
|
| 842 |
+
model_label = ",".join(
|
| 843 |
+
model
|
| 844 |
+
for model in [
|
| 845 |
+
(draft.get("primary") or {}).get("model"),
|
| 846 |
+
(draft.get("secondary") or {}).get("model"),
|
| 847 |
+
]
|
| 848 |
+
if model
|
| 849 |
+
)
|
| 850 |
+
else:
|
| 851 |
+
draft = _generate(
|
| 852 |
+
dataset_row,
|
| 853 |
+
trace_row=trace_row,
|
| 854 |
+
feedback_rows=feedback_rows,
|
| 855 |
+
model=payload.model,
|
| 856 |
+
)
|
| 857 |
+
model_label = draft.get("model")
|
| 858 |
now_iso = datetime.now(timezone.utc).isoformat()
|
| 859 |
_admin_client().table("evaluation_datasets").update(
|
| 860 |
{
|
| 861 |
"ai_review_draft": draft,
|
| 862 |
+
"ai_review_model": model_label,
|
| 863 |
"ai_review_generated_at": now_iso,
|
| 864 |
}
|
| 865 |
).eq("id", dataset_id).execute()
|
|
|
|
| 887 |
for row in candidates[:batch_limit]:
|
| 888 |
result = generate_eval_dataset_draft_review(
|
| 889 |
int(row["id"]),
|
| 890 |
+
DraftReviewPayload(
|
| 891 |
+
model=payload.model,
|
| 892 |
+
primary_model=payload.primary_model,
|
| 893 |
+
secondary_model=payload.secondary_model,
|
| 894 |
+
dual_review=payload.dual_review,
|
| 895 |
+
force=payload.force,
|
| 896 |
+
),
|
| 897 |
x_admin_key=x_admin_key,
|
| 898 |
)
|
| 899 |
generated.append(
|
|
|
|
| 916 |
draft = row.get("ai_review_draft") or {}
|
| 917 |
if not draft:
|
| 918 |
raise HTTPException(status_code=404, detail="No AI draft review found for this row.")
|
| 919 |
+
if "consensus" in draft:
|
| 920 |
+
draft = draft.get("consensus") or {}
|
| 921 |
+
if not draft.get("apply_ready"):
|
| 922 |
+
raise HTTPException(
|
| 923 |
+
status_code=409,
|
| 924 |
+
detail="AI reviewers disagree. Manually review this dataset row before applying.",
|
| 925 |
+
)
|
| 926 |
|
| 927 |
now_iso = datetime.now(timezone.utc).isoformat()
|
| 928 |
update = {
|
|
|
|
| 990 |
"ok": True,
|
| 991 |
"enabled": bool(config.URL_INGEST_ADMIN_ENABLED),
|
| 992 |
"allowed_hosts": list(config.URL_INGEST_ALLOWED_HOSTS),
|
| 993 |
+
"extractor": config.URL_INGEST_EXTRACTOR,
|
| 994 |
+
"default_mode": config.URL_INGEST_DEFAULT_MODE,
|
| 995 |
+
"max_depth": config.URL_INGEST_MAX_DEPTH,
|
| 996 |
+
"max_pages": config.URL_INGEST_MAX_PAGES,
|
| 997 |
+
"crawl4ai_enabled": bool(config.URL_INGEST_CRAWL4AI_ENABLED),
|
| 998 |
"timeout_s": config.URL_INGEST_TIMEOUT_S,
|
| 999 |
"max_bytes": config.URL_INGEST_MAX_BYTES,
|
| 1000 |
"user_id": user_id,
|
|
|
|
| 1012 |
_check_admin(x_admin_key)
|
| 1013 |
_ensure_url_ingest_enabled()
|
| 1014 |
try:
|
| 1015 |
+
fetched = fetch_url_to_tempfile(
|
| 1016 |
+
payload.url,
|
| 1017 |
+
label=payload.label,
|
| 1018 |
+
mode=payload.mode,
|
| 1019 |
+
max_depth=payload.max_depth,
|
| 1020 |
+
max_pages=payload.max_pages,
|
| 1021 |
+
)
|
| 1022 |
except UrlIngestionError as exc:
|
| 1023 |
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
| 1024 |
except Exception as exc:
|
|
|
|
| 1050 |
"content_type": fetched.content_type,
|
| 1051 |
"content_bytes": fetched.content_bytes,
|
| 1052 |
"title": fetched.title,
|
| 1053 |
+
"pages": fetched.pages or [],
|
| 1054 |
}
|
backend/core/config.py
CHANGED
|
@@ -51,6 +51,24 @@ ADMIN_REVIEW_MODEL = os.getenv(
|
|
| 51 |
"ADMIN_REVIEW_MODEL",
|
| 52 |
os.getenv("ADMIN_REVIEW_OLLAMA_MODEL", "gemma4:latest"),
|
| 53 |
).strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
ADMIN_REVIEW_OLLAMA_MODEL = ADMIN_REVIEW_MODEL
|
| 55 |
ADMIN_REVIEW_OLLAMA_TEMPERATURE = float(
|
| 56 |
os.getenv("ADMIN_REVIEW_OLLAMA_TEMPERATURE", "0.1")
|
|
@@ -302,6 +320,17 @@ URL_INGEST_ALLOWED_HOSTS = [
|
|
| 302 |
for segment in os.getenv("URL_INGEST_ALLOWED_HOSTS", "").split(",")
|
| 303 |
if segment.strip()
|
| 304 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
URL_INGEST_ALLOWED_CONTENT_TYPES = {
|
| 306 |
"text/html",
|
| 307 |
"application/xhtml+xml",
|
|
|
|
| 51 |
"ADMIN_REVIEW_MODEL",
|
| 52 |
os.getenv("ADMIN_REVIEW_OLLAMA_MODEL", "gemma4:latest"),
|
| 53 |
).strip()
|
| 54 |
+
ADMIN_REVIEW_PRIMARY_PROVIDER = os.getenv(
|
| 55 |
+
"ADMIN_REVIEW_PRIMARY_PROVIDER", "nvidia"
|
| 56 |
+
).strip().lower()
|
| 57 |
+
ADMIN_REVIEW_PRIMARY_MODEL = os.getenv(
|
| 58 |
+
"ADMIN_REVIEW_PRIMARY_MODEL", "google/gemma-4-31b-it"
|
| 59 |
+
).strip()
|
| 60 |
+
ADMIN_REVIEW_SECONDARY_PROVIDER = os.getenv(
|
| 61 |
+
"ADMIN_REVIEW_SECONDARY_PROVIDER", "ollama"
|
| 62 |
+
).strip().lower()
|
| 63 |
+
ADMIN_REVIEW_SECONDARY_MODEL = os.getenv(
|
| 64 |
+
"ADMIN_REVIEW_SECONDARY_MODEL", "gemma4:26b"
|
| 65 |
+
).strip()
|
| 66 |
+
ADMIN_REVIEW_CONSENSUS_ENABLED = os.getenv(
|
| 67 |
+
"ADMIN_REVIEW_CONSENSUS_ENABLED", "true"
|
| 68 |
+
).lower() in {"1", "true", "yes"}
|
| 69 |
+
ADMIN_REVIEW_CONSENSUS_MIN_CONFIDENCE = float(
|
| 70 |
+
os.getenv("ADMIN_REVIEW_CONSENSUS_MIN_CONFIDENCE", "0.75")
|
| 71 |
+
)
|
| 72 |
ADMIN_REVIEW_OLLAMA_MODEL = ADMIN_REVIEW_MODEL
|
| 73 |
ADMIN_REVIEW_OLLAMA_TEMPERATURE = float(
|
| 74 |
os.getenv("ADMIN_REVIEW_OLLAMA_TEMPERATURE", "0.1")
|
|
|
|
| 320 |
for segment in os.getenv("URL_INGEST_ALLOWED_HOSTS", "").split(",")
|
| 321 |
if segment.strip()
|
| 322 |
]
|
| 323 |
+
URL_INGEST_EXTRACTOR = os.getenv("URL_INGEST_EXTRACTOR", "basic").strip().lower()
|
| 324 |
+
URL_INGEST_CRAWL4AI_ENABLED = os.getenv(
|
| 325 |
+
"URL_INGEST_CRAWL4AI_ENABLED", "false"
|
| 326 |
+
).lower() in {"1", "true", "yes"}
|
| 327 |
+
URL_INGEST_CRAWL4AI_FALLBACK = os.getenv(
|
| 328 |
+
"URL_INGEST_CRAWL4AI_FALLBACK", "false"
|
| 329 |
+
).lower() in {"1", "true", "yes"}
|
| 330 |
+
URL_INGEST_DEFAULT_MODE = os.getenv("URL_INGEST_DEFAULT_MODE", "single_page").strip().lower()
|
| 331 |
+
URL_INGEST_MAX_DEPTH = int(os.getenv("URL_INGEST_MAX_DEPTH", "1"))
|
| 332 |
+
URL_INGEST_MAX_PAGES = int(os.getenv("URL_INGEST_MAX_PAGES", "5"))
|
| 333 |
+
URL_INGEST_CRAWL4AI_TIMEOUT_S = float(os.getenv("URL_INGEST_CRAWL4AI_TIMEOUT_S", "20"))
|
| 334 |
URL_INGEST_ALLOWED_CONTENT_TYPES = {
|
| 335 |
"text/html",
|
| 336 |
"application/xhtml+xml",
|
backend/core/url_ingestion.py
CHANGED
|
@@ -4,13 +4,14 @@ from __future__ import annotations
|
|
| 4 |
|
| 5 |
from dataclasses import dataclass
|
| 6 |
from html.parser import HTMLParser
|
|
|
|
| 7 |
import ipaddress
|
| 8 |
import os
|
| 9 |
from pathlib import Path
|
| 10 |
import socket
|
| 11 |
import tempfile
|
| 12 |
from typing import Iterable
|
| 13 |
-
from urllib.parse import urlparse
|
| 14 |
|
| 15 |
import requests
|
| 16 |
|
|
@@ -30,6 +31,18 @@ class FetchedUrlDocument:
|
|
| 30 |
content_type: str
|
| 31 |
content_bytes: int
|
| 32 |
title: str | None = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
|
| 35 |
class _VisibleTextParser(HTMLParser):
|
|
@@ -164,6 +177,23 @@ def _render_markdown_from_remote(source_url: str, title: str | None, body: str)
|
|
| 164 |
return "\n".join(parts).strip() + "\n"
|
| 165 |
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
def _extract_text_payload(content_type: str, raw_bytes: bytes) -> tuple[str | None, str]:
|
| 168 |
lowered = content_type.lower()
|
| 169 |
decoded = raw_bytes.decode("utf-8", errors="replace")
|
|
@@ -180,31 +210,30 @@ def _extract_text_payload(content_type: str, raw_bytes: bytes) -> tuple[str | No
|
|
| 180 |
return None, text
|
| 181 |
|
| 182 |
|
| 183 |
-
def
|
| 184 |
-
|
| 185 |
-
|
|
|
|
| 186 |
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
timeout=config.URL_INGEST_TIMEOUT_S,
|
| 192 |
-
stream=True,
|
| 193 |
-
headers={"User-Agent": config.URL_INGEST_USER_AGENT},
|
| 194 |
-
)
|
| 195 |
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
| 200 |
|
| 201 |
-
|
| 202 |
-
|
|
|
|
| 203 |
|
| 204 |
-
content_type = (response.headers.get("content-type") or "").split(";", 1)[0].strip().lower()
|
| 205 |
-
if content_type not in config.URL_INGEST_ALLOWED_CONTENT_TYPES:
|
| 206 |
-
raise UrlIngestionError(f"Unsupported URL content type: {content_type or 'unknown'}.")
|
| 207 |
|
|
|
|
| 208 |
declared_length = response.headers.get("content-length")
|
| 209 |
if declared_length:
|
| 210 |
try:
|
|
@@ -220,20 +249,209 @@ def fetch_url_to_tempfile(url: str, *, label: str | None = None) -> FetchedUrlDo
|
|
| 220 |
body.extend(chunk)
|
| 221 |
if len(body) > config.URL_INGEST_MAX_BYTES:
|
| 222 |
raise UrlIngestionError("Remote URL response exceeds the configured size limit.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
rendered = _render_markdown_from_remote(final_url, label or title, text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
tmp_fd, tmp_path = tempfile.mkstemp(suffix=".md", prefix="morpheus_url_")
|
| 227 |
os.close(tmp_fd)
|
| 228 |
with open(tmp_path, "w", encoding="utf-8") as handle:
|
| 229 |
handle.write(rendered)
|
| 230 |
|
| 231 |
return FetchedUrlDocument(
|
| 232 |
-
source_url=
|
| 233 |
final_url=final_url,
|
| 234 |
-
filename=_sanitize_filename(final_url, label or title),
|
| 235 |
temp_path=tmp_path,
|
| 236 |
-
content_type=content_type,
|
| 237 |
-
content_bytes=
|
| 238 |
-
title=label or title,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
)
|
|
|
|
| 4 |
|
| 5 |
from dataclasses import dataclass
|
| 6 |
from html.parser import HTMLParser
|
| 7 |
+
import asyncio
|
| 8 |
import ipaddress
|
| 9 |
import os
|
| 10 |
from pathlib import Path
|
| 11 |
import socket
|
| 12 |
import tempfile
|
| 13 |
from typing import Iterable
|
| 14 |
+
from urllib.parse import urljoin, urlparse
|
| 15 |
|
| 16 |
import requests
|
| 17 |
|
|
|
|
| 31 |
content_type: str
|
| 32 |
content_bytes: int
|
| 33 |
title: str | None = None
|
| 34 |
+
pages: list[dict] | None = None
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@dataclass
|
| 38 |
+
class UrlPage:
|
| 39 |
+
source_url: str
|
| 40 |
+
final_url: str
|
| 41 |
+
title: str | None
|
| 42 |
+
markdown: str
|
| 43 |
+
content_type: str
|
| 44 |
+
content_bytes: int
|
| 45 |
+
links: list[str]
|
| 46 |
|
| 47 |
|
| 48 |
class _VisibleTextParser(HTMLParser):
|
|
|
|
| 177 |
return "\n".join(parts).strip() + "\n"
|
| 178 |
|
| 179 |
|
| 180 |
+
def _render_combined_markdown(label: str | None, pages: list[UrlPage]) -> str:
|
| 181 |
+
title = label or (pages[0].title if pages else None) or "URL Import"
|
| 182 |
+
parts = [f"# {title}", ""]
|
| 183 |
+
for index, page in enumerate(pages, start=1):
|
| 184 |
+
page_title = page.title or page.final_url
|
| 185 |
+
parts.extend(
|
| 186 |
+
[
|
| 187 |
+
f"## Page {index}: {page_title}",
|
| 188 |
+
f"Source URL: {page.final_url}",
|
| 189 |
+
"",
|
| 190 |
+
page.markdown.strip(),
|
| 191 |
+
"",
|
| 192 |
+
]
|
| 193 |
+
)
|
| 194 |
+
return "\n".join(parts).strip() + "\n"
|
| 195 |
+
|
| 196 |
+
|
| 197 |
def _extract_text_payload(content_type: str, raw_bytes: bytes) -> tuple[str | None, str]:
|
| 198 |
lowered = content_type.lower()
|
| 199 |
decoded = raw_bytes.decode("utf-8", errors="replace")
|
|
|
|
| 210 |
return None, text
|
| 211 |
|
| 212 |
|
| 213 |
+
def _extract_links_from_html(base_url: str, raw_bytes: bytes, content_type: str) -> list[str]:
|
| 214 |
+
if not content_type.lower().startswith("text/html"):
|
| 215 |
+
return []
|
| 216 |
+
html = raw_bytes.decode("utf-8", errors="replace")
|
| 217 |
|
| 218 |
+
class _LinkParser(HTMLParser):
|
| 219 |
+
def __init__(self) -> None:
|
| 220 |
+
super().__init__()
|
| 221 |
+
self.links: list[str] = []
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
|
| 223 |
+
def handle_starttag(self, tag: str, attrs) -> None: # noqa: ANN001
|
| 224 |
+
if tag != "a":
|
| 225 |
+
return
|
| 226 |
+
attrs_dict = dict(attrs)
|
| 227 |
+
href = (attrs_dict.get("href") or "").strip()
|
| 228 |
+
if href:
|
| 229 |
+
self.links.append(urljoin(base_url, href))
|
| 230 |
|
| 231 |
+
parser = _LinkParser()
|
| 232 |
+
parser.feed(html)
|
| 233 |
+
return parser.links
|
| 234 |
|
|
|
|
|
|
|
|
|
|
| 235 |
|
| 236 |
+
def _read_limited_response(response) -> bytes: # noqa: ANN001
|
| 237 |
declared_length = response.headers.get("content-length")
|
| 238 |
if declared_length:
|
| 239 |
try:
|
|
|
|
| 249 |
body.extend(chunk)
|
| 250 |
if len(body) > config.URL_INGEST_MAX_BYTES:
|
| 251 |
raise UrlIngestionError("Remote URL response exceeds the configured size limit.")
|
| 252 |
+
return bytes(body)
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
def _fetch_basic_page(url: str, *, label: str | None = None) -> UrlPage:
    """Fetch one URL with ``requests`` and convert it into a ``UrlPage``.

    Redirects are followed one hop at a time so that every redirect target
    passes ``_ensure_safe_host`` *before* it is requested. Letting the HTTP
    client follow redirects on its own would contact intermediate hosts
    without any validation.

    Args:
        url: URL to fetch; surrounding whitespace is ignored.
        label: Optional title that takes precedence over the extracted title.

    Returns:
        The fetched page with rendered markdown, content metadata and the
        links extracted from the body.

    Raises:
        UrlIngestionError: On an HTTP error status, too many redirects, or a
            content type outside ``config.URL_INGEST_ALLOWED_CONTENT_TYPES``.
            ``_ensure_safe_host`` and ``_read_limited_response`` are called
            as well and may raise on their own conditions.
    """
    parsed = urlparse((url or "").strip())
    _ensure_safe_host(parsed)
    requested_url = parsed.geturl()

    with requests.Session() as session:
        current_url = requested_url
        # One initial request plus at most ``max_redirects`` redirect hops.
        for _ in range(session.max_redirects + 1):
            response = session.get(
                current_url,
                allow_redirects=False,
                timeout=config.URL_INGEST_TIMEOUT_S,
                stream=True,
                headers={"User-Agent": config.URL_INGEST_USER_AGENT},
            )
            if not response.is_redirect:
                break
            # ``is_redirect`` is only true when a Location header is present.
            next_url = urljoin(current_url, response.headers["location"])
            response.close()
            # Validate the hop before it is contacted.
            _ensure_safe_host(urlparse(next_url))
            current_url = next_url
        else:
            raise UrlIngestionError("URL fetch exceeded the redirect limit.")

        try:
            try:
                response.raise_for_status()
            except requests.HTTPError as exc:
                raise UrlIngestionError(
                    f"URL fetch failed with status {response.status_code}."
                ) from exc

            final_url = response.url or requested_url
            _ensure_safe_host(urlparse(final_url))
            content_type = (
                (response.headers.get("content-type") or "").split(";", 1)[0].strip().lower()
            )
            if content_type not in config.URL_INGEST_ALLOWED_CONTENT_TYPES:
                raise UrlIngestionError(
                    f"Unsupported URL content type: {content_type or 'unknown'}."
                )
            body = _read_limited_response(response)
        finally:
            # The response is streamed; release its connection on every path.
            response.close()

    title, text = _extract_text_payload(content_type, body)
    rendered = _render_markdown_from_remote(final_url, label or title, text)
    return UrlPage(
        source_url=requested_url,
        final_url=final_url,
        title=label or title,
        markdown=rendered,
        content_type=content_type,
        content_bytes=len(body),
        links=_extract_links_from_html(final_url, body, content_type),
    )
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
def _run_crawl4ai_single_page(url: str, *, label: str | None = None) -> UrlPage:
    """Fetch one URL through Crawl4AI and convert the result into a ``UrlPage``.

    When ``config.URL_INGEST_CRAWL4AI_FALLBACK`` is set, a failed import or a
    failed crawl falls back to ``_fetch_basic_page`` instead of raising.

    Args:
        url: URL to crawl; surrounding whitespace is ignored.
        label: Optional title that takes precedence over the crawled title.

    Returns:
        The crawled page, reported with content type ``text/markdown``.

    Raises:
        UrlIngestionError: If Crawl4AI is disabled, cannot be imported or
            fails (with no fallback configured), returns empty markdown, or
            returns markdown above ``config.URL_INGEST_MAX_BYTES``.
    """
    if not config.URL_INGEST_CRAWL4AI_ENABLED:
        raise UrlIngestionError("Crawl4AI URL extraction is disabled.")
    target = urlparse((url or "").strip())
    _ensure_safe_host(target)
    target_url = target.geturl()

    # Imported lazily: the dependency is optional.
    try:
        from crawl4ai import AsyncWebCrawler
    except Exception as exc:
        if not config.URL_INGEST_CRAWL4AI_FALLBACK:
            raise UrlIngestionError("Crawl4AI is not installed or could not be imported.") from exc
        return _fetch_basic_page(url, label=label)

    async def _crawl():
        async with AsyncWebCrawler() as crawler:
            pending = crawler.arun(url=target_url)
            return await asyncio.wait_for(
                pending,
                timeout=config.URL_INGEST_CRAWL4AI_TIMEOUT_S,
            )

    try:
        result = asyncio.run(_crawl())
    except Exception as exc:
        if not config.URL_INGEST_CRAWL4AI_FALLBACK:
            raise UrlIngestionError(f"Crawl4AI extraction failed: {exc}") from exc
        return _fetch_basic_page(url, label=label)

    final_url = str(getattr(result, "url", None) or target_url)
    _ensure_safe_host(urlparse(final_url))

    markdown = str(getattr(result, "markdown", "") or "").strip()
    if not markdown:
        raise UrlIngestionError("Crawl4AI returned empty markdown.")
    markdown_size = len(markdown.encode("utf-8"))
    if markdown_size > config.URL_INGEST_MAX_BYTES:
        raise UrlIngestionError("Crawl4AI markdown exceeds the configured size limit.")

    metadata = getattr(result, "metadata", None) or {}
    title = label or metadata.get("title") or None

    # Links are read from a dict of lists whose items are either dicts
    # carrying an "href" or plain strings; anything else is ignored.
    links: list[str] = []
    raw_links = getattr(result, "links", None)
    link_groups = raw_links.values() if isinstance(raw_links, dict) else ()
    for group in link_groups:
        if not isinstance(group, list):
            continue
        for entry in group:
            if isinstance(entry, dict) and entry.get("href"):
                links.append(urljoin(final_url, str(entry["href"])))
            elif isinstance(entry, str):
                links.append(urljoin(final_url, entry))

    return UrlPage(
        source_url=target_url,
        final_url=final_url,
        title=title,
        markdown=_render_markdown_from_remote(final_url, title, markdown),
        content_type="text/markdown",
        content_bytes=markdown_size,
        links=links,
    )
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
def _fetch_url_page(url: str, *, label: str | None = None) -> UrlPage:
    """Fetch one page with the extractor named by ``config.URL_INGEST_EXTRACTOR``.

    Args:
        url: URL to fetch.
        label: Optional title forwarded to the extractor.

    Returns:
        The page produced by the selected extractor.

    Raises:
        UrlIngestionError: If the configured extractor is neither ``basic``
            nor ``crawl4ai``.
    """
    extractor = config.URL_INGEST_EXTRACTOR
    if extractor == "basic":
        return _fetch_basic_page(url, label=label)
    if extractor == "crawl4ai":
        return _run_crawl4ai_single_page(url, label=label)
    raise UrlIngestionError("URL_INGEST_EXTRACTOR must be basic or crawl4ai.")
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
def _same_allowed_host(candidate_url: str, root_hostname: str) -> bool:
    """Report whether ``candidate_url`` points at the crawl's root host.

    Args:
        candidate_url: URL of a discovered link.
        root_hostname: Hostname of the crawl root, already normalized.

    Returns:
        True when the candidate's normalized hostname equals
        ``root_hostname``; False when it differs or when ``_normalize_host``
        rejects it with ``UrlIngestionError``.
    """
    candidate = urlparse(candidate_url)
    try:
        candidate_host = _normalize_host(candidate.hostname)
    except UrlIngestionError:
        return False
    else:
        return candidate_host == root_hostname
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
def _crawl_bounded_pages(
    url: str,
    *,
    label: str | None,
    max_depth: int,
    max_pages: int,
) -> list[UrlPage]:
    """Crawl breadth-first from ``url``, staying on the root host.

    Links are followed only while the current page's depth is below
    ``max_depth``, and only when they pass both ``_same_allowed_host`` and
    ``_ensure_safe_host``. URL fragments are ignored when comparing URLs.

    Args:
        url: Root URL of the crawl.
        label: Optional title, applied to the first fetched page only.
        max_depth: Maximum link distance from the root (0 = root only).
        max_pages: Maximum number of pages to fetch.

    Returns:
        The fetched pages in crawl order, root first.

    Raises:
        UrlIngestionError: If ``config.URL_INGEST_ALLOWED_HOSTS`` is empty or
            no page was fetched. A failure while fetching any page also
            propagates and aborts the crawl.
    """
    if not config.URL_INGEST_ALLOWED_HOSTS:
        raise UrlIngestionError("Bounded URL crawl requires URL_INGEST_ALLOWED_HOSTS.")
    root = urlparse((url or "").strip())
    root_host = _normalize_host(root.hostname)
    _ensure_safe_host(root)

    root_url = root.geturl().split("#", 1)[0]
    # URLs are recorded when they are *scheduled*, not when they are fetched.
    # Otherwise repeated links fill the bounded queue with duplicates and the
    # crawl ends with fewer distinct pages than ``max_pages`` allows.
    scheduled: set[str] = {root_url}
    # The queue never grows past ``max_pages`` entries, so a plain list is fine.
    queue: list[tuple[str, int]] = [(root_url, 0)]
    pages: list[UrlPage] = []
    while queue and len(pages) < max_pages:
        current_url, depth = queue.pop(0)
        page = _fetch_url_page(current_url, label=label if not pages else None)
        pages.append(page)
        # After a redirect the page lives at a different URL; remember it so
        # a later link to that address does not fetch the same page again.
        scheduled.add(page.final_url.split("#", 1)[0])
        if depth >= max_depth:
            continue
        for link in page.links:
            if len(queue) + len(pages) >= max_pages:
                break
            candidate = link.split("#", 1)[0]
            if candidate in scheduled:
                continue
            try:
                if not _same_allowed_host(candidate, root_host):
                    continue
                _ensure_safe_host(urlparse(candidate))
            except (UrlIngestionError, ValueError):
                # ValueError: urlparse rejects malformed hosts. A bad link is
                # skipped rather than aborting the whole crawl.
                continue
            scheduled.add(candidate)
            queue.append((candidate, depth + 1))
    if not pages:
        raise UrlIngestionError("No crawlable pages were fetched.")
    return pages
|
| 407 |
+
|
| 408 |
+
|
| 409 |
+
def fetch_url_to_tempfile(
    url: str,
    *,
    label: str | None = None,
    mode: str | None = None,
    max_depth: int | None = None,
    max_pages: int | None = None,
) -> FetchedUrlDocument:
    """Fetch a URL (or a bounded crawl from it) into a temporary markdown file.

    Args:
        url: URL to ingest.
        label: Optional title for the document; overrides the page title.
        mode: ``single_page`` or ``bounded_crawl``. Defaults to
            ``config.URL_INGEST_DEFAULT_MODE``.
        max_depth: Requested crawl depth, clamped to
            ``[0, config.URL_INGEST_MAX_DEPTH]``.
        max_pages: Requested page count, clamped to
            ``[1, config.URL_INGEST_MAX_PAGES]``. A falsy value selects the
            configured maximum.

    Returns:
        A description of the written temp file. Document-level metadata is
        taken from the first page, ``content_bytes`` is the sum over all
        pages, and ``pages`` lists per-page metadata. This function does not
        delete the file on success.

    Raises:
        UrlIngestionError: If ``mode`` is not a supported value. Errors from
            fetching or crawling also propagate.
    """
    selected_mode = (mode or config.URL_INGEST_DEFAULT_MODE or "single_page").strip().lower()
    page_limit = max(1, min(max_pages or config.URL_INGEST_MAX_PAGES, config.URL_INGEST_MAX_PAGES))
    requested_depth = max_depth if max_depth is not None else config.URL_INGEST_MAX_DEPTH
    depth_limit = max(0, min(requested_depth, config.URL_INGEST_MAX_DEPTH))
    if selected_mode == "bounded_crawl":
        pages = _crawl_bounded_pages(
            url,
            label=label,
            max_depth=depth_limit,
            max_pages=page_limit,
        )
    elif selected_mode == "single_page":
        pages = [_fetch_url_page(url, label=label)]
    else:
        raise UrlIngestionError("URL ingest mode must be single_page or bounded_crawl.")

    rendered = _render_combined_markdown(label, pages)
    first_page = pages[0]
    final_url = first_page.final_url
    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".md", prefix="morpheus_url_")
    os.close(tmp_fd)
    try:
        with open(tmp_path, "w", encoding="utf-8") as handle:
            handle.write(rendered)
    except Exception:
        # mkstemp files are never removed automatically; do not leave an
        # orphan behind when the write fails.
        Path(tmp_path).unlink(missing_ok=True)
        raise

    return FetchedUrlDocument(
        source_url=urlparse((url or "").strip()).geturl(),
        final_url=final_url,
        filename=_sanitize_filename(final_url, label or first_page.title),
        temp_path=tmp_path,
        content_type=first_page.content_type,
        content_bytes=sum(page.content_bytes for page in pages),
        title=label or first_page.title,
        pages=[
            {
                "source_url": page.source_url,
                "final_url": page.final_url,
                "title": page.title,
                "content_type": page.content_type,
                "content_bytes": page.content_bytes,
            }
            for page in pages
        ],
    )
|
backend/eval/draft_reviewer.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import json
|
|
|
|
| 2 |
from typing import Any, Optional
|
| 3 |
|
| 4 |
from backend.core import config
|
|
@@ -172,8 +173,8 @@ def _build_prompt(dataset_row: dict[str, Any], trace_row: Optional[dict[str, Any
|
|
| 172 |
)
|
| 173 |
|
| 174 |
|
| 175 |
-
def _resolve_admin_review_provider(model: str) -> str:
|
| 176 |
-
configured = (config.ADMIN_REVIEW_PROVIDER or "auto").strip().lower()
|
| 177 |
if configured in {"ollama", "nvidia"}:
|
| 178 |
return configured
|
| 179 |
if config.NVIDIA_API_KEY and "/" in (model or ""):
|
|
@@ -185,9 +186,10 @@ def _generate_review_text(
|
|
| 185 |
*,
|
| 186 |
prompt: str,
|
| 187 |
model: str,
|
|
|
|
| 188 |
) -> str:
|
| 189 |
-
|
| 190 |
-
if
|
| 191 |
return nvidia_generate(
|
| 192 |
base_url=config.NVIDIA_API_BASE_URL,
|
| 193 |
api_key=config.NVIDIA_API_KEY or "",
|
|
@@ -214,11 +216,13 @@ def generate_eval_dataset_draft_review(
|
|
| 214 |
trace_row: Optional[dict[str, Any]] = None,
|
| 215 |
feedback_rows: Optional[list[dict[str, Any]]] = None,
|
| 216 |
model: Optional[str] = None,
|
|
|
|
| 217 |
) -> dict[str, Any]:
|
| 218 |
prompt = _build_prompt(dataset_row, trace_row, feedback_rows or [])
|
| 219 |
selected_model = model or config.ADMIN_REVIEW_MODEL
|
| 220 |
-
raw = _generate_review_text(prompt=prompt, model=selected_model)
|
| 221 |
data = _extract_json_object(raw)
|
|
|
|
| 222 |
return {
|
| 223 |
"manual_verdict": _normalize_verdict(data.get("manual_verdict")),
|
| 224 |
"gold_answer_text": _clean_text(data.get("gold_answer_text"))
|
|
@@ -233,5 +237,133 @@ def generate_eval_dataset_draft_review(
|
|
| 233 |
"confidence": _normalize_confidence(data.get("confidence")),
|
| 234 |
"rationale": _clean_text(data.get("rationale")) or "Draft generated from trace, feedback, and existing evidence.",
|
| 235 |
"model": selected_model,
|
| 236 |
-
"provider":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
}
|
|
|
|
| 1 |
import json
|
| 2 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 3 |
from typing import Any, Optional
|
| 4 |
|
| 5 |
from backend.core import config
|
|
|
|
| 173 |
)
|
| 174 |
|
| 175 |
|
| 176 |
+
def _resolve_admin_review_provider(model: str, provider: Optional[str] = None) -> str:
|
| 177 |
+
configured = (provider or config.ADMIN_REVIEW_PROVIDER or "auto").strip().lower()
|
| 178 |
if configured in {"ollama", "nvidia"}:
|
| 179 |
return configured
|
| 180 |
if config.NVIDIA_API_KEY and "/" in (model or ""):
|
|
|
|
| 186 |
*,
|
| 187 |
prompt: str,
|
| 188 |
model: str,
|
| 189 |
+
provider: Optional[str] = None,
|
| 190 |
) -> str:
|
| 191 |
+
resolved_provider = _resolve_admin_review_provider(model, provider)
|
| 192 |
+
if resolved_provider == "nvidia":
|
| 193 |
return nvidia_generate(
|
| 194 |
base_url=config.NVIDIA_API_BASE_URL,
|
| 195 |
api_key=config.NVIDIA_API_KEY or "",
|
|
|
|
| 216 |
trace_row: Optional[dict[str, Any]] = None,
|
| 217 |
feedback_rows: Optional[list[dict[str, Any]]] = None,
|
| 218 |
model: Optional[str] = None,
|
| 219 |
+
provider: Optional[str] = None,
|
| 220 |
) -> dict[str, Any]:
|
| 221 |
prompt = _build_prompt(dataset_row, trace_row, feedback_rows or [])
|
| 222 |
selected_model = model or config.ADMIN_REVIEW_MODEL
|
| 223 |
+
raw = _generate_review_text(prompt=prompt, model=selected_model, provider=provider)
|
| 224 |
data = _extract_json_object(raw)
|
| 225 |
+
resolved_provider = _resolve_admin_review_provider(selected_model, provider)
|
| 226 |
return {
|
| 227 |
"manual_verdict": _normalize_verdict(data.get("manual_verdict")),
|
| 228 |
"gold_answer_text": _clean_text(data.get("gold_answer_text"))
|
|
|
|
| 237 |
"confidence": _normalize_confidence(data.get("confidence")),
|
| 238 |
"rationale": _clean_text(data.get("rationale")) or "Draft generated from trace, feedback, and existing evidence.",
|
| 239 |
"model": selected_model,
|
| 240 |
+
"provider": resolved_provider,
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def _token_set(text: str) -> set[str]:
|
| 245 |
+
return {
|
| 246 |
+
token
|
| 247 |
+
for token in "".join(ch.lower() if ch.isalnum() else " " for ch in text or "").split()
|
| 248 |
+
if len(token) > 2
|
| 249 |
+
}
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def _text_compatible(left: str, right: str) -> bool:
    """Report whether two texts share enough tokens to count as compatible.

    The overlap ratio is the number of shared tokens divided by the size of
    the smaller token set; a ratio of at least 0.25 is compatible. If either
    text produces no tokens the texts are treated as incompatible.
    """
    tokens_a, tokens_b = _token_set(left), _token_set(right)
    if not (tokens_a and tokens_b):
        return False
    shared = len(tokens_a & tokens_b)
    smaller = min(len(tokens_a), len(tokens_b))
    return shared / max(1, smaller) >= 0.25
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
def _error_draft(*, model: str, provider: str, exc: Exception) -> dict[str, Any]:
|
| 262 |
+
return {
|
| 263 |
+
"manual_verdict": "partial",
|
| 264 |
+
"gold_answer_text": "",
|
| 265 |
+
"gold_evidence_text": "",
|
| 266 |
+
"failure_mode": "other",
|
| 267 |
+
"confidence": 0.0,
|
| 268 |
+
"rationale": f"Reviewer failed: {exc}",
|
| 269 |
+
"model": model,
|
| 270 |
+
"provider": provider,
|
| 271 |
+
"error": str(exc),
|
| 272 |
+
}
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
def _build_consensus(primary: dict[str, Any], secondary: Optional[dict[str, Any]]) -> dict[str, Any]:
|
| 276 |
+
if primary.get("error") and secondary and not secondary.get("error"):
|
| 277 |
+
return {
|
| 278 |
+
**secondary,
|
| 279 |
+
"agreement_status": "secondary_only",
|
| 280 |
+
"apply_ready": True,
|
| 281 |
+
"confidence": min(float(secondary.get("confidence") or 0.0), 0.74),
|
| 282 |
+
"rationale": "Only the secondary reviewer produced a usable draft.",
|
| 283 |
+
}
|
| 284 |
+
if not secondary or secondary.get("error"):
|
| 285 |
+
return {
|
| 286 |
+
**primary,
|
| 287 |
+
"agreement_status": "primary_only",
|
| 288 |
+
"apply_ready": True,
|
| 289 |
+
"confidence": min(float(primary.get("confidence") or 0.0), 0.74),
|
| 290 |
+
"rationale": "Only the primary reviewer produced a usable draft.",
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
verdict_match = primary.get("manual_verdict") == secondary.get("manual_verdict")
|
| 294 |
+
answer_match = _text_compatible(
|
| 295 |
+
primary.get("gold_answer_text") or "",
|
| 296 |
+
secondary.get("gold_answer_text") or "",
|
| 297 |
+
)
|
| 298 |
+
evidence_match = _text_compatible(
|
| 299 |
+
primary.get("gold_evidence_text") or "",
|
| 300 |
+
secondary.get("gold_evidence_text") or "",
|
| 301 |
+
)
|
| 302 |
+
min_confidence = min(
|
| 303 |
+
float(primary.get("confidence") or 0.0),
|
| 304 |
+
float(secondary.get("confidence") or 0.0),
|
| 305 |
+
)
|
| 306 |
+
chosen = primary if float(primary.get("confidence") or 0.0) >= float(secondary.get("confidence") or 0.0) else secondary
|
| 307 |
+
compatible = verdict_match and (answer_match or evidence_match)
|
| 308 |
+
if compatible and min_confidence >= config.ADMIN_REVIEW_CONSENSUS_MIN_CONFIDENCE:
|
| 309 |
+
status = "agreement"
|
| 310 |
+
elif compatible:
|
| 311 |
+
status = "weak_agreement"
|
| 312 |
+
else:
|
| 313 |
+
status = "disagreement"
|
| 314 |
+
return {
|
| 315 |
+
"manual_verdict": chosen.get("manual_verdict") or "partial",
|
| 316 |
+
"gold_answer_text": chosen.get("gold_answer_text") or "",
|
| 317 |
+
"gold_evidence_text": chosen.get("gold_evidence_text") or "",
|
| 318 |
+
"failure_mode": chosen.get("failure_mode"),
|
| 319 |
+
"confidence": min_confidence,
|
| 320 |
+
"rationale": (
|
| 321 |
+
"Both reviewers agree enough for draft application."
|
| 322 |
+
if status in {"agreement", "weak_agreement"}
|
| 323 |
+
else "Reviewers disagreed; human edit is required before applying."
|
| 324 |
+
),
|
| 325 |
+
"agreement_status": status,
|
| 326 |
+
"apply_ready": status in {"agreement", "weak_agreement"},
|
| 327 |
+
"primary_model": primary.get("model"),
|
| 328 |
+
"secondary_model": secondary.get("model"),
|
| 329 |
+
}
|
| 330 |
+
|
| 331 |
+
|
| 332 |
+
def generate_eval_dataset_dual_draft_review(
    dataset_row: dict[str, Any],
    *,
    trace_row: Optional[dict[str, Any]] = None,
    feedback_rows: Optional[list[dict[str, Any]]] = None,
    primary_model: Optional[str] = None,
    secondary_model: Optional[str] = None,
    primary_provider: Optional[str] = None,
    secondary_provider: Optional[str] = None,
) -> dict[str, Any]:
    """Run the primary and secondary reviewers concurrently and build a consensus.

    Any model or provider argument left falsy falls back to the matching
    ``config.ADMIN_REVIEW_PRIMARY_*`` / ``config.ADMIN_REVIEW_SECONDARY_*``
    setting. A reviewer that raises is replaced by an error draft instead of
    aborting the whole review.

    Returns:
        A dict with the ``primary`` draft, the ``secondary`` draft and the
        ``consensus`` built from both.
    """
    model_for_primary = primary_model or config.ADMIN_REVIEW_PRIMARY_MODEL
    model_for_secondary = secondary_model or config.ADMIN_REVIEW_SECONDARY_MODEL
    provider_for_primary = primary_provider or config.ADMIN_REVIEW_PRIMARY_PROVIDER
    provider_for_secondary = secondary_provider or config.ADMIN_REVIEW_SECONDARY_PROVIDER
    reviewers = (
        (model_for_primary, provider_for_primary),
        (model_for_secondary, provider_for_secondary),
    )

    def _review_with(model: str, provider: str) -> dict[str, Any]:
        # One reviewer's failure must not take the other one down with it.
        try:
            return generate_eval_dataset_draft_review(
                dataset_row,
                trace_row=trace_row,
                feedback_rows=feedback_rows or [],
                model=model,
                provider=provider,
            )
        except Exception as exc:
            return _error_draft(model=model, provider=provider, exc=exc)

    with ThreadPoolExecutor(max_workers=2) as pool:
        pending = [pool.submit(_review_with, model, provider) for model, provider in reviewers]
        primary_draft = pending[0].result()
        secondary_draft = pending[1].result()

    return {
        "primary": primary_draft,
        "secondary": secondary_draft,
        "consensus": _build_consensus(primary_draft, secondary_draft),
    }
|
docs/operations_playbook.md
CHANGED
|
@@ -5,6 +5,7 @@
|
|
| 5 |
- Promote high-signal traces into `evaluation_datasets`
|
| 6 |
- Review Gemma drafts, then manually activate trusted rows
|
| 7 |
- If using hosted Gemma review, ensure `NVIDIA_API_KEY` is set and valid
|
|
|
|
| 8 |
|
| 9 |
## Monthly
|
| 10 |
- Run retrieval-profile sweeps against the active reviewed baseline
|
|
@@ -22,10 +23,12 @@
|
|
| 22 |
- Use Graph view for summary, search, path, and export
|
| 23 |
- Code indexing is operator-only and restricted to allowed roots
|
| 24 |
- URL ingestion is operator-only and restricted by host allowlist plus public-IP checks
|
|
|
|
| 25 |
- Keep PDF answer-first flow unchanged while graph features expand
|
| 26 |
|
| 27 |
## Admin Review Backends
|
| 28 |
- `ADMIN_REVIEW_PROVIDER=auto` prefers NVIDIA hosted models when `NVIDIA_API_KEY` is present and the model looks hosted
|
| 29 |
- `ADMIN_REVIEW_PROVIDER=ollama` forces local review generation
|
| 30 |
- `ADMIN_REVIEW_PROVIDER=nvidia` forces NVIDIA hosted review generation
|
|
|
|
| 31 |
- Rotate and replace any leaked provider keys before putting them into runtime envs
|
|
|
|
| 5 |
- Promote high-signal traces into `evaluation_datasets`
|
| 6 |
- Review Gemma drafts, then manually activate trusted rows
|
| 7 |
- If using hosted Gemma review, ensure `NVIDIA_API_KEY` is set and valid
|
| 8 |
+
- If using dual review, compare consensus disagreements before applying drafts
|
| 9 |
|
| 10 |
## Monthly
|
| 11 |
- Run retrieval-profile sweeps against the active reviewed baseline
|
|
|
|
| 23 |
- Use Graph view for summary, search, path, and export
|
| 24 |
- Code indexing is operator-only and restricted to allowed roots
|
| 25 |
- URL ingestion is operator-only and restricted by host allowlist plus public-IP checks
|
| 26 |
+
- Bounded URL crawl must stay within configured depth and page budgets
|
| 27 |
- Keep PDF answer-first flow unchanged while graph features expand
|
| 28 |
|
| 29 |
## Admin Review Backends
|
| 30 |
- `ADMIN_REVIEW_PROVIDER=auto` prefers NVIDIA hosted models when `NVIDIA_API_KEY` is present and the model looks hosted
|
| 31 |
- `ADMIN_REVIEW_PROVIDER=ollama` forces local review generation
|
| 32 |
- `ADMIN_REVIEW_PROVIDER=nvidia` forces NVIDIA hosted review generation
|
| 33 |
+
- Dual review uses `ADMIN_REVIEW_PRIMARY_*` and `ADMIN_REVIEW_SECONDARY_*` settings, then stores consensus for human approval
|
| 34 |
- Rotate and replace any leaked provider keys before putting them into runtime envs
|
docs/release_checklist.md
CHANGED
|
@@ -13,6 +13,8 @@
|
|
| 13 |
- `MASTER_ADMIN_KEY`
|
| 14 |
- graph/code index env vars if used
|
| 15 |
- if hosted admin review is enabled: `ADMIN_REVIEW_PROVIDER`, `ADMIN_REVIEW_MODEL`, `NVIDIA_API_KEY`, `NVIDIA_API_BASE_URL`
|
|
|
|
|
|
|
| 16 |
|
| 17 |
## Post-Deploy Smoke
|
| 18 |
- `GET /health`
|
|
@@ -21,8 +23,10 @@
|
|
| 21 |
- Query streaming still returns sources and a trace id
|
| 22 |
- Admin review loads traces and evaluation datasets
|
| 23 |
- Admin draft review works with the configured provider (`ollama` or NVIDIA hosted)
|
|
|
|
| 24 |
- Graph summary/search/export endpoints return tenant-scoped data
|
| 25 |
- Operator URL ingest accepts only safe public hosts and queues background ingestion
|
|
|
|
| 26 |
|
| 27 |
## Operator Checks
|
| 28 |
- Confirm reviewed eval rows and active eval rows are non-zero
|
|
|
|
| 13 |
- `MASTER_ADMIN_KEY`
|
| 14 |
- graph/code index env vars if used
|
| 15 |
- if hosted admin review is enabled: `ADMIN_REVIEW_PROVIDER`, `ADMIN_REVIEW_MODEL`, `NVIDIA_API_KEY`, `NVIDIA_API_BASE_URL`
|
| 16 |
+
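- if dual review is enabled: `ADMIN_REVIEW_PRIMARY_PROVIDER`, `ADMIN_REVIEW_PRIMARY_MODEL`, `ADMIN_REVIEW_SECONDARY_PROVIDER`, `ADMIN_REVIEW_SECONDARY_MODEL`, `ADMIN_REVIEW_CONSENSUS_ENABLED`, `ADMIN_REVIEW_CONSENSUS_MIN_CONFIDENCE`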
|
| 17 |
+
- if Crawl4AI URL extraction is enabled: `URL_INGEST_EXTRACTOR`, `URL_INGEST_CRAWL4AI_ENABLED`, `URL_INGEST_ALLOWED_HOSTS`
|
| 18 |
|
| 19 |
## Post-Deploy Smoke
|
| 20 |
- `GET /health`
|
|
|
|
| 23 |
- Query streaming still returns sources and a trace id
|
| 24 |
- Admin review loads traces and evaluation datasets
|
| 25 |
- Admin draft review works with the configured provider (`ollama` or NVIDIA hosted)
|
| 26 |
+
- Dual admin draft review shows primary, secondary, and consensus status when enabled
|
| 27 |
- Graph summary/search/export endpoints return tenant-scoped data
|
| 28 |
- Operator URL ingest accepts only safe public hosts and queues background ingestion
|
| 29 |
+
- Bounded URL crawl is constrained by allowed hosts, depth, and page budget
|
| 30 |
|
| 31 |
## Operator Checks
|
| 32 |
- Confirm reviewed eval rows and active eval rows are non-zero
|
frontend/index.html
CHANGED
|
@@ -515,6 +515,14 @@
|
|
| 515 |
<div class="confirm-zone" id="graphUrlOperatorHelp">URL ingestion is unavailable.</div>
|
| 516 |
<input type="text" id="graphUrlInput" placeholder="https://example.com/docs/page" style="margin-top:10px;" />
|
| 517 |
<input type="text" id="graphUrlLabel" placeholder="Optional display label…" style="margin-top:8px;" />
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 518 |
<div style="display:flex;gap:8px;flex-wrap:wrap;margin-top:10px;">
|
| 519 |
<button class="btn-primary" onclick="runOperatorUrlIngest()">INGEST URL</button>
|
| 520 |
</div>
|
|
|
|
| 515 |
<div class="confirm-zone" id="graphUrlOperatorHelp">URL ingestion is unavailable.</div>
|
| 516 |
<input type="text" id="graphUrlInput" placeholder="https://example.com/docs/page" style="margin-top:10px;" />
|
| 517 |
<input type="text" id="graphUrlLabel" placeholder="Optional display label…" style="margin-top:8px;" />
|
| 518 |
+
<select id="graphUrlMode" onchange="syncGraphUrlMode()" style="margin-top:8px;">
|
| 519 |
+
<option value="single_page">Single page</option>
|
| 520 |
+
<option value="bounded_crawl">Bounded crawl</option>
|
| 521 |
+
</select>
|
| 522 |
+
<div id="graphUrlCrawlFields" style="display:none;gap:8px;flex-wrap:wrap;margin-top:8px;">
|
| 523 |
+
<input type="number" id="graphUrlMaxDepth" min="0" max="3" value="1" placeholder="Max depth" style="flex:1;min-width:100px;" />
|
| 524 |
+
<input type="number" id="graphUrlMaxPages" min="1" max="20" value="5" placeholder="Max pages" style="flex:1;min-width:100px;" />
|
| 525 |
+
</div>
|
| 526 |
<div style="display:flex;gap:8px;flex-wrap:wrap;margin-top:10px;">
|
| 527 |
<button class="btn-primary" onclick="runOperatorUrlIngest()">INGEST URL</button>
|
| 528 |
</div>
|
frontend/js/admin.js
CHANGED
|
@@ -157,6 +157,12 @@ function _renderEvalDatasetList(rows) {
|
|
| 157 |
const categories = Array.isArray(row.document_types) ? row.document_types : [];
|
| 158 |
const draft = row.ai_review_draft || {};
|
| 159 |
const hasDraft = Object.keys(draft).length > 0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
return `
|
| 161 |
<div style="padding:10px;border:1px solid #22304a;border-radius:10px;background:rgba(7,12,24,0.72);margin-bottom:10px;">
|
| 162 |
<div style="display:flex;justify-content:space-between;gap:12px;align-items:flex-start;">
|
|
@@ -177,12 +183,18 @@ function _renderEvalDatasetList(rows) {
|
|
| 177 |
<div style="margin-top:10px;padding:10px;border:1px solid #243142;border-radius:8px;background:rgba(10,18,32,0.55);">
|
| 178 |
<div style="font-size:0.76rem;color:#7dd3fc;letter-spacing:0.12em;text-transform:uppercase;">Gemma Draft</div>
|
| 179 |
<div style="margin-top:6px;">
|
| 180 |
-
${_adminBadge(`verdict ${
|
| 181 |
-
${_adminBadge(`confidence ${Number(
|
| 182 |
-
${
|
|
|
|
| 183 |
</div>
|
| 184 |
-
<div style="margin-top:8px;font-size:0.8rem;color:#cbd5e1;white-space:pre-wrap;">${esc(
|
| 185 |
-
${
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
</div>
|
| 187 |
` : ''}
|
| 188 |
<div style="display:flex;gap:8px;flex-wrap:wrap;margin-top:10px;">
|
|
@@ -347,8 +359,18 @@ async function generateAdminEvalDraft(datasetId) {
|
|
| 347 |
if (!STATE.adminKey) return;
|
| 348 |
const modelRaw = window.prompt('Review model for draft review', 'gemma4:latest');
|
| 349 |
if (modelRaw === null) return;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
await apiAdminGenerateEvalDraft(STATE.adminKey, datasetId, {
|
| 351 |
model: modelRaw.trim() || null,
|
|
|
|
|
|
|
|
|
|
| 352 |
force: true,
|
| 353 |
});
|
| 354 |
toast('Gemma draft review generated.', 'success');
|
|
@@ -361,9 +383,19 @@ async function batchGenerateAdminEvalDrafts() {
|
|
| 361 |
if (limitRaw === null) return;
|
| 362 |
const modelRaw = window.prompt('Review model for batch drafts', 'gemma4:latest');
|
| 363 |
if (modelRaw === null) return;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 364 |
const result = await apiAdminBatchGenerateEvalDrafts(STATE.adminKey, {
|
| 365 |
limit: Number(limitRaw) || 10,
|
| 366 |
model: modelRaw.trim() || null,
|
|
|
|
|
|
|
|
|
|
| 367 |
force: false,
|
| 368 |
});
|
| 369 |
toast(`Generated ${Number(result.generated_count || 0)} Gemma draft(s).`, 'success');
|
|
|
|
| 157 |
const categories = Array.isArray(row.document_types) ? row.document_types : [];
|
| 158 |
const draft = row.ai_review_draft || {};
|
| 159 |
const hasDraft = Object.keys(draft).length > 0;
|
| 160 |
+
const primaryDraft = draft.primary || draft;
|
| 161 |
+
const secondaryDraft = draft.secondary || null;
|
| 162 |
+
const consensus = draft.consensus || null;
|
| 163 |
+
const displayDraft = consensus || primaryDraft;
|
| 164 |
+
const agreementStatus = consensus?.agreement_status || (hasDraft ? 'single_reviewer' : null);
|
| 165 |
+
const agreementTone = agreementStatus === 'disagreement' ? 'danger' : agreementStatus === 'agreement' ? 'success' : 'muted';
|
| 166 |
return `
|
| 167 |
<div style="padding:10px;border:1px solid #22304a;border-radius:10px;background:rgba(7,12,24,0.72);margin-bottom:10px;">
|
| 168 |
<div style="display:flex;justify-content:space-between;gap:12px;align-items:flex-start;">
|
|
|
|
| 183 |
<div style="margin-top:10px;padding:10px;border:1px solid #243142;border-radius:8px;background:rgba(10,18,32,0.55);">
|
| 184 |
<div style="font-size:0.76rem;color:#7dd3fc;letter-spacing:0.12em;text-transform:uppercase;">Gemma Draft</div>
|
| 185 |
<div style="margin-top:6px;">
|
| 186 |
+
${_adminBadge(`verdict ${displayDraft.manual_verdict || 'partial'}`, displayDraft.manual_verdict === 'wrong' ? 'danger' : displayDraft.manual_verdict === 'correct' ? 'success' : 'muted')}
|
| 187 |
+
${_adminBadge(`confidence ${Number(displayDraft.confidence || 0).toFixed(2)}`)}
|
| 188 |
+
${agreementStatus ? _adminBadge(`agreement ${agreementStatus}`, agreementTone) : ''}
|
| 189 |
+
${displayDraft.failure_mode ? _adminBadge(`failure ${displayDraft.failure_mode}`, 'danger') : ''}
|
| 190 |
</div>
|
| 191 |
+
<div style="margin-top:8px;font-size:0.8rem;color:#cbd5e1;white-space:pre-wrap;">${esc(displayDraft.gold_answer_text || '')}</div>
|
| 192 |
+
${displayDraft.rationale ? `<div style="margin-top:6px;font-size:0.78rem;color:#94a3b8;">${esc(displayDraft.rationale)}</div>` : ''}
|
| 193 |
+
<div style="margin-top:8px;font-size:0.74rem;color:#94a3b8;">
|
| 194 |
+
primary ${esc(primaryDraft.provider || 'unknown')}/${esc(primaryDraft.model || 'unknown')}
|
| 195 |
+
${secondaryDraft ? ` · secondary ${esc(secondaryDraft.provider || 'unknown')}/${esc(secondaryDraft.model || 'unknown')}` : ''}
|
| 196 |
+
</div>
|
| 197 |
+
${agreementStatus === 'disagreement' ? `<div style="margin-top:8px;color:#fb7185;font-size:0.78rem;">Reviewers disagreed. Edit manually before applying.</div>` : ''}
|
| 198 |
</div>
|
| 199 |
` : ''}
|
| 200 |
<div style="display:flex;gap:8px;flex-wrap:wrap;margin-top:10px;">
|
|
|
|
| 359 |
if (!STATE.adminKey) return;
|
| 360 |
const modelRaw = window.prompt('Review model for draft review', 'gemma4:latest');
|
| 361 |
if (modelRaw === null) return;
|
| 362 |
+
const dualReview = window.confirm('Run dual-review with NVIDIA + local Ollama when configured?');
|
| 363 |
+
let primaryModel = null;
|
| 364 |
+
let secondaryModel = null;
|
| 365 |
+
if (dualReview) {
|
| 366 |
+
primaryModel = window.prompt('Primary review model (blank = configured NVIDIA default)', '') || null;
|
| 367 |
+
secondaryModel = window.prompt('Secondary review model (blank = configured local default)', '') || null;
|
| 368 |
+
}
|
| 369 |
await apiAdminGenerateEvalDraft(STATE.adminKey, datasetId, {
|
| 370 |
model: modelRaw.trim() || null,
|
| 371 |
+
primary_model: primaryModel?.trim() || null,
|
| 372 |
+
secondary_model: secondaryModel?.trim() || null,
|
| 373 |
+
dual_review: dualReview,
|
| 374 |
force: true,
|
| 375 |
});
|
| 376 |
toast('Gemma draft review generated.', 'success');
|
|
|
|
| 383 |
if (limitRaw === null) return;
|
| 384 |
const modelRaw = window.prompt('Review model for batch drafts', 'gemma4:latest');
|
| 385 |
if (modelRaw === null) return;
|
| 386 |
+
const dualReview = window.confirm('Run dual-review for this batch? Recommended when NVIDIA and local Ollama are configured.');
|
| 387 |
+
let primaryModel = null;
|
| 388 |
+
let secondaryModel = null;
|
| 389 |
+
if (dualReview) {
|
| 390 |
+
primaryModel = window.prompt('Primary review model (blank = configured NVIDIA default)', '') || null;
|
| 391 |
+
secondaryModel = window.prompt('Secondary review model (blank = configured local default)', '') || null;
|
| 392 |
+
}
|
| 393 |
const result = await apiAdminBatchGenerateEvalDrafts(STATE.adminKey, {
|
| 394 |
limit: Number(limitRaw) || 10,
|
| 395 |
model: modelRaw.trim() || null,
|
| 396 |
+
primary_model: primaryModel?.trim() || null,
|
| 397 |
+
secondary_model: secondaryModel?.trim() || null,
|
| 398 |
+
dual_review: dualReview,
|
| 399 |
force: false,
|
| 400 |
});
|
| 401 |
toast(`Generated ${Number(result.generated_count || 0)} Gemma draft(s).`, 'success');
|
frontend/js/graph.js
CHANGED
|
@@ -491,6 +491,7 @@ async function refreshGraphOperatorState() {
|
|
| 491 |
STATE.graphAllowedRoots = options.allowed_roots || [];
|
| 492 |
STATE.graphIndexDefaultRoot = options.default_root || '';
|
| 493 |
STATE.graphAllowedHosts = urlOptions.allowed_hosts || [];
|
|
|
|
| 494 |
if (document.getElementById('graphIndexRoot') && !document.getElementById('graphIndexRoot').value) {
|
| 495 |
document.getElementById('graphIndexRoot').value = STATE.graphIndexDefaultRoot || '';
|
| 496 |
}
|
|
@@ -498,9 +499,10 @@ async function refreshGraphOperatorState() {
|
|
| 498 |
? `Allowed roots:<br>${STATE.graphAllowedRoots.map(root => `<code>${esc(root)}</code>`).join('<br>')}`
|
| 499 |
: 'No operator code roots are configured.';
|
| 500 |
if (urlHelp) {
|
|
|
|
| 501 |
urlHelp.innerHTML = STATE.graphAllowedHosts.length
|
| 502 |
-
? `Allowed hosts:<br>${STATE.graphAllowedHosts.map(host => `<code>${esc(host)}</code>`).join('<br>')}`
|
| 503 |
-
:
|
| 504 |
}
|
| 505 |
}
|
| 506 |
|
|
@@ -577,6 +579,9 @@ async function runOperatorUrlIngest() {
|
|
| 577 |
}
|
| 578 |
const url = document.getElementById('graphUrlInput')?.value?.trim() || '';
|
| 579 |
const label = document.getElementById('graphUrlLabel')?.value?.trim() || '';
|
|
|
|
|
|
|
|
|
|
| 580 |
if (!url) {
|
| 581 |
toast('Enter a URL to ingest first.', 'error');
|
| 582 |
return;
|
|
@@ -584,10 +589,19 @@ async function runOperatorUrlIngest() {
|
|
| 584 |
const result = await apiAdminIngestUrl(STATE.adminKey, {
|
| 585 |
url,
|
| 586 |
label: label || null,
|
|
|
|
|
|
|
|
|
|
| 587 |
});
|
| 588 |
toast(`Queued URL ingestion for ${result.final_url || result.source_url}.`, 'success');
|
| 589 |
}
|
| 590 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 591 |
function downloadGraphExport() {
|
| 592 |
const snapshot = STATE.graphSnapshot;
|
| 593 |
if (!snapshot) {
|
|
@@ -695,6 +709,7 @@ window.runGraphSearch = runGraphSearch;
|
|
| 695 |
window.runGraphPath = runGraphPath;
|
| 696 |
window.runOperatorCodeIndex = runOperatorCodeIndex;
|
| 697 |
window.runOperatorUrlIngest = runOperatorUrlIngest;
|
|
|
|
| 698 |
window.downloadGraphExport = downloadGraphExport;
|
| 699 |
window.selectGraphNode = selectGraphNode;
|
| 700 |
window.selectGraphNodeByKey = selectGraphNodeByKey;
|
|
|
|
| 491 |
STATE.graphAllowedRoots = options.allowed_roots || [];
|
| 492 |
STATE.graphIndexDefaultRoot = options.default_root || '';
|
| 493 |
STATE.graphAllowedHosts = urlOptions.allowed_hosts || [];
|
| 494 |
+
STATE.graphUrlOptions = urlOptions || {};
|
| 495 |
if (document.getElementById('graphIndexRoot') && !document.getElementById('graphIndexRoot').value) {
|
| 496 |
document.getElementById('graphIndexRoot').value = STATE.graphIndexDefaultRoot || '';
|
| 497 |
}
|
|
|
|
| 499 |
? `Allowed roots:<br>${STATE.graphAllowedRoots.map(root => `<code>${esc(root)}</code>`).join('<br>')}`
|
| 500 |
: 'No operator code roots are configured.';
|
| 501 |
if (urlHelp) {
|
| 502 |
+
const limits = `mode ${esc(urlOptions.default_mode || 'single_page')} · max depth ${Number(urlOptions.max_depth || 0)} · max pages ${Number(urlOptions.max_pages || 1)} · extractor ${esc(urlOptions.extractor || 'basic')}`;
|
| 503 |
urlHelp.innerHTML = STATE.graphAllowedHosts.length
|
| 504 |
+
? `${limits}<br>Allowed hosts:<br>${STATE.graphAllowedHosts.map(host => `<code>${esc(host)}</code>`).join('<br>')}`
|
| 505 |
+
: `${limits}<br>No host allowlist is configured. Single-page ingestion still blocks private and non-routable hosts; bounded crawl requires an allowlist.`;
|
| 506 |
}
|
| 507 |
}
|
| 508 |
|
|
|
|
| 579 |
}
|
| 580 |
const url = document.getElementById('graphUrlInput')?.value?.trim() || '';
|
| 581 |
const label = document.getElementById('graphUrlLabel')?.value?.trim() || '';
|
| 582 |
+
const mode = document.getElementById('graphUrlMode')?.value || 'single_page';
|
| 583 |
+
const maxDepth = Number(document.getElementById('graphUrlMaxDepth')?.value || 1);
|
| 584 |
+
const maxPages = Number(document.getElementById('graphUrlMaxPages')?.value || 5);
|
| 585 |
if (!url) {
|
| 586 |
toast('Enter a URL to ingest first.', 'error');
|
| 587 |
return;
|
|
|
|
| 589 |
const result = await apiAdminIngestUrl(STATE.adminKey, {
|
| 590 |
url,
|
| 591 |
label: label || null,
|
| 592 |
+
mode,
|
| 593 |
+
max_depth: maxDepth,
|
| 594 |
+
max_pages: maxPages,
|
| 595 |
});
|
| 596 |
toast(`Queued URL ingestion for ${result.final_url || result.source_url}.`, 'success');
|
| 597 |
}
|
| 598 |
|
| 599 |
+
// Show the crawl depth/page inputs only while "bounded_crawl" is the selected URL ingest mode.
function syncGraphUrlMode() {
  const modeSelect = document.getElementById('graphUrlMode');
  const selectedMode = modeSelect?.value || 'single_page';
  const crawlFields = document.getElementById('graphUrlCrawlFields');
  if (!crawlFields) return;
  const showCrawlFields = selectedMode === 'bounded_crawl';
  crawlFields.style.display = showCrawlFields ? 'flex' : 'none';
}
|
| 604 |
+
|
| 605 |
function downloadGraphExport() {
|
| 606 |
const snapshot = STATE.graphSnapshot;
|
| 607 |
if (!snapshot) {
|
|
|
|
| 709 |
window.runGraphPath = runGraphPath;
|
| 710 |
window.runOperatorCodeIndex = runOperatorCodeIndex;
|
| 711 |
window.runOperatorUrlIngest = runOperatorUrlIngest;
|
| 712 |
+
window.syncGraphUrlMode = syncGraphUrlMode;
|
| 713 |
window.downloadGraphExport = downloadGraphExport;
|
| 714 |
window.selectGraphNode = selectGraphNode;
|
| 715 |
window.selectGraphNodeByKey = selectGraphNodeByKey;
|
frontend/js/state.js
CHANGED
|
@@ -37,6 +37,7 @@ const STATE = {
|
|
| 37 |
graphAllowedRoots: [],
|
| 38 |
graphIndexDefaultRoot: '',
|
| 39 |
graphAllowedHosts: [],
|
|
|
|
| 40 |
};
|
| 41 |
|
| 42 |
function stateRefreshCategories() {
|
|
|
|
| 37 |
graphAllowedRoots: [],
|
| 38 |
graphIndexDefaultRoot: '',
|
| 39 |
graphAllowedHosts: [],
|
| 40 |
+
graphUrlOptions: {},
|
| 41 |
};
|
| 42 |
|
| 43 |
function stateRefreshCategories() {
|
tests/test_pipeline_regressions.py
CHANGED
|
@@ -9,6 +9,8 @@ from pathlib import Path
|
|
| 9 |
from types import SimpleNamespace
|
| 10 |
|
| 11 |
from langchain_core.documents import Document
|
|
|
|
|
|
|
| 12 |
from starlette.requests import Request
|
| 13 |
|
| 14 |
from backend.api import admin
|
|
@@ -3022,6 +3024,80 @@ def test_draft_reviewer_can_route_to_nvidia_hosted_model(monkeypatch):
|
|
| 3022 |
assert captured["kwargs"]["enable_thinking"] is True
|
| 3023 |
|
| 3024 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3025 |
def test_assess_profile_change_accepts_only_when_acceptance_rules_pass():
|
| 3026 |
dataset = [
|
| 3027 |
{"question": f"q{i}", "manual_verdict": "partial" if i else "correct"}
|
|
|
|
| 9 |
from types import SimpleNamespace
|
| 10 |
|
| 11 |
from langchain_core.documents import Document
|
| 12 |
+
import pytest
|
| 13 |
+
from fastapi import HTTPException
|
| 14 |
from starlette.requests import Request
|
| 15 |
|
| 16 |
from backend.api import admin
|
|
|
|
| 3024 |
assert captured["kwargs"]["enable_thinking"] is True
|
| 3025 |
|
| 3026 |
|
| 3027 |
+
def test_draft_reviewer_dual_review_builds_agreement_consensus(monkeypatch):
    """Two reviewers with the same verdict and near-identical answers reach agreement."""
    import backend.eval.draft_reviewer as draft_reviewer

    hosted_answer = "The guide exists because definitions are not enough."
    local_answer = "The guide exists because knowing definitions is not enough."

    def _fake_generate(*, model, **_ignored):
        # Only the answer wording differs between the two stubbed reviewers.
        payload = {
            "manual_verdict": "correct",
            "gold_answer_text": hosted_answer if model == "google/gemma-4-31b-it" else local_answer,
            "gold_evidence_text": "This guide exists because knowing definitions is not enough.",
            "failure_mode": None,
            "confidence": 0.91,
            "rationale": "Grounded in the trace.",
        }
        return json.dumps(payload)

    monkeypatch.setattr(draft_reviewer, "_generate_review_text", _fake_generate)

    dataset_row = {
        "question": "Why does this guide exist?",
        "gold_answer_text": "",
        "gold_evidence_text": "",
    }
    result = draft_reviewer.generate_eval_dataset_dual_draft_review(
        dataset_row,
        primary_model="google/gemma-4-31b-it",
        secondary_model="gemma4:26b",
        primary_provider="nvidia",
        secondary_provider="ollama",
    )

    assert result["primary"]["provider"] == "nvidia"
    assert result["secondary"]["provider"] == "ollama"
    consensus = result["consensus"]
    assert consensus["agreement_status"] == "agreement"
    assert consensus["apply_ready"] is True
|
| 3065 |
+
|
| 3066 |
+
|
| 3067 |
+
def test_admin_apply_eval_dataset_dual_draft_blocks_disagreement(monkeypatch):
    """Applying a dual-review draft whose consensus is a disagreement returns HTTP 409."""
    disagreeing_draft = {
        "primary": {"manual_verdict": "correct", "model": "google/gemma-4-31b-it"},
        "secondary": {"manual_verdict": "wrong", "model": "gemma4:26b"},
        "consensus": {
            "manual_verdict": "partial",
            "gold_answer_text": "",
            "gold_evidence_text": "",
            "failure_mode": "other",
            "confidence": 0.4,
            "agreement_status": "disagreement",
            "apply_ready": False,
        },
    }
    eval_row = {
        "id": 33,
        "trace_id": "trace-ai-disagree",
        "question": "What is the title?",
        "manual_verdict": None,
        "is_active": False,
        "ai_review_draft": disagreeing_draft,
    }
    fake_service = FakeServiceSupabase()
    fake_service.eval_rows.append(eval_row)

    monkeypatch.setattr(admin, "_admin_client", lambda: fake_service)
    monkeypatch.setenv("MASTER_ADMIN_KEY", "secret")

    with pytest.raises(HTTPException) as exc_info:
        admin.apply_eval_dataset_draft_review(33, x_admin_key="secret")

    raised = exc_info.value
    assert raised.status_code == 409
|
| 3099 |
+
|
| 3100 |
+
|
| 3101 |
def test_assess_profile_change_accepts_only_when_acceptance_rules_pass():
|
| 3102 |
dataset = [
|
| 3103 |
{"question": f"q{i}", "manual_verdict": "partial" if i else "correct"}
|
tests/test_url_ingestion.py
CHANGED
|
@@ -86,6 +86,81 @@ def test_fetch_url_to_tempfile_extracts_readable_html(monkeypatch):
|
|
| 86 |
assert "Hybrid retrieval works." in rendered
|
| 87 |
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
def test_run_ingestion_with_url_override_persists_url_metadata(monkeypatch):
|
| 90 |
from tests.test_pipeline_regressions import FakeIngestionSupabase
|
| 91 |
|
|
@@ -154,7 +229,7 @@ def test_run_url_ingest_requires_admin_and_queues_task(monkeypatch):
|
|
| 154 |
monkeypatch.setattr(
|
| 155 |
admin_api,
|
| 156 |
"fetch_url_to_tempfile",
|
| 157 |
-
lambda url, label=None: SimpleNamespace(
|
| 158 |
source_url=url,
|
| 159 |
final_url=url,
|
| 160 |
filename="Morpheus-Docs.md",
|
|
@@ -162,12 +237,17 @@ def test_run_url_ingest_requires_admin_and_queues_task(monkeypatch):
|
|
| 162 |
content_type="text/html",
|
| 163 |
content_bytes=1234,
|
| 164 |
title=label or "Morpheus Docs",
|
|
|
|
| 165 |
),
|
| 166 |
)
|
| 167 |
monkeypatch.setattr(admin_api.process_document_task, "delay", lambda *args: _FakeTask())
|
| 168 |
|
| 169 |
result = admin_api.run_url_ingest(
|
| 170 |
-
admin_api.UrlIngestPayload(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
x_admin_key="admin",
|
| 172 |
x_auth_token="token",
|
| 173 |
user_id="user-1",
|
|
@@ -176,3 +256,4 @@ def test_run_url_ingest_requires_admin_and_queues_task(monkeypatch):
|
|
| 176 |
assert result["ok"] is True
|
| 177 |
assert result["task_id"] == "task-123"
|
| 178 |
assert result["filename"] == "Morpheus-Docs.md"
|
|
|
|
|
|
| 86 |
assert "Hybrid retrieval works." in rendered
|
| 87 |
|
| 88 |
|
| 89 |
+
def test_fetch_url_to_tempfile_uses_crawl4ai_extractor_when_enabled(monkeypatch):
|
| 90 |
+
monkeypatch.setattr(url_ingestion.config, "URL_INGEST_EXTRACTOR", "crawl4ai")
|
| 91 |
+
monkeypatch.setattr(
|
| 92 |
+
url_ingestion,
|
| 93 |
+
"_run_crawl4ai_single_page",
|
| 94 |
+
lambda url, label=None: url_ingestion.UrlPage(
|
| 95 |
+
source_url=url,
|
| 96 |
+
final_url=url,
|
| 97 |
+
title=label or "Crawl4AI Page",
|
| 98 |
+
markdown="# Crawl4AI Page\n\nClean markdown.",
|
| 99 |
+
content_type="text/markdown",
|
| 100 |
+
content_bytes=32,
|
| 101 |
+
links=[],
|
| 102 |
+
),
|
| 103 |
+
)
|
| 104 |
+
|
| 105 |
+
result = url_ingestion.fetch_url_to_tempfile("https://docs.example.com/guide")
|
| 106 |
+
|
| 107 |
+
assert result.title == "Crawl4AI Page"
|
| 108 |
+
with open(result.temp_path, "r", encoding="utf-8") as handle:
|
| 109 |
+
rendered = handle.read()
|
| 110 |
+
assert "Clean markdown." in rendered
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def test_fetch_url_to_tempfile_bounded_crawl_combines_pages(monkeypatch):
|
| 114 |
+
monkeypatch.setattr(url_ingestion.config, "URL_INGEST_ALLOWED_HOSTS", ["docs.example.com"])
|
| 115 |
+
monkeypatch.setattr(url_ingestion.config, "URL_INGEST_MAX_DEPTH", 1)
|
| 116 |
+
monkeypatch.setattr(url_ingestion.config, "URL_INGEST_MAX_PAGES", 3)
|
| 117 |
+
monkeypatch.setattr(
|
| 118 |
+
url_ingestion.socket,
|
| 119 |
+
"getaddrinfo",
|
| 120 |
+
lambda *args, **kwargs: [(None, None, None, None, ("93.184.216.34", 443))],
|
| 121 |
+
)
|
| 122 |
+
|
| 123 |
+
def _fake_fetch(url, label=None):
|
| 124 |
+
links = ["https://docs.example.com/second", "https://other.example.com/blocked"] if url.endswith("/start") else []
|
| 125 |
+
return url_ingestion.UrlPage(
|
| 126 |
+
source_url=url,
|
| 127 |
+
final_url=url,
|
| 128 |
+
title=label or url.rsplit("/", 1)[-1],
|
| 129 |
+
markdown=f"# {url}\n\nPage body.",
|
| 130 |
+
content_type="text/html",
|
| 131 |
+
content_bytes=100,
|
| 132 |
+
links=links,
|
| 133 |
+
)
|
| 134 |
+
|
| 135 |
+
monkeypatch.setattr(url_ingestion, "_fetch_url_page", _fake_fetch)
|
| 136 |
+
|
| 137 |
+
result = url_ingestion.fetch_url_to_tempfile(
|
| 138 |
+
"https://docs.example.com/start",
|
| 139 |
+
mode="bounded_crawl",
|
| 140 |
+
max_depth=1,
|
| 141 |
+
max_pages=3,
|
| 142 |
+
)
|
| 143 |
+
|
| 144 |
+
assert len(result.pages) == 2
|
| 145 |
+
with open(result.temp_path, "r", encoding="utf-8") as handle:
|
| 146 |
+
rendered = handle.read()
|
| 147 |
+
assert "https://docs.example.com/start" in rendered
|
| 148 |
+
assert "https://docs.example.com/second" in rendered
|
| 149 |
+
assert "other.example.com" not in rendered
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def test_fetch_url_to_tempfile_bounded_crawl_requires_allowlist(monkeypatch):
|
| 153 |
+
monkeypatch.setattr(url_ingestion.config, "URL_INGEST_ALLOWED_HOSTS", [])
|
| 154 |
+
|
| 155 |
+
with pytest.raises(url_ingestion.UrlIngestionError, match="requires URL_INGEST_ALLOWED_HOSTS"):
|
| 156 |
+
url_ingestion.fetch_url_to_tempfile(
|
| 157 |
+
"https://docs.example.com/start",
|
| 158 |
+
mode="bounded_crawl",
|
| 159 |
+
max_depth=1,
|
| 160 |
+
max_pages=2,
|
| 161 |
+
)
|
| 162 |
+
|
| 163 |
+
|
| 164 |
def test_run_ingestion_with_url_override_persists_url_metadata(monkeypatch):
|
| 165 |
from tests.test_pipeline_regressions import FakeIngestionSupabase
|
| 166 |
|
|
|
|
| 229 |
monkeypatch.setattr(
|
| 230 |
admin_api,
|
| 231 |
"fetch_url_to_tempfile",
|
| 232 |
+
lambda url, label=None, mode=None, max_depth=None, max_pages=None: SimpleNamespace(
|
| 233 |
source_url=url,
|
| 234 |
final_url=url,
|
| 235 |
filename="Morpheus-Docs.md",
|
|
|
|
| 237 |
content_type="text/html",
|
| 238 |
content_bytes=1234,
|
| 239 |
title=label or "Morpheus Docs",
|
| 240 |
+
pages=[{"final_url": url}],
|
| 241 |
),
|
| 242 |
)
|
| 243 |
monkeypatch.setattr(admin_api.process_document_task, "delay", lambda *args: _FakeTask())
|
| 244 |
|
| 245 |
result = admin_api.run_url_ingest(
|
| 246 |
+
admin_api.UrlIngestPayload(
|
| 247 |
+
url="https://docs.example.com/guide",
|
| 248 |
+
label="Morpheus Docs",
|
| 249 |
+
mode="single_page",
|
| 250 |
+
),
|
| 251 |
x_admin_key="admin",
|
| 252 |
x_auth_token="token",
|
| 253 |
user_id="user-1",
|
|
|
|
| 256 |
assert result["ok"] is True
|
| 257 |
assert result["task_id"] == "task-123"
|
| 258 |
assert result["filename"] == "Morpheus-Docs.md"
|
| 259 |
+
assert result["pages"] == [{"final_url": "https://docs.example.com/guide"}]
|