Spaces:
Running
Phase 4: artifact provenance — model-tracking + debug endpoint
Every cached artifact now records which model produced it, surfaced
via two channels:
1. Backend log on cache hit. _load_diagram, _load_tour, and the README
cache hit print "[cache hit] kind for repo (model)" so the project
owner can read the HF Spaces logs and see whether served traffic is
hitting premium-baked artifacts or fallback-tier ones.
2. Debug endpoint GET /repos/{owner}/{name}/artifacts/info returns
{"repo": ..., "artifacts": [{kind, generated_by_model, generated_at}]}.
Not linked from the UI — owner-facing curl target for auditing what
each repo's cache currently contains.
Save sites now thread the live model id through:
- DiagramService passes self._gen.current_model() to _save_tour and
_save_diagram on every persist path.
- ReadmeService does the same on its post-generation save.
- load_artifact_meta returns the full payload (data + provenance) so
the load helpers don't need a second call to fetch the model name.
- backend/main.py +1 -0
- backend/routers/sessions.py +24 -0
- backend/services/diagram_service.py +14 -12
- backend/services/readme_service.py +9 -4
|
@@ -135,6 +135,7 @@ app.include_router(agent.router)
|
|
| 135 |
app.include_router(diagrams.router)
|
| 136 |
app.include_router(mcp_routes.router)
|
| 137 |
app.include_router(sessions.router)
|
|
|
|
| 138 |
|
| 139 |
|
| 140 |
# ── Health check ───────────────────────────────────────────────────────────────
|
|
|
|
| 135 |
app.include_router(diagrams.router)
|
| 136 |
app.include_router(mcp_routes.router)
|
| 137 |
app.include_router(sessions.router)
|
| 138 |
+
app.include_router(sessions.artifacts_router)
|
| 139 |
|
| 140 |
|
| 141 |
# ── Health check ───────────────────────────────────────────────────────────────
|
|
@@ -107,3 +107,27 @@ def delete_session(
|
|
| 107 |
"""Delete a session. Idempotent — non-existent ids return ok=True."""
|
| 108 |
store.delete_session(session_id)
|
| 109 |
return {"ok": True}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
"""Delete a session. Idempotent — non-existent ids return ok=True."""
|
| 108 |
store.delete_session(session_id)
|
| 109 |
return {"ok": True}
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
# ── Artifact provenance (debug-only inspection path) ──────────────────────
# Surfaces, per repo, which model generated each cached artifact. There is
# deliberately no UI link; the project owner hits this with curl to audit
# cache contents (e.g. "is the nanoGPT tour using the premium model yet?").

artifacts_router = APIRouter(tags=["artifacts"])


@artifacts_router.get("/repos/{owner}/{name}/artifacts/info")
def list_artifact_info(
    owner: str,
    name: str,
    store: Annotated[QdrantStore, Depends(get_qdrant_store)],
):
    """Report provenance for every cached artifact of ``owner/name``.

    Each entry carries the artifact kind plus its generated_by_model and
    generated_at fields, letting pre-bake runs be audited without touching
    the live UI.
    """
    repo = f"{owner}/{name}"
    return {"repo": repo, "artifacts": store.list_artifacts(repo)}
|
|
@@ -365,10 +365,11 @@ class DiagramService:
|
|
| 365 |
# an in-memory hot cache for fast repeat reads within a process.
|
| 366 |
|
| 367 |
def _load_diagram(self, repo: str, diagram_type: str) -> dict | None:
|
| 368 |
-
|
| 369 |
-
if data is not None:
|
| 370 |
-
|
| 371 |
-
|
|
|
|
| 372 |
return None
|
| 373 |
|
| 374 |
def _save_diagram(
|
|
@@ -389,10 +390,11 @@ class DiagramService:
|
|
| 389 |
pass # persistence failure is non-fatal — memory cache still works
|
| 390 |
|
| 391 |
def _load_tour(self, repo: str) -> dict | None:
|
| 392 |
-
|
| 393 |
-
if data is not None:
|
| 394 |
-
|
| 395 |
-
|
|
|
|
| 396 |
return None
|
| 397 |
|
| 398 |
def _save_tour(self, repo: str, data: dict, model: str | None = None) -> None:
|
|
@@ -453,7 +455,7 @@ class DiagramService:
|
|
| 453 |
return {"error": "Could not generate diagram. Try regenerating."}
|
| 454 |
|
| 455 |
self._cache[cache_key] = data
|
| 456 |
-
self._save_diagram(repo, diagram_type, data)
|
| 457 |
return {"diagram": data, "type": diagram_type}
|
| 458 |
|
| 459 |
def build_tour(self, repo: str) -> dict:
|
|
@@ -554,7 +556,7 @@ class DiagramService:
|
|
| 554 |
c["depends_on"] = [d for d in c.get("depends_on", []) if d in valid_ids and d != c["id"]]
|
| 555 |
|
| 556 |
self._tour_cache[repo] = tour
|
| 557 |
-
self._save_tour(repo, tour)
|
| 558 |
return tour
|
| 559 |
|
| 560 |
def build_tour_stream(self, repo: str, force: bool = False):
|
|
@@ -602,7 +604,7 @@ class DiagramService:
|
|
| 602 |
tour = {k: v for k, v in event.items()
|
| 603 |
if k not in ("stage", "progress", "message", "trace")}
|
| 604 |
self._tour_cache[repo] = tour
|
| 605 |
-
self._save_tour(repo, tour
|
| 606 |
elif event.get("stage") == "error" and force:
|
| 607 |
# Generation failed — fall back to the old disk cache if available
|
| 608 |
# so the user sees stale-but-valid data instead of a hard error
|
|
@@ -676,7 +678,7 @@ class DiagramService:
|
|
| 676 |
return
|
| 677 |
|
| 678 |
self._cache[cache_key] = data
|
| 679 |
-
self._save_diagram(repo, diagram_type, data)
|
| 680 |
yield {"stage": "done", "progress": 1.0, "diagram": data, "type": diagram_type}
|
| 681 |
|
| 682 |
def invalidate(self, repo: str):
|
|
|
|
| 365 |
# an in-memory hot cache for fast repeat reads within a process.
|
| 366 |
|
| 367 |
def _load_diagram(self, repo: str, diagram_type: str) -> dict | None:
|
| 368 |
+
meta = self._store.load_artifact_meta(repo, f"diagram_{diagram_type}")
|
| 369 |
+
if meta and meta.get("data") is not None:
|
| 370 |
+
print(f"[cache hit] diagram_{diagram_type} for {repo} ({meta.get('generated_by_model','unknown')})")
|
| 371 |
+
self._cache[(repo, diagram_type)] = meta["data"]
|
| 372 |
+
return meta["data"]
|
| 373 |
return None
|
| 374 |
|
| 375 |
def _save_diagram(
|
|
|
|
| 390 |
pass # persistence failure is non-fatal — memory cache still works
|
| 391 |
|
| 392 |
def _load_tour(self, repo: str) -> dict | None:
|
| 393 |
+
meta = self._store.load_artifact_meta(repo, "tour")
|
| 394 |
+
if meta and meta.get("data") is not None:
|
| 395 |
+
print(f"[cache hit] tour for {repo} ({meta.get('generated_by_model','unknown')})")
|
| 396 |
+
self._tour_cache[repo] = meta["data"]
|
| 397 |
+
return meta["data"]
|
| 398 |
return None
|
| 399 |
|
| 400 |
def _save_tour(self, repo: str, data: dict, model: str | None = None) -> None:
|
|
|
|
| 455 |
return {"error": "Could not generate diagram. Try regenerating."}
|
| 456 |
|
| 457 |
self._cache[cache_key] = data
|
| 458 |
+
self._save_diagram(repo, diagram_type, data, model=self._gen.current_model())
|
| 459 |
return {"diagram": data, "type": diagram_type}
|
| 460 |
|
| 461 |
def build_tour(self, repo: str) -> dict:
|
|
|
|
| 556 |
c["depends_on"] = [d for d in c.get("depends_on", []) if d in valid_ids and d != c["id"]]
|
| 557 |
|
| 558 |
self._tour_cache[repo] = tour
|
| 559 |
+
self._save_tour(repo, tour, model=self._gen.current_model())
|
| 560 |
return tour
|
| 561 |
|
| 562 |
def build_tour_stream(self, repo: str, force: bool = False):
|
|
|
|
| 604 |
tour = {k: v for k, v in event.items()
|
| 605 |
if k not in ("stage", "progress", "message", "trace")}
|
| 606 |
self._tour_cache[repo] = tour
|
| 607 |
+
self._save_tour(repo, tour, model=self._gen.current_model())
|
| 608 |
elif event.get("stage") == "error" and force:
|
| 609 |
# Generation failed — fall back to the old disk cache if available
|
| 610 |
# so the user sees stale-but-valid data instead of a hard error
|
|
|
|
| 678 |
return
|
| 679 |
|
| 680 |
self._cache[cache_key] = data
|
| 681 |
+
self._save_diagram(repo, diagram_type, data, model=self._gen.current_model())
|
| 682 |
yield {"stage": "done", "progress": 1.0, "diagram": data, "type": diagram_type}
|
| 683 |
|
| 684 |
def invalidate(self, repo: str):
|
|
@@ -70,10 +70,12 @@ class ReadmeService:
|
|
| 70 |
"""
|
| 71 |
# ── Cache hit ─────────────────────────────────────────────────────────
|
| 72 |
if not force:
|
| 73 |
-
|
| 74 |
-
if
|
|
|
|
| 75 |
yield {"stage": "loading", "progress": 0.1, "message": "Loading cached README…"}
|
| 76 |
-
yield {"stage": "done", "progress": 1.0,
|
|
|
|
| 77 |
return
|
| 78 |
|
| 79 |
# ── Build repo map ────────────────────────────────────────────────────
|
|
@@ -216,7 +218,10 @@ Output ONLY the markdown. No preamble, no "Here is the README", no trailing comm
|
|
| 216 |
content = _re.sub(r'^(#+ .+?)`+\s*$', r'\1', content, flags=_re.MULTILINE)
|
| 217 |
|
| 218 |
# ── Cache + emit ──────────────────────────────────────────────────────
|
| 219 |
-
self._store.save_artifact(
|
|
|
|
|
|
|
|
|
|
| 220 |
yield {"stage": "done", "progress": 1.0, "content": content, "from_cache": False}
|
| 221 |
|
| 222 |
def invalidate(self, repo: str) -> None:
|
|
|
|
| 70 |
"""
|
| 71 |
# ── Cache hit ─────────────────────────────────────────────────────────
|
| 72 |
if not force:
|
| 73 |
+
meta = self._store.load_artifact_meta(repo, "readme")
|
| 74 |
+
if meta and meta.get("data") and meta["data"].get("content"):
|
| 75 |
+
print(f"[cache hit] readme for {repo} ({meta.get('generated_by_model', 'unknown')})")
|
| 76 |
yield {"stage": "loading", "progress": 0.1, "message": "Loading cached README…"}
|
| 77 |
+
yield {"stage": "done", "progress": 1.0,
|
| 78 |
+
"content": meta["data"]["content"], "from_cache": True}
|
| 79 |
return
|
| 80 |
|
| 81 |
# ── Build repo map ────────────────────────────────────────────────────
|
|
|
|
| 218 |
content = _re.sub(r'^(#+ .+?)`+\s*$', r'\1', content, flags=_re.MULTILINE)
|
| 219 |
|
| 220 |
# ── Cache + emit ──────────────────────────────────────────────────────
|
| 221 |
+
self._store.save_artifact(
|
| 222 |
+
repo, "readme", {"content": content},
|
| 223 |
+
generated_by_model=self._gen.current_model(),
|
| 224 |
+
)
|
| 225 |
yield {"stage": "done", "progress": 1.0, "content": content, "from_cache": False}
|
| 226 |
|
| 227 |
def invalidate(self, repo: str) -> None:
|