umanggarg commited on
Commit
9eea8ac
·
1 Parent(s): e528876

Phase 4: artifact provenance — model-tracking + debug endpoint

Browse files

Every cached artifact now records which model produced it, surfaced
via two channels:

1. Backend log on cache hit. _load_diagram, _load_tour, and the README
cache-hit path each print "[cache hit] <kind> for <repo> (<model>)" so the
project owner can read the HF Spaces logs and see whether served traffic
is hitting premium-baked artifacts or fallback-tier ones.

2. Debug endpoint GET /repos/{owner}/{name}/artifacts/info returns
{"repo": ..., "artifacts": [{kind, generated_by_model, generated_at}]}.
It is not linked from the UI — it is an owner-facing curl target for
auditing what each repo's cache currently contains.

Save sites now thread the live model id through:
- DiagramService passes self._gen.current_model() to _save_tour and
_save_diagram on every persist path.
- ReadmeService does the same on its post-generation save.
- load_artifact_meta returns the full payload (data + provenance) so
the load helpers don't need a second call to fetch the model name.

backend/main.py CHANGED
@@ -135,6 +135,7 @@ app.include_router(agent.router)
135
  app.include_router(diagrams.router)
136
  app.include_router(mcp_routes.router)
137
  app.include_router(sessions.router)
 
138
 
139
 
140
  # ── Health check ───────────────────────────────────────────────────────────────
 
135
  app.include_router(diagrams.router)
136
  app.include_router(mcp_routes.router)
137
  app.include_router(sessions.router)
138
+ app.include_router(sessions.artifacts_router)
139
 
140
 
141
  # ── Health check ───────────────────────────────────────────────────────────────
backend/routers/sessions.py CHANGED
@@ -107,3 +107,27 @@ def delete_session(
107
  """Delete a session. Idempotent — non-existent ids return ok=True."""
108
  store.delete_session(session_id)
109
  return {"ok": True}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  """Delete a session. Idempotent — non-existent ids return ok=True."""
108
  store.delete_session(session_id)
109
  return {"ok": True}
110
+
111
+
112
+ # ── Artifact provenance (debug-only inspection path) ──────────────────────
113
+ # Returns which model produced each cached artifact for a repo. Not linked
114
+ # from the UI; intended as a curl target so the project owner can audit
115
+ # cache contents (e.g. "is the nanoGPT tour using the premium model yet?").
116
+
117
+ artifacts_router = APIRouter(tags=["artifacts"])
118
+
119
+
120
+ @artifacts_router.get("/repos/{owner}/{name}/artifacts/info")
121
+ def list_artifact_info(
122
+ owner: str,
123
+ name: str,
124
+ store: Annotated[QdrantStore, Depends(get_qdrant_store)],
125
+ ):
126
+ """Return the kind / generated_by_model / generated_at of every cached
127
+ artifact for this repo. Used to audit pre-bake runs without touching
128
+ the live UI."""
129
+ repo = f"{owner}/{name}"
130
+ return {
131
+ "repo": repo,
132
+ "artifacts": store.list_artifacts(repo),
133
+ }
backend/services/diagram_service.py CHANGED
@@ -365,10 +365,11 @@ class DiagramService:
365
  # an in-memory hot cache for fast repeat reads within a process.
366
 
367
  def _load_diagram(self, repo: str, diagram_type: str) -> dict | None:
368
- data = self._store.load_artifact(repo, f"diagram_{diagram_type}")
369
- if data is not None:
370
- self._cache[(repo, diagram_type)] = data
371
- return data
 
372
  return None
373
 
374
  def _save_diagram(
@@ -389,10 +390,11 @@ class DiagramService:
389
  pass # persistence failure is non-fatal — memory cache still works
390
 
391
  def _load_tour(self, repo: str) -> dict | None:
392
- data = self._store.load_artifact(repo, "tour")
393
- if data is not None:
394
- self._tour_cache[repo] = data
395
- return data
 
396
  return None
397
 
398
  def _save_tour(self, repo: str, data: dict, model: str | None = None) -> None:
@@ -453,7 +455,7 @@ class DiagramService:
453
  return {"error": "Could not generate diagram. Try regenerating."}
454
 
455
  self._cache[cache_key] = data
456
- self._save_diagram(repo, diagram_type, data)
457
  return {"diagram": data, "type": diagram_type}
458
 
459
  def build_tour(self, repo: str) -> dict:
@@ -554,7 +556,7 @@ class DiagramService:
554
  c["depends_on"] = [d for d in c.get("depends_on", []) if d in valid_ids and d != c["id"]]
555
 
556
  self._tour_cache[repo] = tour
557
- self._save_tour(repo, tour)
558
  return tour
559
 
560
  def build_tour_stream(self, repo: str, force: bool = False):
@@ -602,7 +604,7 @@ class DiagramService:
602
  tour = {k: v for k, v in event.items()
603
  if k not in ("stage", "progress", "message", "trace")}
604
  self._tour_cache[repo] = tour
605
- self._save_tour(repo, tour) # overwrites old disk file on success
606
  elif event.get("stage") == "error" and force:
607
  # Generation failed — fall back to the old disk cache if available
608
  # so the user sees stale-but-valid data instead of a hard error
@@ -676,7 +678,7 @@ class DiagramService:
676
  return
677
 
678
  self._cache[cache_key] = data
679
- self._save_diagram(repo, diagram_type, data)
680
  yield {"stage": "done", "progress": 1.0, "diagram": data, "type": diagram_type}
681
 
682
  def invalidate(self, repo: str):
 
365
  # an in-memory hot cache for fast repeat reads within a process.
366
 
367
  def _load_diagram(self, repo: str, diagram_type: str) -> dict | None:
368
+ meta = self._store.load_artifact_meta(repo, f"diagram_{diagram_type}")
369
+ if meta and meta.get("data") is not None:
370
+ print(f"[cache hit] diagram_{diagram_type} for {repo} ({meta.get('generated_by_model','unknown')})")
371
+ self._cache[(repo, diagram_type)] = meta["data"]
372
+ return meta["data"]
373
  return None
374
 
375
  def _save_diagram(
 
390
  pass # persistence failure is non-fatal — memory cache still works
391
 
392
  def _load_tour(self, repo: str) -> dict | None:
393
+ meta = self._store.load_artifact_meta(repo, "tour")
394
+ if meta and meta.get("data") is not None:
395
+ print(f"[cache hit] tour for {repo} ({meta.get('generated_by_model','unknown')})")
396
+ self._tour_cache[repo] = meta["data"]
397
+ return meta["data"]
398
  return None
399
 
400
  def _save_tour(self, repo: str, data: dict, model: str | None = None) -> None:
 
455
  return {"error": "Could not generate diagram. Try regenerating."}
456
 
457
  self._cache[cache_key] = data
458
+ self._save_diagram(repo, diagram_type, data, model=self._gen.current_model())
459
  return {"diagram": data, "type": diagram_type}
460
 
461
  def build_tour(self, repo: str) -> dict:
 
556
  c["depends_on"] = [d for d in c.get("depends_on", []) if d in valid_ids and d != c["id"]]
557
 
558
  self._tour_cache[repo] = tour
559
+ self._save_tour(repo, tour, model=self._gen.current_model())
560
  return tour
561
 
562
  def build_tour_stream(self, repo: str, force: bool = False):
 
604
  tour = {k: v for k, v in event.items()
605
  if k not in ("stage", "progress", "message", "trace")}
606
  self._tour_cache[repo] = tour
607
+ self._save_tour(repo, tour, model=self._gen.current_model())
608
  elif event.get("stage") == "error" and force:
609
  # Generation failed — fall back to the old disk cache if available
610
  # so the user sees stale-but-valid data instead of a hard error
 
678
  return
679
 
680
  self._cache[cache_key] = data
681
+ self._save_diagram(repo, diagram_type, data, model=self._gen.current_model())
682
  yield {"stage": "done", "progress": 1.0, "diagram": data, "type": diagram_type}
683
 
684
  def invalidate(self, repo: str):
backend/services/readme_service.py CHANGED
@@ -70,10 +70,12 @@ class ReadmeService:
70
  """
71
  # ── Cache hit ─────────────────────────────────────────────────────────
72
  if not force:
73
- cached = self._store.load_artifact(repo, "readme")
74
- if cached and isinstance(cached, dict) and cached.get("content"):
 
75
  yield {"stage": "loading", "progress": 0.1, "message": "Loading cached README…"}
76
- yield {"stage": "done", "progress": 1.0, "content": cached["content"], "from_cache": True}
 
77
  return
78
 
79
  # ── Build repo map ────────────────────────────────────────────────────
@@ -216,7 +218,10 @@ Output ONLY the markdown. No preamble, no "Here is the README", no trailing comm
216
  content = _re.sub(r'^(#+ .+?)`+\s*$', r'\1', content, flags=_re.MULTILINE)
217
 
218
  # ── Cache + emit ──────────────────────────────────────────────────────
219
- self._store.save_artifact(repo, "readme", {"content": content})
 
 
 
220
  yield {"stage": "done", "progress": 1.0, "content": content, "from_cache": False}
221
 
222
  def invalidate(self, repo: str) -> None:
 
70
  """
71
  # ── Cache hit ─────────────────────────────────────────────────────────
72
  if not force:
73
+ meta = self._store.load_artifact_meta(repo, "readme")
74
+ if meta and meta.get("data") and meta["data"].get("content"):
75
+ print(f"[cache hit] readme for {repo} ({meta.get('generated_by_model', 'unknown')})")
76
  yield {"stage": "loading", "progress": 0.1, "message": "Loading cached README…"}
77
+ yield {"stage": "done", "progress": 1.0,
78
+ "content": meta["data"]["content"], "from_cache": True}
79
  return
80
 
81
  # ── Build repo map ────────────────────────────────────────────────────
 
218
  content = _re.sub(r'^(#+ .+?)`+\s*$', r'\1', content, flags=_re.MULTILINE)
219
 
220
  # ── Cache + emit ──────────────────────────────────────────────────────
221
+ self._store.save_artifact(
222
+ repo, "readme", {"content": content},
223
+ generated_by_model=self._gen.current_model(),
224
+ )
225
  yield {"stage": "done", "progress": 1.0, "content": content, "from_cache": False}
226
 
227
  def invalidate(self, repo: str) -> None: