Chris4K commited on
Commit
7321749
·
verified ·
1 Parent(s): cd43a29

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +141 -46
main.py CHANGED
@@ -234,62 +234,84 @@ def q_stats() -> dict:
234
  }
235
 
236
  # ---------------------------------------------------------------------------
237
- # Reward scoring
238
  # ---------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  def score_trace_event(ev: dict) -> tuple[float, dict]:
240
  """
241
- Score a trace event reward in [-1.0, 1.0].
242
  Returns (score, components).
243
  """
244
- components = {}
245
- score = 0.0
246
 
247
- # Base: error is always bad
248
  if ev.get("status") == "error":
249
- components["error_penalty"] = -0.4
250
- score -= 0.4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
 
252
- # Latency score for LLM calls (lower = better)
253
  lat = ev.get("latency_ms")
254
- if lat is not None and ev.get("event_type") == "llm_call":
255
- if lat < 500:
256
- v = 0.3; components["latency_fast"] = v
257
- elif lat < 1500:
258
- v = 0.1; components["latency_ok"] = v
259
- elif lat < 4000:
260
- v = -0.1; components["latency_slow"] = v
261
- else:
262
- v = -0.3; components["latency_very_slow"] = v
263
- score += v
264
-
265
- # Token efficiency for LLM calls
266
- tin = ev.get("tokens_in") or 0
267
- tout = ev.get("tokens_out") or 0
268
- if tin > 0 and tout > 0 and ev.get("event_type") == "llm_call":
269
- ratio = tout / max(tin, 1)
270
- if ratio > 0.5:
271
- v = 0.1; components["token_efficiency"] = v; score += v
272
- elif ratio < 0.05:
273
- v = -0.05; components["token_low_output"] = v; score += v
274
-
275
- # ReAct step: reward progress
276
- if ev.get("event_type") == "react_step":
277
- components["react_progress"] = 0.1
278
- score += 0.1
279
-
280
- # Skill load: reward reuse over re-implementation
281
  if ev.get("event_type") == "skill_load":
282
- components["skill_reuse"] = 0.15
283
- score += 0.15
284
 
285
- # Self-reflect: always reward
286
- if ev.get("event_type") == "self_reflect":
287
- components["reflection_bonus"] = 0.2
288
- score += 0.2
289
 
290
- # Clamp to [-1, 1]
291
- score = max(-1.0, min(1.0, score))
292
- return round(score, 4), components
 
 
 
 
 
 
 
 
 
 
 
 
293
 
294
  # ---------------------------------------------------------------------------
295
  # Trace sync pipeline
@@ -387,6 +409,21 @@ def pull_and_score_traces() -> dict:
387
  # Q-table update: map event → (state, action)
388
  _update_qtable_from_trace(ev, reward)
389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
  scored += 1
391
  reward_sum += reward
392
  new_cursors[agent] = max(new_cursors.get(agent, 0), ts)
@@ -663,6 +700,10 @@ MCP_TOOLS = [
663
  "properties":{"description":{"type":"string"},"agent":{"type":"string"}}}},
664
  {"name":"learn_sync","description":"Trigger immediate trace pull and reward scoring.",
665
  "inputSchema":{"type":"object","properties":{}}},
 
 
 
 
666
  ]
667
 
668
  def handle_mcp(method, params, req_id):
@@ -691,6 +732,24 @@ def handle_mcp(method, params, req_id):
691
  cid = candidate_add(a["description"],a["agent"])
692
  return txt({"ok":True,"id":cid})
693
  if n=="learn_sync": return txt(pull_and_score_traces())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
694
  return {"jsonrpc":"2.0","id":req_id,"error":{"code":-32601,"message":f"Unknown tool: {n}"}}
695
  if method in ("notifications/initialized","notifications/cancelled"): return None
696
  return {"jsonrpc":"2.0","id":req_id,"error":{"code":-32601,"message":f"Method not found: {method}"}}
@@ -768,6 +827,42 @@ async def api_rlhf_label(entry_id:str, request:Request):
768
  ok = rlhf_label(entry_id, b.get("label","unlabeled"), b.get("reward"))
769
  return JSONResponse({"ok":ok})
770
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
771
  # --- Skill candidates ---
772
  @app.get("/api/candidates")
773
  async def api_candidates(status:str=Query("pending")):
@@ -1024,7 +1119,7 @@ function renderRewards(){
1024
  </div>
1025
  <div class="section">Scoring model</div>
1026
  <div style="background:var(--sf);border:1px solid var(--br);border-radius:8px;overflow:hidden">
1027
- ${[['error_penalty','-0.40','Any event with status=error'],['latency_fast (LLM <500ms)','+0.30','LLM call completed quickly'],['latency_ok (500-1500ms)','+0.10','LLM call acceptable latency'],['latency_slow (1500-4000ms)','-0.10','LLM call slow'],['latency_very_slow (>4000ms)','-0.30','LLM call very slow'],['token_efficiency','+0.10','Output/input ratio > 0.5'],['react_progress','+0.10','Each ReAct step completed'],['skill_reuse','+0.15','Skill loaded from FORGE'],['reflection_bonus','+0.20','Agent performed self-reflection']].map(([k,v,d])=>`<div class="config-row"><span class="config-key">${k}</span><span class="config-val">${v}</span><span class="config-desc">${d}</span></div>`).join('')}
1028
  </div>`;
1029
  }
1030
 
@@ -1135,4 +1230,4 @@ loadAll();setInterval(loadAll,15000);
1135
  async def root(): return HTMLResponse(content=SPA, media_type="text/html; charset=utf-8")
1136
 
1137
  if __name__ == "__main__":
1138
- uvicorn.run(app, host="0.0.0.0", port=PORT, log_level="info")
 
234
  }
235
 
236
  # ---------------------------------------------------------------------------
237
+ # Reward scoring — 0–10 float scale
238
  # ---------------------------------------------------------------------------
239
+ # Scale semantics:
240
+ # 0–1 catastrophic (PII leak, injection, critical safety failure)
241
+ # 2–3 failure (error, hallucinated tool, unrecoverable)
242
+ # 4–5 partial (slow, compensated saga, incomplete)
243
+ # 6 acceptable (baseline — completed without issues)
244
+ # 7 good (fast, used skill, memory stored)
245
+ # 8 excellent (all bonuses, fast, clean)
246
+ # 9 exceptional (auto ceiling — reserved for near-perfect)
247
+ # 10 human-only (PATCH /api/traces/{id}/rate override only)
248
+ #
249
+ # Auto-score is capped at 9.0.
250
+ # Human rating via PATCH /api/rlhf/{id} can set 10.
251
+ # RLHF auto-collection: score>=8 → preferred, score<=3 → rejected
252
+
253
+ SCORE_BASELINE = 6.0
254
+ SCORE_AUTO_CEILING = 9.0
255
+ SCORE_HUMAN_MAX = 10.0
256
+
257
  def score_trace_event(ev: dict) -> tuple[float, dict]:
258
  """
259
+ Score a trace event on a 0–10 float scale.
260
  Returns (score, components).
261
  """
262
+ components: dict = {}
263
+ score = SCORE_BASELINE
264
 
265
+ # ── Deductions ────────────────────────────────────────────────
266
  if ev.get("status") == "error":
267
+ components["error"] = -3.0
268
+ score -= 3.0
269
+
270
+ if ev.get("injection_detected"):
271
+ components["injection_detected"] = -4.0
272
+ score -= 4.0
273
+
274
+ if ev.get("pii_leaked"):
275
+ components["pii_leaked"] = -4.0
276
+ score -= 4.0
277
+
278
+ if ev.get("hallucinated_tool"):
279
+ components["hallucinated_tool"] = -3.0
280
+ score -= 3.0
281
+
282
+ if ev.get("saga_compensated"):
283
+ components["saga_compensated"] = -1.0
284
+ score -= 1.0
285
 
 
286
  lat = ev.get("latency_ms")
287
+ if lat is not None and lat > 8000:
288
+ components["latency_over_8s"] = -1.5
289
+ score -= 1.5
290
+
291
+ # ── Bonuses ───────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  if ev.get("event_type") == "skill_load":
293
+ components["skill_load"] = +0.5
294
+ score += 0.5
295
 
296
+ if ev.get("skill_candidate"):
297
+ components["skill_candidate"] = +1.0
298
+ score += 1.0
 
299
 
300
+ if ev.get("memory_stored"):
301
+ components["memory_stored"] = +0.3
302
+ score += 0.3
303
+
304
+ if lat is not None and lat < 1000 and ev.get("event_type") == "llm_call":
305
+ components["latency_under_1s"] = +0.5
306
+ score += 0.5
307
+
308
+ if ev.get("saga_clean"):
309
+ components["saga_clean"] = +0.5
310
+ score += 0.5
311
+
312
+ # Clamp 0–AUTO_CEILING (10 is human-only)
313
+ score = max(0.0, min(SCORE_AUTO_CEILING, score))
314
+ return round(score, 2), components
315
 
316
  # ---------------------------------------------------------------------------
317
  # Trace sync pipeline
 
409
  # Q-table update: map event → (state, action)
410
  _update_qtable_from_trace(ev, reward)
411
 
412
+ # RLHF auto-collection: preferred (>=8) and rejected (<=3)
413
+ if reward >= 8.0 or reward <= 3.0:
414
+ label = "approved" if reward >= 8.0 else "rejected"
415
+ prompt = (f"[{ev.get('agent','?')}] {ev.get('event_type','?')}: "
416
+ f"{ev.get('tool_name') or ev.get('model') or ev.get('task','')}")
417
+ completion = json.dumps({k: ev.get(k) for k in
418
+ ("status","latency_ms","tokens_out","saga_clean","skill_candidate","memory_stored")
419
+ if ev.get(k) is not None})
420
+ try:
421
+ rlhf_add(ev.get("agent","unknown"), prompt, completion,
422
+ label=label, reward=reward, source="auto",
423
+ meta={"trace_id": ev["id"], "components": components})
424
+ except Exception:
425
+ pass
426
+
427
  scored += 1
428
  reward_sum += reward
429
  new_cursors[agent] = max(new_cursors.get(agent, 0), ts)
 
700
  "properties":{"description":{"type":"string"},"agent":{"type":"string"}}}},
701
  {"name":"learn_sync","description":"Trigger immediate trace pull and reward scoring.",
702
  "inputSchema":{"type":"object","properties":{}}},
703
+ {"name":"learn_rate_trace","description":"Human rating override for a trace (0–10 float). Score 10 is human-only ceiling. Scores >=8 auto-labeled preferred, <=3 auto-labeled rejected in RLHF store.",
704
+ "inputSchema":{"type":"object","required":["trace_id","rating"],
705
+ "properties":{"trace_id":{"type":"string"},"rating":{"type":"number","minimum":0,"maximum":10},
706
+ "agent":{"type":"string"},"comment":{"type":"string"}}}},
707
  ]
708
 
709
  def handle_mcp(method, params, req_id):
 
732
  cid = candidate_add(a["description"],a["agent"])
733
  return txt({"ok":True,"id":cid})
734
  if n=="learn_sync": return txt(pull_and_score_traces())
735
+ if n=="learn_rate_trace":
736
+ rating = float(a["rating"])
737
+ if not (0.0 <= rating <= SCORE_HUMAN_MAX):
738
+ return txt({"ok":False,"error":f"rating must be 0–{SCORE_HUMAN_MAX}"})
739
+ agent = str(a.get("agent","unknown"))
740
+ comment = str(a.get("comment",""))
741
+ try: _http_patch(f"{TRACE_URL}/api/trace/{a['trace_id']}/reward",
742
+ {"reward":rating,"source":"human","comment":comment})
743
+ except Exception: pass
744
+ label = "approved" if rating>=8.0 else ("rejected" if rating<=3.0 else "unlabeled")
745
+ conn = get_db()
746
+ conn.execute("INSERT OR IGNORE INTO rewards (id,trace_id,agent,event_type,raw_score,components,ts) VALUES (?,?,?,?,?,?,?)",
747
+ (str(uuid.uuid4()),a["trace_id"],agent,"human_rating",rating,
748
+ json.dumps({"human_override":True,"comment":comment}),time.time()))
749
+ conn.commit(); conn.close()
750
+ rid = rlhf_add(agent,f"[human-rated] {a['trace_id']}",comment or "human override",
751
+ label=label,reward=rating,source="human",meta={"trace_id":a["trace_id"]})
752
+ return txt({"ok":True,"trace_id":a["trace_id"],"rating":rating,"label":label,"rlhf_id":rid})
753
  return {"jsonrpc":"2.0","id":req_id,"error":{"code":-32601,"message":f"Unknown tool: {n}"}}
754
  if method in ("notifications/initialized","notifications/cancelled"): return None
755
  return {"jsonrpc":"2.0","id":req_id,"error":{"code":-32601,"message":f"Method not found: {method}"}}
 
827
  ok = rlhf_label(entry_id, b.get("label","unlabeled"), b.get("reward"))
828
  return JSONResponse({"ok":ok})
829
 
830
+ @app.patch("/api/traces/{trace_id}/rate")
831
+ async def api_trace_rate(trace_id:str, request:Request):
832
+ """Human rating override — allows score of 10 (human-only ceiling).
833
+ Writes back to agent-trace and updates Q-table."""
834
+ if not _auth(request): raise HTTPException(403,"Invalid X-Learn-Key")
835
+ b = await request.json()
836
+ rating = float(b.get("rating", b.get("reward", 0.0)))
837
+ if not (0.0 <= rating <= SCORE_HUMAN_MAX):
838
+ raise HTTPException(400, f"rating must be 0–{SCORE_HUMAN_MAX}")
839
+ agent = str(b.get("agent","unknown"))
840
+ comment = str(b.get("comment",""))
841
+
842
+ # Write reward back to agent-trace (best-effort)
843
+ try:
844
+ _http_patch(f"{TRACE_URL}/api/trace/{trace_id}/reward",
845
+ {"reward": rating, "source": "human", "comment": comment})
846
+ except Exception:
847
+ pass
848
+
849
+ # Log in rewards table
850
+ conn = get_db()
851
+ conn.execute("""
852
+ INSERT OR IGNORE INTO rewards (id,trace_id,agent,event_type,raw_score,components,ts)
853
+ VALUES (?,?,?,?,?,?,?)
854
+ """, (str(uuid.uuid4()), trace_id, agent, "human_rating",
855
+ rating, json.dumps({"human_override": True, "comment": comment}), time.time()))
856
+ conn.commit(); conn.close()
857
+
858
+ # RLHF: store as approved/rejected based on rating
859
+ label = "approved" if rating >= 8.0 else ("rejected" if rating <= 3.0 else "unlabeled")
860
+ rlhf_add(agent, f"[human-rated trace] {trace_id}", comment or "human override",
861
+ label=label, reward=rating, source="human",
862
+ meta={"trace_id": trace_id, "comment": comment})
863
+
864
+ return JSONResponse({"ok": True, "trace_id": trace_id, "rating": rating, "label": label})
865
+
866
  # --- Skill candidates ---
867
  @app.get("/api/candidates")
868
  async def api_candidates(status:str=Query("pending")):
 
1119
  </div>
1120
  <div class="section">Scoring model</div>
1121
  <div style="background:var(--sf);border:1px solid var(--br);border-radius:8px;overflow:hidden">
1122
+ ${[['baseline','+6.0','Every event starts here (acceptable)'],['error','-3.0','status=error'],['injection_detected','-4.0','Injection flag from agent-harness'],['pii_leaked','-4.0','PII exfiltration detected by compliance'],['hallucinated_tool','-3.0','Agent called non-existent tool'],['saga_compensated','-1.0','Saga pattern ran compensations'],['latency > 8s','-1.5','LLM call took > 8000ms'],['skill_load','+0.5','Reused skill from FORGE'],['skill_candidate','+1.0','Agent surfaced a new skill pattern'],['memory_stored','+0.3','Agent stored to agent-memory'],['latency < 1s (LLM)','+0.5','LLM call completed in < 1000ms'],['saga_clean','+0.5','Saga completed without compensation'],['AUTO CEILING','9.0','Max auto-score (10 = human-only via PATCH /api/traces/{id}/rate)']].map(([k,v,d])=>`<div class="config-row"><span class="config-key">${k}</span><span class="config-val" style="color:${v.startsWith('-')?'var(--rd)':v==='9.0'?'var(--ye)':'var(--gr)'}">${v}</span><span class="config-desc">${d}</span></div>`).join('')}
1123
  </div>`;
1124
  }
1125
 
 
1230
  async def root(): return HTMLResponse(content=SPA, media_type="text/html; charset=utf-8")
1231
 
1232
  if __name__ == "__main__":
1233
+ uvicorn.run(app, host="0.0.0.0", port=PORT, log_level="info")