Spaces:

sh4shv4t
/

Parlay

Paused

App Files Files Community

sh4shv4t committed 12 days ago

Commit

0faca0b

1 Parent(s): 70be177

feat: added reward audit program

Browse files

Files changed (1) hide show

scripts/debug_rewards_colabstyle.py +120 -0

scripts/debug_rewards_colabstyle.py ADDED Viewed

	@@ -0,0 +1,120 @@

+"""
+Colab-style reward diagnostic (repo path: set the PARLAY_REPO environment variable, or leave it unset to use the parent of this script's directory).
+"""
+import inspect
+import json
+import os
+import re
+import sys
+REPO = os.environ.get("PARLAY_REPO", os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+sys.path.insert(0, REPO)
+# ── 1. Test the reward functions directly ─────────────────────
+from training.reward_fn import (  # noqa: E402
+    anti_capitulation_reward,
+    format_reward,
+    negotiation_efficiency_reward,
+    tom_accuracy_reward,
+)
+# Valid JSON, single line (user's Colab string had a broken newline inside the string)
+completions = [
+    (
+        '{"utterance": "I\'m willing to negotiate, but I need a significant raise.", '
+        '"offer_amount": 150000, "tactical_move": null}'
+    )
+]
+kwargs_hiring = {
+    "batna_seller": [195000.0],
+    "batna_buyer": [264500.0],
+    "zopa_width": [69500.0],
+    "scenario_id": ["hiring_package"],
+    "persona": ["shark"],
+}
+kwargs_saas = {
+    "batna_seller": [125000.0],
+    "batna_buyer": [165000.0],
+    "zopa_width": [40000.0],
+    "scenario_id": ["saas_enterprise"],
+    "persona": ["shark"],
+}
+print("=== REPO ===")
+print(f"  sys.path[0] = {sys.path[0]}")
+print("\n=== REWARD FUNCTION OUTPUTS ===")
+print(f"format_reward:         {format_reward(completions)}")
+print(f"anti_cap (hiring):     {anti_capitulation_reward(completions, **kwargs_hiring)}")
+print(f"tom_reward (hiring):   {tom_accuracy_reward(completions, **kwargs_hiring)}")
+print(f"efficiency (hiring):   {negotiation_efficiency_reward(completions, **kwargs_hiring)}")
+print(f"efficiency (saas):     {negotiation_efficiency_reward(completions, **kwargs_saas)}")
+# ── 2. Print the source of negotiation_efficiency_reward exactly as imported, so the code that actually ran can be checked ─
+print("\n=== negotiation_efficiency_reward SOURCE ===")
+src = inspect.getsource(negotiation_efficiency_reward)  # per the inspect docs, raises OSError if the source cannot be retrieved
+print(src)
+# ── 3. Step through the logic manually ───────────────────────
+print("\n=== MANUAL TRACE (hiring_package, offer=150000) ===")
+raw = completions[0]
+try:
+    parsed = json.loads(raw)
+    offer = parsed.get("offer_amount")
+    print(f"  parsed offer_amount: {offer!r}  (type: {type(offer).__name__})")
+except Exception as e:
+    print(f"  JSON parse failed: {e}")
+    offer = None
+batna_seller = 195000.0
+batna_buyer = 264500.0
+zopa_width = 69500.0
+scenario_id = "hiring_package"
+print(f"  scenario_id: {scenario_id}")
+print(f"  batna_seller: {batna_seller}  batna_buyer: {batna_buyer}")
+print(f"  zopa_width:   {zopa_width}")
+if offer is not None:
+    e_seller = (offer - batna_seller) / zopa_width
+    e_buyer = (batna_buyer - offer) / zopa_width
+    print(
+        f"  efficiency if treated as SELLER: {e_seller:.4f}  (offer - batna_seller) / width"
+    )
+    print(
+        f"  efficiency if treated as BUYER:  {e_buyer:.4f}  (batna_buyer - offer) / width"
+    )
+    print(
+        f"  offer ({offer}) vs batna_seller ({batna_seller}): "
+        f"{'ABOVE' if offer >= batna_seller else 'BELOW — anti-cap may fire'}"
+    )
+    print(
+        f"  offer ({offer}) vs batna_buyer  ({batna_buyer}):  "
+        f"{'AT OR BELOW' if offer <= batna_buyer else 'ABOVE batna_buyer'}"
+    )
+# ── 4. Check dataset paths (local) ─
+print("\n=== GRPO DATASET / DATA PATHS CHECK ===")
+for p in [
+    os.path.join(REPO, "data", "grpo_dataset"),
+    os.path.join(REPO, "data", "episodes.jsonl"),
+    os.path.join(REPO, "data", "episodes_v2.jsonl"),
+    REPO,
+]:
+    print(f"  exists={os.path.exists(p)!s:5}  {p}")
+# Grep-relevant lines from grpo_train
+print("\n=== grpo_train.py — lines mentioning build / batna / zopa / kwargs ===")
+gp = os.path.join(REPO, "training", "grpo_train.py")
+if os.path.isfile(gp):
+    with open(gp, encoding="utf-8") as f:
+        lines = f.readlines()
+    for i, line in enumerate(lines, start=1):
+        if re.search(
+            r"build_grpo|batna|zopa_width|def build|scenario_id|format_grpo",
+            line,
+        ):
+            print(f"  L{i}: {line.rstrip()}")
+print("\n=== DONE ===")