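"""
Colab-style reward diagnostic.

Repo path: set the PARLAY_REPO environment variable to the repository root;
when it is unset, the root is taken to be the parent of the directory that
contains this file.
"""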
import inspect
import json
import os
import re
import sys

# Resolve the repository root and put it first on sys.path.
# PARLAY_REPO wins when it is set. The file-based default is computed lazily:
# passing it as the second argument of os.environ.get() would evaluate
# __file__ even when the variable is set, which fails in notebook cells.
REPO = os.environ.get("PARLAY_REPO")
if REPO is None:
    try:
        # Default: two dirname() hops up from this file's absolute path.
        REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    except NameError:
        # No __file__ (e.g. code pasted into a notebook cell): fall back to
        # the current working directory.
        REPO = os.getcwd()
sys.path.insert(0, REPO)
# Reward functions under test. Kept below the sys.path setup on purpose:
# presumably the `training` package is found via REPO (confirm for your layout).
from training.reward_fn import (  # noqa: E402
    anti_capitulation_reward,
    format_reward,
    negotiation_efficiency_reward,
    tom_accuracy_reward,
)
# One sample completion, stored as a JSON string. Serialising with json.dumps
# (default separators) yields the same text as the hand-written literal while
# guaranteeing valid JSON without manual quote escaping.
completions = [
    json.dumps(
        {
            "utterance": "I'm willing to negotiate, but I need a significant raise.",
            "offer_amount": 150000,
            "tactical_move": None,
        }
    )
]

# Keyword columns passed to the reward functions; each value is a one-element
# list, matching the single entry in `completions`.
kwargs_hiring = {
    "batna_seller": [195000.0],
    "batna_buyer": [264500.0],
    "zopa_width": [69500.0],
    "scenario_id": ["hiring_package"],
    "persona": ["shark"],
}
kwargs_saas = {
    "batna_seller": [125000.0],
    "batna_buyer": [165000.0],
    "zopa_width": [40000.0],
    "scenario_id": ["saas_enterprise"],
    "persona": ["shark"],
}
# Report the first sys.path entry, then print what each reward function
# returns for the sample completion.
print("=== REPO ===")
print(f" sys.path[0] = {sys.path[0]}")

print("\n=== REWARD FUNCTION OUTPUTS ===")
# format_reward is called with the completions only; the remaining functions
# also receive the scenario keyword columns.
print(f"format_reward: {format_reward(completions)}")
for label, reward_fn, scenario_kwargs in (
    ("anti_cap (hiring)", anti_capitulation_reward, kwargs_hiring),
    ("tom_reward (hiring)", tom_accuracy_reward, kwargs_hiring),
    ("efficiency (hiring)", negotiation_efficiency_reward, kwargs_hiring),
    ("efficiency (saas)", negotiation_efficiency_reward, kwargs_saas),
):
    print(f"{label}: {reward_fn(completions, **scenario_kwargs)}")
# Print the source of negotiation_efficiency_reward as it was actually imported.
print("\n=== negotiation_efficiency_reward SOURCE ===")
try:
    src = inspect.getsource(negotiation_efficiency_reward)
except (OSError, TypeError) as exc:
    # inspect.getsource raises OSError when the source text cannot be
    # retrieved and TypeError for built-in objects; report it instead of
    # aborting the remaining diagnostics.
    src = f" <source unavailable: {exc}>"
print(src)
# Re-derive the efficiency numbers by hand for the hiring scenario so they can
# be compared with the reward function output printed earlier.
print("\n=== MANUAL TRACE (hiring_package, offer=150000) ===")
raw = completions[0]
offer = None
try:
    parsed = json.loads(raw)
except json.JSONDecodeError as e:
    print(f" JSON parse failed: {e}")
else:
    if isinstance(parsed, dict):
        offer = parsed.get("offer_amount")
        print(f" parsed offer_amount: {offer!r} (type: {type(offer).__name__})")
    else:
        # Valid JSON that is not an object has no "offer_amount" to read.
        print(f" JSON parse failed: expected an object, got {type(parsed).__name__}")

# Read the scenario numbers from the same dict that is passed to the reward
# functions, so the trace cannot drift from what was actually evaluated.
batna_seller = kwargs_hiring["batna_seller"][0]
batna_buyer = kwargs_hiring["batna_buyer"][0]
zopa_width = kwargs_hiring["zopa_width"][0]
scenario_id = kwargs_hiring["scenario_id"][0]

print(f" scenario_id: {scenario_id}")
print(f" batna_seller: {batna_seller} batna_buyer: {batna_buyer}")
print(f" zopa_width: {zopa_width}")

# bool is a subclass of int, so exclude it explicitly from "numeric".
offer_is_number = isinstance(offer, (int, float)) and not isinstance(offer, bool)
if offer_is_number:
    e_seller = (offer - batna_seller) / zopa_width
    e_buyer = (batna_buyer - offer) / zopa_width
    print(
        f" efficiency if treated as SELLER: {e_seller:.4f} (offer - batna_seller) / width"
    )
    print(
        f" efficiency if treated as BUYER: {e_buyer:.4f} (batna_buyer - offer) / width"
    )
    print(
        f" offer ({offer}) vs batna_seller ({batna_seller}): "
        f"{'AT OR ABOVE' if offer >= batna_seller else 'BELOW — anti-cap may fire'}"
    )
    print(
        f" offer ({offer}) vs batna_buyer ({batna_buyer}): "
        f"{'AT OR BELOW' if offer <= batna_buyer else 'ABOVE batna_buyer'}"
    )
elif offer is not None:
    # A present but non-numeric offer would make the arithmetic above raise.
    print(f" offer_amount is not numeric ({offer!r}); skipping efficiency trace")
# Report whether each expected data location exists under REPO
# (the last entry is the repository root itself).
print("\n=== GRPO DATASET / DATA PATHS CHECK ===")
for p in (
    os.path.join(REPO, "data", "grpo_dataset"),
    os.path.join(REPO, "data", "episodes.jsonl"),
    os.path.join(REPO, "data", "episodes_v2.jsonl"),
    REPO,
):
    # `!s:5` pads "True"/"False" to five characters so the paths line up.
    print(f" exists={os.path.exists(p)!s:5} {p}")
# List the lines of training/grpo_train.py that match the keyword pattern,
# prefixed with their 1-based line numbers.
print("\n=== grpo_train.py — lines mentioning build / batna / zopa / kwargs ===")
gp = os.path.join(REPO, "training", "grpo_train.py")
# Compiled once, outside the per-line loop.
pattern = re.compile(
    r"build_grpo|batna|zopa_width|def build|scenario_id|format_grpo"
)
if os.path.isfile(gp):
    with open(gp, encoding="utf-8") as f:
        lines = f.readlines()
    for i, line in enumerate(lines, start=1):
        if pattern.search(line):
            print(f" L{i}: {line.rstrip()}")
else:
    # Say so explicitly; an empty section would look like "no matches".
    print(f" (not found: {gp})")

print("\n=== DONE ===")