Spaces:
Sleeping
Sleeping
Commit ·
3f2a3ab
1
Parent(s): a5c1817
Major env overhaul: opponent negotiates naturally, gentler time penalty, relative aggression, simplified agent
Browse files- __pycache__/env_wrapper.cpython-312.pyc +0 -0
- env_wrapper.py +16 -7
- inference.py +21 -45
__pycache__/env_wrapper.cpython-312.pyc
CHANGED
|
Binary files a/__pycache__/env_wrapper.cpython-312.pyc and b/__pycache__/env_wrapper.cpython-312.pyc differ
|
|
|
env_wrapper.py
CHANGED
|
@@ -81,10 +81,15 @@ class Opponent:
|
|
| 81 |
return "REJECT", 0
|
| 82 |
|
| 83 |
# ── Acceptance Check ──
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
self.history.append({"round": round_num, "action": "ACCEPT", "price": agent_offer})
|
| 89 |
return "ACCEPT", agent_offer
|
| 90 |
|
|
@@ -192,8 +197,9 @@ class EnvWrapper:
|
|
| 192 |
else:
|
| 193 |
profit = self.agent_value - deal_price
|
| 194 |
|
| 195 |
-
#
|
| 196 |
-
|
|
|
|
| 197 |
base_reward = profit * time_factor
|
| 198 |
|
| 199 |
# Penalty for bad deals (agent accepts a losing deal)
|
|
@@ -268,7 +274,10 @@ class EnvWrapper:
|
|
| 268 |
action_str = f"OFFER {action_price}"
|
| 269 |
|
| 270 |
# ── CUMULATIVE AGGRESSION PENALTY ──
|
| 271 |
-
|
|
|
|
|
|
|
|
|
|
| 272 |
self.cumulative_aggression_penalty += 2.0
|
| 273 |
|
| 274 |
# Record this step in history
|
|
|
|
| 81 |
return "REJECT", 0
|
| 82 |
|
| 83 |
# ── Acceptance Check ──
|
| 84 |
+
# Opponent negotiates for a minimum number of rounds before accepting.
|
| 85 |
+
# Greedy opponents hold out longer; impatient ones settle sooner.
|
| 86 |
+
min_round_to_accept = max(2, self.patience // 3)
|
| 87 |
+
|
| 88 |
+
offer_acceptable = (
|
| 89 |
+
(self.opponent_role == "seller" and agent_offer >= self.opponent_value) or
|
| 90 |
+
(self.opponent_role == "buyer" and agent_offer <= self.opponent_value)
|
| 91 |
+
)
|
| 92 |
+
if offer_acceptable and round_num >= min_round_to_accept:
|
| 93 |
self.history.append({"round": round_num, "action": "ACCEPT", "price": agent_offer})
|
| 94 |
return "ACCEPT", agent_offer
|
| 95 |
|
|
|
|
| 197 |
else:
|
| 198 |
profit = self.agent_value - deal_price
|
| 199 |
|
| 200 |
+
# Gentle time decay: linear, max 50% loss even if all rounds used.
|
| 201 |
+
# This rewards fast deals but doesn't destroy multi-round negotiation.
|
| 202 |
+
time_factor = 1.0 - 0.5 * (self.round / self.max_rounds)
|
| 203 |
base_reward = profit * time_factor
|
| 204 |
|
| 205 |
# Penalty for bad deals (agent accepts a losing deal)
|
|
|
|
| 274 |
action_str = f"OFFER {action_price}"
|
| 275 |
|
| 276 |
# ── CUMULATIVE AGGRESSION PENALTY ──
|
| 277 |
+
# Scale threshold to ZOPA width so narrow-ZOPA tasks aren't unfairly punished
|
| 278 |
+
zopa = abs(self.agent_value - self.opponent_value)
|
| 279 |
+
aggression_threshold = max(100, int(zopa * 1.25))
|
| 280 |
+
if abs(action_price - self.opponent_value) > aggression_threshold:
|
| 281 |
self.cumulative_aggression_penalty += 2.0
|
| 282 |
|
| 283 |
# Record this step in history
|
inference.py
CHANGED
|
@@ -72,7 +72,7 @@ def run_task(client, model_name: str, task_config):
|
|
| 72 |
|
| 73 |
target_goal = "buy for as low as possible (below your maximum value)" if obs.role == "buyer" else "sell for as high as possible (above your minimum value)"
|
| 74 |
|
| 75 |
-
prompt = f"""You are an expert negotiator acting as a {obs.role}. Your objective is to {target_goal} and maximize your profit
|
| 76 |
|
| 77 |
CURRENT STATE:
|
| 78 |
* Your PRIVATE Valuation: {obs.agent_value} (your absolute limit — NEVER go past this)
|
|
@@ -81,28 +81,21 @@ CURRENT STATE:
|
|
| 81 |
* Opponent's last action: {obs.last_opponent_action}
|
| 82 |
* Opponent's last offer: {obs.last_opponent_offer}
|
| 83 |
|
| 84 |
-
{history_text}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
-
|
| 87 |
-
{("- Round 1: You only have " + str(obs.max_rounds) + " rounds! Start at about 60-65% of your own valuation (" + str(obs.agent_value) + ") as your first offer. Then increase quickly by 100+ each round.") if obs.max_rounds <= 8 else ("- Round 1: Start AGGRESSIVE. Offer around 30-35% of the opponent's opening price. This anchors the negotiation in your favor." if obs.role == "buyer" else "- Round 1: Start AGGRESSIVE. Offer around 2-3x your minimum value. This anchors the negotiation in your favor.")}
|
| 88 |
-
- Round 2-3: Concede moderately. {"Increase" if obs.role == "buyer" else "Decrease"} your offer to find their breaking point.
|
| 89 |
-
- Round 3-4: If the opponent's counter-offer is profitable for you ({"below" if obs.role == "buyer" else "above"} your valuation), ACCEPT it. Otherwise make one final offer near the midpoint.
|
| 90 |
-
- Round 5+: You are running out of time. ACCEPT any profitable deal immediately.
|
| 91 |
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
-
|
| 95 |
-
1. PROFIT MATTERS MOST: Your score = (your profit) Γ (time bonus). A great deal on round 3 beats a mediocre deal on round 1.
|
| 96 |
-
2. TIME BONUS: Decreases each round. Don't drag past round 5.
|
| 97 |
-
3. AGGRESSION PENALTY: Offers extremely far from reasonable (e.g., offering 100 when market is 500+) are penalized. Stay within a plausible range.
|
| 98 |
-
4. NEVER REJECT β a bad deal is almost always better than no deal (rejection = -50 penalty).
|
| 99 |
-
|
| 100 |
-
Choose exactly ONE action:
|
| 101 |
-
* OFFER <price> β counter-offer ({"must be below " + str(obs.agent_value) if obs.role == "buyer" else "must be above " + str(obs.agent_value)})
|
| 102 |
-
* ACCEPT β accept if the opponent's offer gives you good profit
|
| 103 |
-
* REJECT β walk away (almost never do this)
|
| 104 |
-
|
| 105 |
-
Respond with ONLY your action. Example: OFFER 350"""
|
| 106 |
|
| 107 |
action_str = "REJECT"
|
| 108 |
action_price = 0
|
|
@@ -151,50 +144,33 @@ Respond with ONLY your action. Example: OFFER 350"""
|
|
| 151 |
action_str = "REJECT"
|
| 152 |
action_price = 0
|
| 153 |
|
| 154 |
-
# ββ
|
|
|
|
| 155 |
if action_str == "ACCEPT":
|
| 156 |
opp_offer = obs.last_opponent_offer
|
| 157 |
if obs.role == "buyer" and opp_offer > obs.agent_value:
|
| 158 |
-
# Opponent wants more than our max β counter instead
|
| 159 |
action_str = "OFFER"
|
| 160 |
-
action_price = last_agent_offer +
|
| 161 |
elif obs.role == "seller" and opp_offer < obs.agent_value:
|
| 162 |
action_str = "OFFER"
|
| 163 |
-
action_price = last_agent_offer -
|
| 164 |
|
| 165 |
-
#
|
| 166 |
if action_str.startswith("OFFER") and action_price > 0:
|
| 167 |
-
# Hard limit: never cross own valuation
|
| 168 |
if obs.role == "buyer":
|
| 169 |
action_price = min(action_price, obs.agent_value - 10)
|
| 170 |
else:
|
| 171 |
action_price = max(action_price, obs.agent_value + 10)
|
| 172 |
|
| 173 |
-
#
|
| 174 |
-
max_step = max(80, 1200 // env.max_rounds)
|
| 175 |
if last_agent_offer is not None:
|
| 176 |
if obs.role == "buyer":
|
| 177 |
-
action_price = min(action_price, last_agent_offer +
|
| 178 |
else:
|
| 179 |
-
action_price = max(action_price, last_agent_offer -
|
| 180 |
-
|
| 181 |
-
# Target price: aim for ~40% of the gap from agent's value
|
| 182 |
-
# This is where the best profit-vs-time tradeoff lives
|
| 183 |
-
gap = abs(obs.agent_value - obs.current_offer)
|
| 184 |
-
if obs.role == "buyer":
|
| 185 |
-
target = obs.agent_value - int(gap * 0.4) # aim to buy well below value
|
| 186 |
-
# Don't let round 2+ offers go above target unless desperate (round 4+)
|
| 187 |
-
if step_n <= 3 and last_agent_offer is not None:
|
| 188 |
-
action_price = min(action_price, max(target, last_agent_offer + 50))
|
| 189 |
-
else:
|
| 190 |
-
target = obs.agent_value + int(gap * 0.4)
|
| 191 |
-
if step_n <= 3 and last_agent_offer is not None:
|
| 192 |
-
action_price = max(action_price, min(target, last_agent_offer - 50))
|
| 193 |
|
| 194 |
action_str = f"OFFER {action_price}"
|
| 195 |
last_agent_offer = action_price
|
| 196 |
-
elif action_str.startswith("OFFER"):
|
| 197 |
-
last_agent_offer = action_price
|
| 198 |
|
| 199 |
# ── Step the environment ──
|
| 200 |
obs, reward, done, info = env.step(action_str, action_price)
|
|
|
|
| 72 |
|
| 73 |
target_goal = "buy for as low as possible (below your maximum value)" if obs.role == "buyer" else "sell for as high as possible (above your minimum value)"
|
| 74 |
|
| 75 |
+
prompt = f"""You are an expert negotiator acting as a {obs.role}. Your objective is to {target_goal} and maximize your profit.
|
| 76 |
|
| 77 |
CURRENT STATE:
|
| 78 |
* Your PRIVATE Valuation: {obs.agent_value} (your absolute limit — NEVER go past this)
|
|
|
|
| 81 |
* Opponent's last action: {obs.last_opponent_action}
|
| 82 |
* Opponent's last offer: {obs.last_opponent_offer}
|
| 83 |
|
| 84 |
+
{history_text}STRATEGY:
|
| 85 |
+
- Start your first offer at about 40-50% of the opening price. {"As a buyer with valuation " + str(obs.agent_value) + ", aim to pay as LITTLE as possible — profit = valuation minus price." if obs.role == "buyer" else "As a seller with valuation " + str(obs.agent_value) + ", aim to sell as HIGH as possible — profit = price minus valuation."}
|
| 86 |
+
- Concede slowly each round (50-80 per round), watching the opponent move toward you.
|
| 87 |
+
- If the opponent's counter is {"below" if obs.role == "buyer" else "above"} {obs.agent_value}, ACCEPT it — that's guaranteed profit!
|
| 88 |
+
- Close within 3-5 rounds for best time bonus.
|
| 89 |
+
- NEVER REJECT — rejection = -50 penalty.
|
| 90 |
|
| 91 |
+
HARD RULE: {"Your offer must be BELOW " + str(obs.agent_value) + ". Offering above it loses you money." if obs.role == "buyer" else "Your offer must be ABOVE " + str(obs.agent_value) + ". Offering below it loses you money."}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
+
Choose ONE action:
|
| 94 |
+
* OFFER <price>
|
| 95 |
+
* ACCEPT
|
| 96 |
+
* REJECT
|
| 97 |
|
| 98 |
+
Respond with ONLY your action. Example: OFFER 450"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
action_str = "REJECT"
|
| 101 |
action_price = 0
|
|
|
|
| 144 |
action_str = "REJECT"
|
| 145 |
action_price = 0
|
| 146 |
|
| 147 |
+
# ── Safety guardrails ──
|
| 148 |
+
# ACCEPT guard: never accept a deal worse than our valuation
|
| 149 |
if action_str == "ACCEPT":
|
| 150 |
opp_offer = obs.last_opponent_offer
|
| 151 |
if obs.role == "buyer" and opp_offer > obs.agent_value:
|
|
|
|
| 152 |
action_str = "OFFER"
|
| 153 |
+
action_price = last_agent_offer + 50 if last_agent_offer else int(obs.agent_value * 0.6)
|
| 154 |
elif obs.role == "seller" and opp_offer < obs.agent_value:
|
| 155 |
action_str = "OFFER"
|
| 156 |
+
action_price = last_agent_offer - 50 if last_agent_offer else int(obs.agent_value * 1.4)
|
| 157 |
|
| 158 |
+
# Valuation clamp: never offer past our own limit
|
| 159 |
if action_str.startswith("OFFER") and action_price > 0:
|
|
|
|
| 160 |
if obs.role == "buyer":
|
| 161 |
action_price = min(action_price, obs.agent_value - 10)
|
| 162 |
else:
|
| 163 |
action_price = max(action_price, obs.agent_value + 10)
|
| 164 |
|
| 165 |
+
# Concession cap: max 120 per round to prevent panic jumps
|
|
|
|
| 166 |
if last_agent_offer is not None:
|
| 167 |
if obs.role == "buyer":
|
| 168 |
+
action_price = min(action_price, last_agent_offer + 120)
|
| 169 |
else:
|
| 170 |
+
action_price = max(action_price, last_agent_offer - 120)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
|
| 172 |
action_str = f"OFFER {action_price}"
|
| 173 |
last_agent_offer = action_price
|
|
|
|
|
|
|
| 174 |
|
| 175 |
# ── Step the environment ──
|
| 176 |
obs, reward, done, info = env.step(action_str, action_price)
|