Spaces:
Sleeping
Sleeping
File size: 4,697 Bytes
025f14a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | import random
class Opponent:
    """Scripted negotiation counterpart with a fixed behavioural profile.

    Each profile supplies four parameters:

    * ``r``        -- initial concession rate (fraction of the gap to the
      opponent's own value that is closed per counter-offer).
    * ``alpha``    -- weight given to the current offer when blending it with
      the conceded offer; higher means the counter-offer moves less.
    * ``patience`` -- round number after which the concession rate starts
      growing.
    * ``epsilon``  -- half-width of the integer noise added to each
      counter-offer.
    """

    # type_str -> (r, alpha, patience, epsilon)
    _PROFILES = {
        "greedy": (0.05, 0.7, 10, 5),
        "fair": (0.15, 0.4, 7, 10),
        "impatient": (0.25, 0.2, 3, 15),
    }

    def __init__(self, type_str, value, role):
        """Create an opponent.

        Args:
            type_str: Profile name; one of ``"greedy"``, ``"fair"`` or
                ``"impatient"``.
            value: The opponent's own valuation; also its acceptance
                threshold and the target its counter-offers move toward.
            role: ``"seller"`` or ``"buyer"``; selects the acceptance rule
                used by :meth:`get_response`.

        Raises:
            ValueError: If ``type_str`` is not a known profile name.
        """
        try:
            profile = self._PROFILES[type_str]
        except (KeyError, TypeError):
            # Fail here with a clear message instead of an AttributeError on
            # a missing ``self.r`` further down.
            known = ", ".join(sorted(self._PROFILES))
            raise ValueError(
                f"unknown opponent type {type_str!r}; expected one of: {known}"
            ) from None

        self.type = type_str
        self.opponent_value = value
        self.opponent_role = role
        self.r, self.alpha, self.patience, self.epsilon = profile
        # Mutable copy of ``r``: grows once the opponent runs out of patience.
        self.concession_rate = self.r

    def get_response(self, round_num, current_offer, agent_offer):
        """React to the agent's offer for the given round.

        Args:
            round_num: Current round number, compared against ``patience``.
            current_offer: The offer currently on the table, used as the
                starting point for the counter-offer.
            agent_offer: The price proposed by the agent.

        Returns:
            ``("ACCEPT", agent_offer)`` when the agent's price meets the
            opponent's value (at or above it for a seller, at or below it for
            a buyer); otherwise ``("OFFER", price)`` where ``price`` is an
            ``int`` clamped to the range 100..1000.
        """
        if self.opponent_role == "seller" and agent_offer >= self.opponent_value:
            return "ACCEPT", agent_offer
        if self.opponent_role == "buyer" and agent_offer <= self.opponent_value:
            return "ACCEPT", agent_offer

        # Past the patience threshold every rejected round raises the
        # concession rate by 0.05, capped at 0.4.
        if round_num > self.patience:
            self.concession_rate = min(0.4, self.concession_rate + 0.05)

        # Move part of the way from the current offer toward the opponent's
        # own value, then damp the move by blending with the current offer.
        delta = self.opponent_value - current_offer
        next_offer = current_offer + self.concession_rate * delta
        next_offer = (1.0 - self.alpha) * next_offer + self.alpha * current_offer

        # Exactly one random draw per counter-offer keeps seeded runs stable.
        next_offer += random.randint(-self.epsilon, self.epsilon)
        next_offer = max(100, min(1000, int(next_offer)))
        return "OFFER", next_offer
class Env:
    """Single-episode price negotiation between an agent and an Opponent.

    The agent submits a price each step; the opponent either accepts it or
    answers with a counter-offer. An episode ends on acceptance or once
    ``max_rounds`` steps have been taken without a deal.
    """

    def __init__(self, opp_type, a_val, o_val, role):
        """Set up a fresh episode.

        Args:
            opp_type: Profile name forwarded to ``Opponent``.
            a_val: The agent's valuation, used to compute profit.
            o_val: The opponent's valuation.
            role: The agent's role. ``"buyer"`` makes the opponent a seller;
                any other value makes the opponent a buyer.
        """
        self.agent_value = a_val
        self.opponent_value = o_val
        self.role = role
        self.opp_type = opp_type
        # The opponent always takes the side opposite to a "buyer" agent.
        self.opp_role = "buyer" if role != "buyer" else "seller"
        self.opp = Opponent(opp_type, o_val, self.opp_role)
        # Bargaining starts from the integer midpoint of the two valuations.
        self.current_offer = (a_val + o_val) // 2
        self.max_rounds = 20
        self.round = 0
        self.last_opp_action = "START"
        self.last_opp_offer = 0

    def step(self, action_price):
        """Advance one round with the agent offering ``action_price``.

        Returns:
            A ``(reward, done)`` tuple. On acceptance the reward is the
            agent's profit scaled by ``1 - round / max_rounds``, minus 20 if
            the profit is negative and minus 2 if the offer was more than 150
            away from the opponent's value. On a counter-offer the reward is
            0, or -50 with ``done`` set once ``max_rounds`` is reached.
        """
        self.round += 1
        too_far = abs(action_price - self.opponent_value) > 150
        verdict, counter = self.opp.get_response(
            self.round, self.current_offer, action_price
        )

        if verdict != "ACCEPT":
            # No deal: the counter-offer becomes the new offer on the table.
            self.current_offer = counter
            self.last_opp_action = "OFFER"
            self.last_opp_offer = counter
            # NOTE(review): the source lost its indentation; the timeout
            # penalty is read here as applying only when no deal was struck.
            # Confirm against the original file.
            if self.round >= self.max_rounds:
                return -50, True
            return 0, False

        # Deal: it closes at the agent's own price.
        self.last_opp_action = "ACCEPT"
        self.last_opp_offer = action_price
        if self.role == "seller":
            profit = action_price - self.agent_value
        else:
            profit = self.agent_value - action_price

        # Earlier deals keep more of the profit.
        reward = profit * (1.0 - (self.round / self.max_rounds))
        if profit < 0:
            reward -= 20
        if too_far:
            reward -= 2
        return reward, True
def run_sim(name, opp_type, role, a_val, o_val, b_type):
    """Run one scripted negotiation episode and print a round-by-round log.

    Args:
        name: Label printed in the episode header.
        opp_type: Opponent profile name passed to ``Env``.
        role: The agent's role, ``"buyer"`` or otherwise treated as seller
            when choosing prices.
        a_val: The agent's valuation.
        o_val: The opponent's valuation.
        b_type: Scripted agent behaviour: 1 repeats a fixed price (100 as
            buyer, 900 otherwise), 2 repeats an extreme price (10 as buyer,
            1500 otherwise), 3 opens at 100/1000, moves to within 100 of
            ``o_val`` in round two and offers ``o_val`` afterwards. Any other
            value offers 0 every round.
    """
    print(f"\n=== {name} ===")
    print(f"Opponent Type: {opp_type} | Agent Role: {role} | Agent Value: {a_val} | Opp Value: {o_val}")
    env = Env(opp_type, a_val, o_val, role)
    buying = role == "buyer"

    def choose_price(turn):
        # Scripted price for the given 1-based turn under ``b_type``.
        if b_type == 1:
            return 100 if buying else 900
        if b_type == 2:
            return 10 if buying else 1500
        if b_type == 3:
            if turn == 1:
                return 100 if buying else 1000
            if turn == 2:
                return o_val - 100 if buying else o_val + 100
            return o_val
        return 0

    finished = False
    # The round cap is a safety net on top of the environment's own limit.
    while not finished and env.round <= 25:
        offer = choose_price(env.round + 1)
        step_reward, finished = env.step(offer)
        # Only counter-offers carry a price worth showing.
        shown = env.last_opp_offer if env.last_opp_action == "OFFER" else ""
        print(f"[Round {env.round}] Agent OFFER {offer} -> Opponent {env.last_opp_action} {shown} | Step Reward: {step_reward:.2f} | Done: {finished}")

    if env.last_opp_action == "ACCEPT":
        print(f"Final Deal Price: {env.last_opp_offer} | Final Reward: {step_reward:.2f}")
    else:
        print(f"Final Deal Price: NONE | Final Reward: {step_reward:.2f}")
# Fixed seed so the opponent's noisy counter-offers are reproducible.
random.seed(42)
print("--- TEST LOGS ---")

# (name, opponent type, agent role, agent value, opponent value, behaviour)
for _scenario in (
    ("Test 1A Baseline Greedy", "greedy", "buyer", 800, 500, 1),
    ("Test 1B Baseline Fair", "fair", "buyer", 800, 500, 1),
    ("Test 1C Baseline Impatient", "impatient", "buyer", 800, 500, 1),
    ("Test 2A Extreme vs Fair", "fair", "buyer", 800, 500, 2),
    ("Test 3A Gradual vs Fair", "fair", "buyer", 800, 500, 3),
    ("Test 4A Edge Approx Equal", "fair", "buyer", 510, 500, 3),
    ("Test 4B Edge Large Gap", "fair", "buyer", 900, 200, 3),
):
    run_sim(*_scenario)
|