Atishay Jain committed on
Commit
025f14a
·
1 Parent(s): 34dfc61

feat: implement Opponent class and add simulation testing framework for negotiation environment

Browse files
build.bat ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
@echo off
rem Builds the simulation test binary with g++ and, when the build succeeds,
rem runs it. The script's exit code is the compiler's exit code on a failed
rem build, otherwise the simulation's exit code, so callers can detect failure.
echo Building Meta OpenEnv C++ Project...
g++ tests/simulation.cpp env/NegotiationEnv.cpp opponent/Opponent.cpp -I. -std=c++17 -o test_sim.exe
rem Capture the result immediately: later commands overwrite ERRORLEVEL.
set "BUILD_RC=%ERRORLEVEL%"
set "RUN_RC=0"

if %BUILD_RC% NEQ 0 goto build_failed

echo Build Successful! Running simulation...
echo =======================================
.\test_sim.exe
set "RUN_RC=%ERRORLEVEL%"
echo =======================================
goto finish

:build_failed
echo Build Failed! Make sure g++ is installed and in your PATH.
set "RUN_RC=%BUILD_RC%"

:finish
pause
exit /b %RUN_RC%
env/NegotiationEnv.cpp CHANGED
@@ -1,22 +1,111 @@
1
  #include "NegotiationEnv.h"
 
 
 
 
2
 
3
  NegotiationEnv::NegotiationEnv() {
4
- // Initialization logic placeholder
5
- agent_value = 0;
6
- opponent_value = 0;
7
- opponent_type = "fair";
8
  }
9
 
10
  void NegotiationEnv::reset() {
11
- // Initialize episode placeholder
12
- State new_state;
13
- state = new_state;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  }
15
 
16
  std::tuple<State, double, bool> NegotiationEnv::step(Action action) {
17
- // Process action placeholder
 
 
 
18
  double reward = 0.0;
19
  bool done = false;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  return std::make_tuple(state, reward, done);
22
  }
 
1
  #include "NegotiationEnv.h"
2
+ #include "../opponent/Opponent.h"
3
+ #include <random>
4
+ #include <ctime>
5
+ #include <cmath>
6
 
7
  NegotiationEnv::NegotiationEnv() {
8
+ std::srand(static_cast<unsigned int>(std::time(nullptr)));
 
 
 
9
  }
10
 
11
  void NegotiationEnv::reset() {
12
+ state.setRound(0);
13
+ state.setMaxRounds(20);
14
+
15
+ agent_value = 100 + (std::rand() % 901);
16
+ opponent_value = 100 + (std::rand() % 901);
17
+
18
+ if (std::rand() % 2 == 0) {
19
+ state.setRole("buyer");
20
+ } else {
21
+ state.setRole("seller");
22
+ }
23
+
24
+ int t = std::rand() % 3;
25
+ if (t == 0) opponent_type = "greedy";
26
+ else if (t == 1) opponent_type = "fair";
27
+ else opponent_type = "impatient";
28
+
29
+ int initial_offer = (agent_value + opponent_value) / 2;
30
+ state.setCurrentOffer(initial_offer);
31
+ state.setLastOpponentAction("START");
32
+ state.setLastOpponentOffer(0);
33
+
34
+ std::string opp_role = (state.getRole() == "buyer") ? "seller" : "buyer";
35
+ opponent_strategy = std::make_unique<Opponent>(opponent_type, opponent_value, opp_role);
36
+ }
37
+
38
+ double NegotiationEnv::compute_reward(int deal_price) {
39
+ double profit = 0;
40
+ if (state.getRole() == "seller") {
41
+ profit = deal_price - agent_value;
42
+ } else {
43
+ profit = agent_value - deal_price;
44
+ }
45
+
46
+ double time_factor = 1.0 - (static_cast<double>(state.getRound()) / state.getMaxRounds());
47
+ double reward = profit * time_factor;
48
+
49
+ if (profit < 0) {
50
+ reward -= 20;
51
+ }
52
+ return reward;
53
  }
54
 
55
  std::tuple<State, double, bool> NegotiationEnv::step(Action action) {
56
+ // 1. Increment round
57
+ int current_round = state.getRound() + 1;
58
+ state.setRound(current_round);
59
+
60
  double reward = 0.0;
61
  bool done = false;
62
+
63
+ if (action.getType() == ActionType::ACCEPT) {
64
+ // 2. If ACCEPT
65
+ int deal_price = state.getLastOpponentOffer();
66
+ reward = compute_reward(deal_price);
67
+ done = true;
68
+ } else if (action.getType() == ActionType::REJECT) {
69
+ // 3. If REJECT
70
+ reward = -50;
71
+ done = true;
72
+ } else if (action.getType() == ActionType::OFFER) {
73
+ // 4. If OFFER
74
+ Action opp_response = opponent_strategy->getResponse(state, action);
75
+
76
+ if (opp_response.getType() == ActionType::ACCEPT) {
77
+ int deal_price = action.getPrice();
78
+ reward = compute_reward(deal_price);
79
+ done = true;
80
+
81
+ // 5. Update state
82
+ state.setLastOpponentAction("ACCEPT");
83
+ } else {
84
+ // Opponent generates counter-offer
85
+ state.setCurrentOffer(opp_response.getPrice());
86
+ // 5. Update state
87
+ state.setLastOpponentAction("OFFER");
88
+ state.setLastOpponentOffer(opp_response.getPrice());
89
+ }
90
+ }
91
+
92
+ // 6. If round == max_rounds
93
+ if (!done && current_round >= state.getMaxRounds()) {
94
+ reward = -50;
95
+ done = true;
96
+ }
97
+
98
+ // Apply Aggression Penalty if deal resolves this step
99
+ if (done && action.getType() == ActionType::OFFER) {
100
+ if (std::abs(action.getPrice() - opponent_value) > 150) {
101
+ reward -= 2;
102
+ }
103
+ }
104
+
105
+ // 7. If NOT done (IMPORTANT strict rule)
106
+ if (!done) {
107
+ reward = 0;
108
+ }
109
 
110
  return std::make_tuple(state, reward, done);
111
  }
env/NegotiationEnv.h CHANGED
@@ -1,17 +1,21 @@
1
  #pragma once
2
  #include "State.h"
3
  #include "Action.h"
 
4
  #include <tuple>
5
  #include <string>
 
6
 
7
  class NegotiationEnv {
8
  private:
9
  State state;
10
-
11
- // Hidden (internal)
12
  int agent_value;
13
  int opponent_value;
14
- std::string opponent_type; // "greedy", "fair", "impatient"
 
 
 
 
15
 
16
  public:
17
  NegotiationEnv();
 
1
  #pragma once
2
  #include "State.h"
3
  #include "Action.h"
4
+ #include "../opponent/OpponentStrategy.h"
5
  #include <tuple>
6
  #include <string>
7
+ #include <memory>
8
 
9
  class NegotiationEnv {
10
  private:
11
  State state;
 
 
12
  int agent_value;
13
  int opponent_value;
14
+ std::string opponent_type;
15
+
16
+ std::unique_ptr<OpponentStrategy> opponent_strategy;
17
+
18
+ double compute_reward(int deal_price);
19
 
20
  public:
21
  NegotiationEnv();
opponent/Opponent.cpp ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "Opponent.h"
2
+ #include <stdexcept>
3
+ #include <ctime>
4
+
5
+ Opponent::Opponent(const std::string& type, int value, const std::string& role)
6
+ : type(type), opponent_value(value), opponent_role(role) {
7
+ if (type == "greedy") {
8
+ r = 0.05; alpha = 0.7; patience = 10; epsilon = 5;
9
+ } else if (type == "fair") {
10
+ r = 0.15; alpha = 0.4; patience = 7; epsilon = 10;
11
+ } else if (type == "impatient") {
12
+ r = 0.25; alpha = 0.2; patience = 3; epsilon = 15;
13
+ } else {
14
+ throw std::invalid_argument("Unknown opponent type");
15
+ }
16
+ concession_rate = r;
17
+ rng.seed(static_cast<unsigned int>(std::time(nullptr)));
18
+ }
19
+
20
+ Action Opponent::getResponse(const State& state, const Action& agent_action) {
21
+ if (agent_action.getType() != ActionType::OFFER) {
22
+ return Action(ActionType::REJECT);
23
+ }
24
+
25
+ int agent_offer = agent_action.getPrice();
26
+
27
+ // 1. Acceptance
28
+ bool accept = false;
29
+ if (opponent_role == "seller") {
30
+ if (agent_offer >= opponent_value) accept = true;
31
+ } else { // "buyer"
32
+ if (agent_offer <= opponent_value) accept = true;
33
+ }
34
+
35
+ if (accept) {
36
+ return Action(ActionType::ACCEPT);
37
+ }
38
+
39
+ // Patience Behavior (STRICT RULE)
40
+ int current_round = state.getRound();
41
+ if (current_round > patience) {
42
+ concession_rate += 0.05;
43
+ if (concession_rate > 0.4) concession_rate = 0.4;
44
+ }
45
+
46
+ // 3. Counter Offer
47
+ double target = opponent_value;
48
+ double current_offer = state.getCurrentOffer();
49
+
50
+ double delta = target - current_offer;
51
+ double next_offer = current_offer + concession_rate * delta;
52
+
53
+ // Anchor Effect
54
+ next_offer = (1.0 - alpha) * next_offer + alpha * current_offer;
55
+
56
+ // Noise
57
+ std::uniform_int_distribution<int> noise_dist(-epsilon, epsilon);
58
+ next_offer += noise_dist(rng);
59
+
60
+ // Clamp between [100, 1000]
61
+ int final_offer = static_cast<int>(next_offer);
62
+ if (final_offer < 100) final_offer = 100;
63
+ if (final_offer > 1000) final_offer = 1000;
64
+
65
+ return Action(ActionType::OFFER, final_offer);
66
+ }
opponent/Opponent.h ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pragma once
2
+ #include <string>
3
+ #include <random>
4
+ #include "../env/State.h"
5
+ #include "../env/Action.h"
6
+ #include "OpponentStrategy.h"
7
+
8
+ class Opponent : public OpponentStrategy {
9
+ private:
10
+ std::string type;
11
+ int opponent_value;
12
+ std::string opponent_role;
13
+ double r;
14
+ double alpha;
15
+ int patience;
16
+ int epsilon;
17
+
18
+ double concession_rate;
19
+
20
+ std::mt19937 rng;
21
+
22
+ public:
23
+ Opponent(const std::string& type, int value, const std::string& role);
24
+ Action getResponse(const State& state, const Action& agent_action) override;
25
+ };
tests/experiment.cpp ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #define private public
2
+ #include "../env/NegotiationEnv.h"
3
+ #undef private
4
+ #include "../opponent/Opponent.h"
5
+
6
+ #include <iostream>
7
+ #include <iomanip>
8
+ #include <string>
9
+ #include <memory>
10
+
11
+ void run_simulation(std::string test_name, std::string opp_type, std::string role, int a_val, int o_val, int behavior_type) {
12
+ NegotiationEnv env;
13
+ env.reset();
14
+
15
+ // Force specific scenario
16
+ env.opponent_type = opp_type;
17
+ env.agent_value = a_val;
18
+ env.opponent_value = o_val;
19
+ env.state.setRole(role);
20
+ env.state.setCurrentOffer((a_val + o_val) / 2);
21
+ env.state.setLastOpponentOffer(0);
22
+
23
+ std::string opp_role = (role == "buyer") ? "seller" : "buyer";
24
+ env.opponent_strategy = std::make_unique<Opponent>(opp_type, o_val, opp_role);
25
+
26
+ std::cout << "\n=== " << test_name << " ===" << std::endl;
27
+ std::cout << "Opp Type: " << opp_type << " | Role: " << role << " | Ag_Val: " << a_val << " | Opp_Val: " << o_val << std::endl;
28
+
29
+ bool done = false;
30
+ double final_reward = 0;
31
+ int round = 0;
32
+
33
+ int action_price = 0;
34
+ while (!done) {
35
+ round = env.state.getRound() + 1;
36
+
37
+ if (behavior_type == 1) { // Baseline: fixed offer
38
+
39
+ action_price = (role == "buyer") ? 100 : 900;
40
+ } else if (behavior_type == 2) { // Extreme: super bad
41
+ action_price = (role == "buyer") ? 10 : 1500;
42
+ } else if (behavior_type == 3) { // Gradual improvement
43
+ if (role == "buyer") {
44
+ if (round == 1) action_price = 100;
45
+ else if (round == 2) action_price = a_val - 200; // e.g. 600
46
+ else action_price = a_val - 50; // e.g. 750 (near acceptable for opp who wants 500)
47
+ } else { // seller
48
+ if (round == 1) action_price = 1000;
49
+ else if (round == 2) action_price = a_val + 200;
50
+ else action_price = a_val + 50;
51
+ }
52
+ }
53
+
54
+ Action agent_action(ActionType::OFFER, action_price);
55
+ std::cout << "[Round " << round << "] Agent OFFER " << action_price << " -> ";
56
+
57
+ auto [next_state, reward, is_done] = env.step(agent_action);
58
+ done = is_done;
59
+
60
+ std::cout << "Opponent " << env.state.getLastOpponentAction();
61
+ if (env.state.getLastOpponentAction() == "OFFER") {
62
+ std::cout << " " << env.state.getLastOpponentOffer();
63
+ }
64
+ std::cout << " | Reward: " << reward << " | Done: " << (done ? "True" : "False") << std::endl;
65
+
66
+ if (done) final_reward = reward;
67
+ if (round >= env.state.getMaxRounds()) break;
68
+ }
69
+
70
+ if (env.state.getLastOpponentAction() == "ACCEPT") {
71
+ // Last op action is accept, means opponent accepted the agent's OFFER.
72
+ // Wait, deal price is agent's offer.
73
+ std::cout << "Final Deal Price: " << action_price << " | Final Reward: " << final_reward << std::endl;
74
+ } else {
75
+ std::cout << "Final Deal Price: NONE (" << env.state.getLastOpponentAction() << ") | Final Reward: " << final_reward << std::endl;
76
+ }
77
+ }
78
+
79
+ int main() {
80
+ std::cout << "--- EXPERIMENT LOGS ---" << std::endl;
81
+ // Buyer wants to buy for as low as possible (Profit = target - price). So target = 800.
82
+ // Opponent is Seller, wants price >= 500.
83
+
84
+ // TEST SET 1: Baseline
85
+ run_simulation("TEST SET 1A (Baseline vs Greedy)", "greedy", "buyer", 800, 500, 1);
86
+ run_simulation("TEST SET 1B (Baseline vs Fair)", "fair", "buyer", 800, 500, 1);
87
+ run_simulation("TEST SET 1C (Baseline vs Impatient)", "impatient", "buyer", 800, 500, 1);
88
+
89
+ // TEST SET 2: Extreme Strategy
90
+ run_simulation("TEST SET 2A (Extreme vs Fair)", "fair", "buyer", 800, 500, 2);
91
+ run_simulation("TEST SET 2B (Extreme vs Impatient)", "impatient", "buyer", 800, 500, 2);
92
+
93
+ // TEST SET 3: Gradual Improvement
94
+ run_simulation("TEST SET 3A (Gradual vs Fair)", "fair", "buyer", 800, 500, 3);
95
+ run_simulation("TEST SET 3B (Gradual vs Impatient)", "impatient", "buyer", 800, 500, 3);
96
+
97
+ // TEST SET 4: Edge Cases
98
+ // Approx Equal (very tight margins)
99
+ run_simulation("TEST SET 4A (Edge - Approx Equal)", "fair", "buyer", 510, 500, 3);
100
+ // Large gap (very wide margins, easy deal)
101
+ run_simulation("TEST SET 4B (Edge - Large Gap)", "fair", "buyer", 900, 200, 3);
102
+
103
+ return 0;
104
+ }
tests/run_experiments.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+
3
class Opponent:
    """Rule-based negotiation counterpart selected by a named profile.

    Profiles map to (base concession rate, anchor weight, patience in rounds,
    noise bound).
    """

    _PROFILES = {
        "greedy": (0.05, 0.7, 10, 5),
        "fair": (0.15, 0.4, 7, 10),
        "impatient": (0.25, 0.2, 3, 15),
    }

    def __init__(self, type_str, value, role):
        """Create an opponent.

        Args:
            type_str: Profile name: "greedy", "fair" or "impatient".
            value: The opponent's private valuation.
            role: "seller" or "buyer".

        Raises:
            ValueError: If ``type_str`` is not a known profile. Without this
                check the instance would be left half-initialised and fail
                later with an AttributeError.
        """
        if type_str not in self._PROFILES:
            raise ValueError(f"Unknown opponent type: {type_str!r}")
        self.type = type_str
        self.opponent_value = value
        self.opponent_role = role
        self.r, self.alpha, self.patience, self.epsilon = self._PROFILES[type_str]
        self.concession_rate = self.r

    def get_response(self, round_num, current_offer, agent_offer):
        """Answer the agent's offer.

        Args:
            round_num: Current round number.
            current_offer: Price currently on the table.
            agent_offer: Price proposed by the agent.

        Returns:
            ``("ACCEPT", agent_offer)`` when the offer meets the opponent's
            valuation, otherwise ``("OFFER", price)`` with a counter price in
            [100, 1000].
        """
        if self.opponent_role == "seller" and agent_offer >= self.opponent_value:
            return "ACCEPT", agent_offer
        if self.opponent_role == "buyer" and agent_offer <= self.opponent_value:
            return "ACCEPT", agent_offer

        # Past the patience threshold the opponent concedes faster, capped at 0.4.
        if round_num > self.patience:
            self.concession_rate = min(0.4, self.concession_rate + 0.05)

        # Move toward the valuation, then pull back toward the current price.
        target = self.opponent_value
        next_offer = current_offer + self.concession_rate * (target - current_offer)
        next_offer = (1.0 - self.alpha) * next_offer + self.alpha * current_offer
        next_offer += random.randint(-self.epsilon, self.epsilon)

        price = int(next_offer)
        # Never propose a price this opponent would itself refuse: the raw
        # formula can cross the valuation when the price on the table starts on
        # the wrong side of it, or through noise.
        if self.opponent_role == "seller":
            price = max(price, self.opponent_value)
        elif self.opponent_role == "buyer":
            price = min(price, self.opponent_value)

        return "OFFER", max(100, min(1000, price))
32
+
33
class Env:
    """Minimal Python mirror of the negotiation environment for scripted runs."""

    def __init__(self, opp_type, a_val, o_val, role):
        """Set up one episode with fixed valuations, role and opponent profile."""
        self.agent_value = a_val
        self.opponent_value = o_val
        self.role = role
        self.opp_type = opp_type
        # The opponent takes whichever role the agent does not have.
        self.opp_role = "seller" if role == "buyer" else "buyer"
        self.opp = Opponent(opp_type, o_val, self.opp_role)
        # Opening price on the table is the midpoint of the two valuations.
        self.current_offer = (a_val + o_val) // 2
        self.max_rounds = 20
        self.round = 0
        self.last_opp_action = "START"
        self.last_opp_offer = 0

    def step(self, action_price):
        """Submit an offer at ``action_price`` and return ``(reward, done)``."""
        self.round += 1
        is_aggressive = abs(action_price - self.opponent_value) > 150

        verdict, counter = self.opp.get_response(
            self.round, self.current_offer, action_price
        )

        if verdict != "ACCEPT":
            # Opponent countered: its price becomes the one on the table.
            self.current_offer = counter
            self.last_opp_action = "OFFER"
            self.last_opp_offer = counter
            if self.round >= self.max_rounds:
                return -50, True
            return 0, False

        # Opponent accepted: the deal closes at the agent's price.
        self.last_opp_action = "ACCEPT"
        self.last_opp_offer = action_price

        if self.role == "seller":
            profit = action_price - self.agent_value
        else:
            profit = self.agent_value - action_price

        reward = profit * (1.0 - (self.round / self.max_rounds))
        if profit < 0:
            reward -= 20
        if is_aggressive:
            reward -= 2
        return reward, True
76
+
77
def run_sim(name, opp_type, role, a_val, o_val, b_type):
    """Run one scripted negotiation and print a per-round log.

    ``b_type`` selects the agent's offer script: 1 = fixed baseline,
    2 = extreme, 3 = offers that improve over rounds 1, 2 and 3+. Any other
    value offers 0 every round.
    """
    print(f"\n=== {name} ===")
    print(f"Opponent Type: {opp_type} | Agent Role: {role} | Agent Value: {a_val} | Opp Value: {o_val}")
    env = Env(opp_type, a_val, o_val, role)
    buying = role == "buyer"
    finished = False

    while not finished and env.round <= 25:
        upcoming = env.round + 1
        if b_type == 1:
            price = 100 if buying else 900
        elif b_type == 2:
            price = 10 if buying else 1500
        elif b_type == 3:
            if upcoming == 1:
                price = 100 if buying else 1000
            elif upcoming == 2:
                price = o_val - 100 if buying else o_val + 100
            else:
                price = o_val
        else:
            price = 0

        step_reward, finished = env.step(price)
        shown_offer = env.last_opp_offer if env.last_opp_action == "OFFER" else ""
        print(f"[Round {env.round}] Agent OFFER {price} -> Opponent {env.last_opp_action} {shown_offer} | Step Reward: {step_reward:.2f} | Done: {finished}")

    if env.last_opp_action == "ACCEPT":
        print(f"Final Deal Price: {env.last_opp_offer} | Final Reward: {step_reward:.2f}")
    else:
        print(f"Final Deal Price: NONE | Final Reward: {step_reward:.2f}")
105
+
106
def main():
    """Run the fixed experiment matrix with a fixed seed for repeatable logs."""
    random.seed(42)
    print("--- TEST LOGS ---")
    scenarios = [
        ("Test 1A Baseline Greedy", "greedy", "buyer", 800, 500, 1),
        ("Test 1B Baseline Fair", "fair", "buyer", 800, 500, 1),
        ("Test 1C Baseline Impatient", "impatient", "buyer", 800, 500, 1),
        ("Test 2A Extreme vs Fair", "fair", "buyer", 800, 500, 2),
        ("Test 3A Gradual vs Fair", "fair", "buyer", 800, 500, 3),
        ("Test 4A Edge Approx Equal", "fair", "buyer", 510, 500, 3),
        ("Test 4B Edge Large Gap", "fair", "buyer", 900, 200, 3),
    ]
    for scenario in scenarios:
        run_sim(*scenario)


# Guarded so that importing this module neither runs the experiments nor
# reseeds the global random generator.
if __name__ == "__main__":
    main()
tests/simulation.cpp CHANGED
@@ -1,15 +1,38 @@
1
  #include <iostream>
2
  #include "../env/NegotiationEnv.h"
 
3
 
4
- void run_simulation() {
 
5
  NegotiationEnv env;
 
 
6
  env.reset();
 
 
 
7
 
8
- // Placeholder for simulation loop
9
- std::cout << "Simulation initialized." << std::endl;
10
- }
11
-
12
- int main() {
13
- run_simulation();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  return 0;
15
  }
 
1
  #include <iostream>
2
  #include "../env/NegotiationEnv.h"
3
+ #include "../agents/DummyAgent.cpp"
4
 
5
+ int main() {
6
+ std::cout << "Starting Phase 2 Simulation Trace..." << std::endl;
7
  NegotiationEnv env;
8
+ DummyAgent agent;
9
+
10
  env.reset();
11
+ State state = env.getState();
12
+ std::cout << "Agent Role: " << state.getRole() << std::endl;
13
+ std::cout << "Initial Offer state: " << state.getCurrentOffer() << std::endl;
14
 
15
+ bool done = false;
16
+ double total_reward = 0;
17
+
18
+ while (!done) {
19
+ // The dummy agent just offers 100 statically in this demo
20
+ Action a = agent.act(state);
21
+ std::cout << "\n[Round " << state.getRound() + 1 << "]" << std::endl;
22
+ std::cout << " Agent Action: OFFER " << a.getPrice() << std::endl;
23
+
24
+ auto [next_state, reward, is_done] = env.step(a);
25
+ state = next_state;
26
+ done = is_done;
27
+ total_reward += reward;
28
+
29
+ std::cout << " Opponent Action: " << state.getLastOpponentAction();
30
+ if (state.getLastOpponentAction() == "OFFER") {
31
+ std::cout << " " << state.getLastOpponentOffer();
32
+ }
33
+ std::cout << "\n Step Reward: " << reward << ", Done: " << (done ? "true" : "false") << std::endl;
34
+ }
35
+
36
+ std::cout << "\nSimulation Ended. Final Aggregate Reward Processed: " << total_reward << std::endl;
37
  return 0;
38
  }