Spaces:
Sleeping
Sleeping
Atishay Jain committed on
Commit ·
025f14a
1
Parent(s): 34dfc61
feat: implement Opponent class and add simulation testing framework for negotiation environment
Browse files- build.bat +13 -0
- env/NegotiationEnv.cpp +97 -8
- env/NegotiationEnv.h +7 -3
- opponent/Opponent.cpp +66 -0
- opponent/Opponent.h +25 -0
- tests/experiment.cpp +104 -0
- tests/run_experiments.py +114 -0
- tests/simulation.cpp +30 -7
build.bat
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@echo off
REM Compile the simulation driver together with the environment and opponent
REM sources into test_sim.exe, then run it when the compile succeeds.
echo Building Meta OpenEnv C++ Project...
g++ tests/simulation.cpp env/NegotiationEnv.cpp opponent/Opponent.cpp -I. -std=c++17 -o test_sim.exe

REM Handle the failure case first; every non-zero exit code from g++ lands here.
if %ERRORLEVEL% NEQ 0 (
    echo Build Failed! Make sure g++ is installed and in your PATH.
) else (
    echo Build Successful! Running simulation...
    echo =======================================
    .\test_sim.exe
    echo =======================================
)
REM Wait for a key press so the output stays visible.
pause
|
env/NegotiationEnv.cpp
CHANGED
|
@@ -1,22 +1,111 @@
|
|
| 1 |
#include "NegotiationEnv.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
NegotiationEnv::NegotiationEnv() {
|
| 4 |
-
|
| 5 |
-
agent_value = 0;
|
| 6 |
-
opponent_value = 0;
|
| 7 |
-
opponent_type = "fair";
|
| 8 |
}
|
| 9 |
|
| 10 |
void NegotiationEnv::reset() {
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
}
|
| 15 |
|
| 16 |
std::tuple<State, double, bool> NegotiationEnv::step(Action action) {
|
| 17 |
-
//
|
|
|
|
|
|
|
|
|
|
| 18 |
double reward = 0.0;
|
| 19 |
bool done = false;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
return std::make_tuple(state, reward, done);
|
| 22 |
}
|
|
|
|
| 1 |
#include "NegotiationEnv.h"
|
| 2 |
+
#include "../opponent/Opponent.h"
|
| 3 |
+
#include <random>
|
| 4 |
+
#include <ctime>
|
| 5 |
+
#include <cmath>
|
| 6 |
|
| 7 |
/// Builds an environment that is immediately safe to step.
///
/// The C library generator is seeded exactly once per process. Reseeding with
/// the wall clock on every construction made all environments created within
/// the same second draw identical scenarios.
///
/// reset() is invoked so that agent_value, opponent_value, opponent_type and
/// opponent_strategy never hold indeterminate/null values; without it, a
/// step() issued before the first reset() would dereference a null strategy.
NegotiationEnv::NegotiationEnv() {
    static const bool seeded = [] {
        std::srand(static_cast<unsigned int>(std::time(nullptr)));
        return true;
    }();
    (void)seeded;  // only the one-time side effect matters

    reset();
}
|
| 10 |
|
| 11 |
/// Starts a new episode with a freshly drawn scenario.
///
/// Draws, in this order, from std::rand(): the agent's private value and the
/// opponent's private value (each in [100, 1000]), the agent's role, and the
/// opponent personality. The opening offer is the midpoint of the two private
/// values; no opponent offer exists yet, so the last opponent action is
/// "START" and the last opponent offer is 0.
void NegotiationEnv::reset() {
    state.setRound(0);
    state.setMaxRounds(20);

    // The sequence of std::rand() calls below determines the scenario; keep it.
    agent_value = std::rand() % 901 + 100;
    opponent_value = std::rand() % 901 + 100;

    const bool coin_is_heads = (std::rand() % 2 == 0);
    state.setRole(coin_is_heads ? "buyer" : "seller");

    switch (std::rand() % 3) {
        case 0:
            opponent_type = "greedy";
            break;
        case 1:
            opponent_type = "fair";
            break;
        default:
            opponent_type = "impatient";
            break;
    }

    state.setCurrentOffer((agent_value + opponent_value) / 2);
    state.setLastOpponentAction("START");
    state.setLastOpponentOffer(0);

    // The opponent always plays the side opposite to the agent.
    const bool agent_is_buyer = (state.getRole() == "buyer");
    const std::string counterparty_role = agent_is_buyer ? "seller" : "buyer";
    opponent_strategy =
        std::make_unique<Opponent>(opponent_type, opponent_value, counterparty_role);
}
|
| 37 |
+
|
| 38 |
+
double NegotiationEnv::compute_reward(int deal_price) {
|
| 39 |
+
double profit = 0;
|
| 40 |
+
if (state.getRole() == "seller") {
|
| 41 |
+
profit = deal_price - agent_value;
|
| 42 |
+
} else {
|
| 43 |
+
profit = agent_value - deal_price;
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
double time_factor = 1.0 - (static_cast<double>(state.getRound()) / state.getMaxRounds());
|
| 47 |
+
double reward = profit * time_factor;
|
| 48 |
+
|
| 49 |
+
if (profit < 0) {
|
| 50 |
+
reward -= 20;
|
| 51 |
+
}
|
| 52 |
+
return reward;
|
| 53 |
}
|
| 54 |
|
| 55 |
/// Advances the negotiation by one agent action.
///
/// @param action  ACCEPT, REJECT, or OFFER (with a price) chosen by the agent.
/// @return Tuple of (copy of the updated state, reward, done). The reward is 0
///         on every step that does not end the episode.
///
/// Episode endings:
///  - ACCEPT: the deal closes at the price currently on the table.
///  - REJECT: the negotiation fails with reward -50.
///  - OFFER accepted by the opponent: the deal closes at the agent's price.
///  - Round limit reached without a deal: reward -50.
std::tuple<State, double, bool> NegotiationEnv::step(Action action) {
    // 1. Increment round
    const int current_round = state.getRound() + 1;
    state.setRound(current_round);

    double reward = 0.0;
    bool done = false;
    const auto kind = action.getType();

    if (kind == ActionType::ACCEPT) {
        // 2. ACCEPT settles at the current offer rather than the last opponent
        // offer. reset() leaves the last opponent offer at 0 until the opponent
        // has actually countered, so accepting on the first step used to close
        // the deal at price 0. The current offer starts at the opening offer
        // and is set together with the last opponent offer on every
        // counter-offer below, so later rounds settle at the same price as
        // before.
        reward = compute_reward(state.getCurrentOffer());
        done = true;
    } else if (kind == ActionType::REJECT) {
        // 3. Walking away ends the episode with a fixed penalty.
        reward = -50;
        done = true;
    } else if (kind == ActionType::OFFER) {
        // 4. Let the opponent react to the agent's price.
        const Action opp_response = opponent_strategy->getResponse(state, action);

        if (opp_response.getType() == ActionType::ACCEPT) {
            reward = compute_reward(action.getPrice());
            done = true;

            // 5. Update state
            state.setLastOpponentAction("ACCEPT");
        } else {
            // Opponent countered: its price becomes the offer on the table.
            const int counter_price = opp_response.getPrice();
            state.setCurrentOffer(counter_price);
            state.setLastOpponentAction("OFFER");
            state.setLastOpponentOffer(counter_price);
        }
    }

    // 6. Out of rounds without a deal.
    if (!done && current_round >= state.getMaxRounds()) {
        reward = -50;
        done = true;
    }

    // Aggression penalty for an episode-ending OFFER far from the opponent's value.
    // NOTE(review): this also fires when an OFFER step ends by hitting the round
    // limit, whereas tests/run_experiments.py applies the penalty only when the
    // opponent accepts — confirm which behavior is intended.
    if (done && kind == ActionType::OFFER) {
        if (std::abs(action.getPrice() - opponent_value) > 150) {
            reward -= 2;
        }
    }

    // 7. Intermediate steps never carry a reward.
    if (!done) {
        reward = 0;
    }

    return std::make_tuple(state, reward, done);
}
|
env/NegotiationEnv.h
CHANGED
|
@@ -1,17 +1,21 @@
|
|
| 1 |
#pragma once
|
| 2 |
#include "State.h"
|
| 3 |
#include "Action.h"
|
|
|
|
| 4 |
#include <tuple>
|
| 5 |
#include <string>
|
|
|
|
| 6 |
|
| 7 |
class NegotiationEnv {
|
| 8 |
private:
|
| 9 |
State state;
|
| 10 |
-
|
| 11 |
-
// Hidden (internal)
|
| 12 |
int agent_value;
|
| 13 |
int opponent_value;
|
| 14 |
-
std::string opponent_type;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
public:
|
| 17 |
NegotiationEnv();
|
|
|
|
| 1 |
#pragma once
|
| 2 |
#include "State.h"
|
| 3 |
#include "Action.h"
|
| 4 |
+
#include "../opponent/OpponentStrategy.h"
|
| 5 |
#include <tuple>
|
| 6 |
#include <string>
|
| 7 |
+
#include <memory>
|
| 8 |
|
| 9 |
class NegotiationEnv {
|
| 10 |
private:
|
| 11 |
State state;
|
|
|
|
|
|
|
| 12 |
int agent_value;
|
| 13 |
int opponent_value;
|
| 14 |
+
std::string opponent_type;
|
| 15 |
+
|
| 16 |
+
std::unique_ptr<OpponentStrategy> opponent_strategy;
|
| 17 |
+
|
| 18 |
+
double compute_reward(int deal_price);
|
| 19 |
|
| 20 |
public:
|
| 21 |
NegotiationEnv();
|
opponent/Opponent.cpp
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "Opponent.h"
|
| 2 |
+
#include <stdexcept>
|
| 3 |
+
#include <ctime>
|
| 4 |
+
|
| 5 |
+
/// Configures an opponent from one of the known personalities.
///
/// @param type   "greedy", "fair", or "impatient"; selects the base concession
///               rate (r), anchoring weight (alpha), patience threshold and
///               noise half-width (epsilon).
/// @param value  The opponent's private value.
/// @param role   Side the opponent plays ("seller" or "buyer").
/// @throws std::invalid_argument if @p type is not a known personality.
Opponent::Opponent(const std::string& type, int value, const std::string& role)
    : type(type), opponent_value(value), opponent_role(role) {
    if (type == "greedy") {
        r = 0.05; alpha = 0.7; patience = 10; epsilon = 5;
    } else if (type == "fair") {
        r = 0.15; alpha = 0.4; patience = 7; epsilon = 10;
    } else if (type == "impatient") {
        r = 0.25; alpha = 0.2; patience = 3; epsilon = 15;
    } else {
        throw std::invalid_argument("Unknown opponent type");
    }
    concession_rate = r;

    // Seeding from the clock alone gave every opponent created within the same
    // second the same noise sequence. Mixing in std::random_device makes
    // instances independent; the clock stays in the mix for platforms where
    // random_device is itself deterministic.
    std::seed_seq seed_material{
        static_cast<unsigned int>(std::random_device{}()),
        static_cast<unsigned int>(std::time(nullptr))};
    rng.seed(seed_material);
}
|
| 19 |
+
|
| 20 |
+
/// Decides how the opponent reacts to the agent's action.
///
/// @param state         Current negotiation state (round and current offer are read).
/// @param agent_action  The agent's action; anything other than OFFER is rejected.
/// @return ACCEPT when the agent's price meets the opponent's value for its
///         role, otherwise an OFFER carrying a counter-price in [100, 1000].
///
/// Side effect: once the round exceeds the patience threshold, each call that
/// reaches the counter-offer stage raises the concession rate by 0.05, capped
/// at 0.4.
Action Opponent::getResponse(const State& state, const Action& agent_action) {
    if (agent_action.getType() != ActionType::OFFER) {
        return Action(ActionType::REJECT);
    }

    // Acceptance: a seller takes any price at or above its value, any other
    // role (i.e. buyer) takes any price at or below it.
    const int proposed = agent_action.getPrice();
    const bool acceptable = (opponent_role == "seller") ? (proposed >= opponent_value)
                                                        : (proposed <= opponent_value);
    if (acceptable) {
        return Action(ActionType::ACCEPT);
    }

    // Patience: past the threshold the opponent concedes faster, up to the cap.
    if (state.getRound() > patience) {
        concession_rate += 0.05;
        if (concession_rate > 0.4) {
            concession_rate = 0.4;
        }
    }

    // Counter-offer: move from the offer on the table toward the private value...
    const double standing = state.getCurrentOffer();
    const double goal = opponent_value;
    const double gap = goal - standing;
    const double conceded = standing + concession_rate * gap;

    // ...then pull back toward the standing offer (anchor effect)...
    double counter = (1.0 - alpha) * conceded + alpha * standing;

    // ...and add uniform integer noise in [-epsilon, epsilon].
    std::uniform_int_distribution<int> jitter(-epsilon, epsilon);
    counter += jitter(rng);

    // Truncate to an integer price and clamp to [100, 1000].
    int price = static_cast<int>(counter);
    if (price < 100) {
        price = 100;
    } else if (price > 1000) {
        price = 1000;
    }

    return Action(ActionType::OFFER, price);
}
|
opponent/Opponent.h
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
#include <string>
|
| 3 |
+
#include <random>
|
| 4 |
+
#include "../env/State.h"
|
| 5 |
+
#include "../env/Action.h"
|
| 6 |
+
#include "OpponentStrategy.h"
|
| 7 |
+
|
| 8 |
+
class Opponent : public OpponentStrategy {
|
| 9 |
+
private:
|
| 10 |
+
std::string type;
|
| 11 |
+
int opponent_value;
|
| 12 |
+
std::string opponent_role;
|
| 13 |
+
double r;
|
| 14 |
+
double alpha;
|
| 15 |
+
int patience;
|
| 16 |
+
int epsilon;
|
| 17 |
+
|
| 18 |
+
double concession_rate;
|
| 19 |
+
|
| 20 |
+
std::mt19937 rng;
|
| 21 |
+
|
| 22 |
+
public:
|
| 23 |
+
Opponent(const std::string& type, int value, const std::string& role);
|
| 24 |
+
Action getResponse(const State& state, const Action& agent_action) override;
|
| 25 |
+
};
|
tests/experiment.cpp
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#define private public
|
| 2 |
+
#include "../env/NegotiationEnv.h"
|
| 3 |
+
#undef private
|
| 4 |
+
#include "../opponent/Opponent.h"
|
| 5 |
+
|
| 6 |
+
#include <iostream>
|
| 7 |
+
#include <iomanip>
|
| 8 |
+
#include <string>
|
| 9 |
+
#include <memory>
|
| 10 |
+
|
| 11 |
+
void run_simulation(std::string test_name, std::string opp_type, std::string role, int a_val, int o_val, int behavior_type) {
|
| 12 |
+
NegotiationEnv env;
|
| 13 |
+
env.reset();
|
| 14 |
+
|
| 15 |
+
// Force specific scenario
|
| 16 |
+
env.opponent_type = opp_type;
|
| 17 |
+
env.agent_value = a_val;
|
| 18 |
+
env.opponent_value = o_val;
|
| 19 |
+
env.state.setRole(role);
|
| 20 |
+
env.state.setCurrentOffer((a_val + o_val) / 2);
|
| 21 |
+
env.state.setLastOpponentOffer(0);
|
| 22 |
+
|
| 23 |
+
std::string opp_role = (role == "buyer") ? "seller" : "buyer";
|
| 24 |
+
env.opponent_strategy = std::make_unique<Opponent>(opp_type, o_val, opp_role);
|
| 25 |
+
|
| 26 |
+
std::cout << "\n=== " << test_name << " ===" << std::endl;
|
| 27 |
+
std::cout << "Opp Type: " << opp_type << " | Role: " << role << " | Ag_Val: " << a_val << " | Opp_Val: " << o_val << std::endl;
|
| 28 |
+
|
| 29 |
+
bool done = false;
|
| 30 |
+
double final_reward = 0;
|
| 31 |
+
int round = 0;
|
| 32 |
+
|
| 33 |
+
int action_price = 0;
|
| 34 |
+
while (!done) {
|
| 35 |
+
round = env.state.getRound() + 1;
|
| 36 |
+
|
| 37 |
+
if (behavior_type == 1) { // Baseline: fixed offer
|
| 38 |
+
|
| 39 |
+
action_price = (role == "buyer") ? 100 : 900;
|
| 40 |
+
} else if (behavior_type == 2) { // Extreme: super bad
|
| 41 |
+
action_price = (role == "buyer") ? 10 : 1500;
|
| 42 |
+
} else if (behavior_type == 3) { // Gradual improvement
|
| 43 |
+
if (role == "buyer") {
|
| 44 |
+
if (round == 1) action_price = 100;
|
| 45 |
+
else if (round == 2) action_price = a_val - 200; // e.g. 600
|
| 46 |
+
else action_price = a_val - 50; // e.g. 750 (near acceptable for opp who wants 500)
|
| 47 |
+
} else { // seller
|
| 48 |
+
if (round == 1) action_price = 1000;
|
| 49 |
+
else if (round == 2) action_price = a_val + 200;
|
| 50 |
+
else action_price = a_val + 50;
|
| 51 |
+
}
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
Action agent_action(ActionType::OFFER, action_price);
|
| 55 |
+
std::cout << "[Round " << round << "] Agent OFFER " << action_price << " -> ";
|
| 56 |
+
|
| 57 |
+
auto [next_state, reward, is_done] = env.step(agent_action);
|
| 58 |
+
done = is_done;
|
| 59 |
+
|
| 60 |
+
std::cout << "Opponent " << env.state.getLastOpponentAction();
|
| 61 |
+
if (env.state.getLastOpponentAction() == "OFFER") {
|
| 62 |
+
std::cout << " " << env.state.getLastOpponentOffer();
|
| 63 |
+
}
|
| 64 |
+
std::cout << " | Reward: " << reward << " | Done: " << (done ? "True" : "False") << std::endl;
|
| 65 |
+
|
| 66 |
+
if (done) final_reward = reward;
|
| 67 |
+
if (round >= env.state.getMaxRounds()) break;
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
if (env.state.getLastOpponentAction() == "ACCEPT") {
|
| 71 |
+
// Last op action is accept, means opponent accepted the agent's OFFER.
|
| 72 |
+
// Wait, deal price is agent's offer.
|
| 73 |
+
std::cout << "Final Deal Price: " << action_price << " | Final Reward: " << final_reward << std::endl;
|
| 74 |
+
} else {
|
| 75 |
+
std::cout << "Final Deal Price: NONE (" << env.state.getLastOpponentAction() << ") | Final Reward: " << final_reward << std::endl;
|
| 76 |
+
}
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
int main() {
|
| 80 |
+
std::cout << "--- EXPERIMENT LOGS ---" << std::endl;
|
| 81 |
+
// Buyer wants to buy for as low as possible (Profit = target - price). So target = 800.
|
| 82 |
+
// Opponent is Seller, wants price >= 500.
|
| 83 |
+
|
| 84 |
+
// TEST SET 1: Baseline
|
| 85 |
+
run_simulation("TEST SET 1A (Baseline vs Greedy)", "greedy", "buyer", 800, 500, 1);
|
| 86 |
+
run_simulation("TEST SET 1B (Baseline vs Fair)", "fair", "buyer", 800, 500, 1);
|
| 87 |
+
run_simulation("TEST SET 1C (Baseline vs Impatient)", "impatient", "buyer", 800, 500, 1);
|
| 88 |
+
|
| 89 |
+
// TEST SET 2: Extreme Strategy
|
| 90 |
+
run_simulation("TEST SET 2A (Extreme vs Fair)", "fair", "buyer", 800, 500, 2);
|
| 91 |
+
run_simulation("TEST SET 2B (Extreme vs Impatient)", "impatient", "buyer", 800, 500, 2);
|
| 92 |
+
|
| 93 |
+
// TEST SET 3: Gradual Improvement
|
| 94 |
+
run_simulation("TEST SET 3A (Gradual vs Fair)", "fair", "buyer", 800, 500, 3);
|
| 95 |
+
run_simulation("TEST SET 3B (Gradual vs Impatient)", "impatient", "buyer", 800, 500, 3);
|
| 96 |
+
|
| 97 |
+
// TEST SET 4: Edge Cases
|
| 98 |
+
// Approx Equal (very tight margins)
|
| 99 |
+
run_simulation("TEST SET 4A (Edge - Approx Equal)", "fair", "buyer", 510, 500, 3);
|
| 100 |
+
// Large gap (very wide margins, easy deal)
|
| 101 |
+
run_simulation("TEST SET 4B (Edge - Large Gap)", "fair", "buyer", 900, 200, 3);
|
| 102 |
+
|
| 103 |
+
return 0;
|
| 104 |
+
}
|
tests/run_experiments.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
|
| 3 |
+
class Opponent:
    """Rule-based negotiation opponent used by the Python experiment harness.

    Args:
        type_str: Personality name: "greedy", "fair" or "impatient".
        value: The opponent's private value.
        role: Side the opponent plays, "seller" or "buyer".

    Raises:
        ValueError: If ``type_str`` is not a known personality.
    """

    # personality -> (base concession rate, anchor weight, patience, noise half-width)
    _PROFILES = {
        "greedy": (0.05, 0.7, 10, 5),
        "fair": (0.15, 0.4, 7, 10),
        "impatient": (0.25, 0.2, 3, 15),
    }

    def __init__(self, type_str, value, role):
        # Fail fast on an unknown personality instead of hitting an
        # AttributeError on self.r further down.
        if type_str not in self._PROFILES:
            raise ValueError(f"Unknown opponent type: {type_str!r}")
        self.type = type_str
        self.opponent_value = value
        self.opponent_role = role
        self.r, self.alpha, self.patience, self.epsilon = self._PROFILES[type_str]
        self.concession_rate = self.r

    def get_response(self, round_num, current_offer, agent_offer):
        """Return ``("ACCEPT", price)`` or ``("OFFER", counter_price)``.

        A seller accepts any offer at or above its value, a buyer any offer at
        or below it. Otherwise the counter-offer moves from ``current_offer``
        toward the opponent's value, is blended back toward ``current_offer``
        by ``alpha``, perturbed by integer noise in ``[-epsilon, epsilon]`` and
        clamped to ``[100, 1000]``.
        """
        if self.opponent_role == "seller" and agent_offer >= self.opponent_value:
            return "ACCEPT", agent_offer
        if self.opponent_role == "buyer" and agent_offer <= self.opponent_value:
            return "ACCEPT", agent_offer

        # Past the patience threshold the opponent concedes faster, capped at 0.4.
        if round_num > self.patience:
            self.concession_rate = min(0.4, self.concession_rate + 0.05)

        target = self.opponent_value
        delta = target - current_offer
        next_offer = current_offer + self.concession_rate * delta
        next_offer = (1.0 - self.alpha) * next_offer + self.alpha * current_offer
        next_offer += random.randint(-self.epsilon, self.epsilon)
        next_offer = max(100, min(1000, int(next_offer)))
        return "OFFER", next_offer
|
| 32 |
+
|
| 33 |
+
class Env:
    """Minimal Python mirror of the negotiation environment for offer-only agents.

    Args:
        opp_type: Opponent personality passed to ``Opponent``.
        a_val: Agent's private value.
        o_val: Opponent's private value.
        role: Agent role; the opponent plays the opposite side.
    """

    def __init__(self, opp_type, a_val, o_val, role):
        self.role = role
        self.agent_value = a_val
        self.opponent_value = o_val
        self.opp_type = opp_type
        self.opp_role = "buyer" if role != "buyer" else "seller"
        self.opp = Opponent(opp_type, o_val, self.opp_role)
        # Bookkeeping for a fresh episode: midpoint opening offer, 20 rounds.
        self.round = 0
        self.max_rounds = 20
        self.current_offer = (a_val + o_val) // 2
        self.last_opp_action = "START"
        self.last_opp_offer = 0

    def step(self, action_price):
        """Submit an offer at ``action_price``; return ``(reward, done)``.

        The reward is non-zero only on the step that ends the episode: the
        time-discounted profit (with penalties) when the opponent accepts, or
        -50 when the round limit is reached without a deal.
        """
        self.round += 1
        verdict, counter_price = self.opp.get_response(
            self.round, self.current_offer, action_price
        )

        if verdict != "ACCEPT":
            # Opponent countered: its price becomes the offer on the table.
            self.current_offer = counter_price
            self.last_opp_action = "OFFER"
            self.last_opp_offer = counter_price
            if self.round >= self.max_rounds:
                return -50, True
            return 0, False

        # Deal closed at the agent's price.
        self.last_opp_action = "ACCEPT"
        self.last_opp_offer = action_price

        if self.role == "seller":
            profit = action_price - self.agent_value
        else:
            profit = self.agent_value - action_price

        remaining = 1.0 - (self.round / self.max_rounds)
        reward = profit * remaining
        if profit < 0:
            reward -= 20
        # Offers far from the opponent's value cost a small extra penalty.
        if abs(action_price - self.opponent_value) > 150:
            reward -= 2
        return reward, True
|
| 76 |
+
|
| 77 |
+
def run_sim(name, opp_type, role, a_val, o_val, b_type):
    """Play one scripted negotiation and print a per-round log.

    Args:
        name: Label printed in the header.
        opp_type: Opponent personality.
        role: Agent role, "buyer" or anything else (treated as seller).
        a_val: Agent's private value.
        o_val: Opponent's private value.
        b_type: 1 = fixed offer, 2 = extreme offer, 3 = offers that step toward
            the opponent's value; any other value offers 0.
    """
    buying = role == "buyer"

    def scripted_price(turn):
        # Offer the scripted agent makes in the given (1-based) round.
        if b_type == 1:
            return 100 if buying else 900
        if b_type == 2:
            return 10 if buying else 1500
        if b_type == 3:
            if turn == 1:
                return 100 if buying else 1000
            if turn == 2:
                return o_val - 100 if buying else o_val + 100
            return o_val
        return 0

    print(f"\n=== {name} ===")
    print(f"Opponent Type: {opp_type} | Agent Role: {role} | Agent Value: {a_val} | Opp Value: {o_val}")
    env = Env(opp_type, a_val, o_val, role)

    finished = False
    # The round cap is a guard in case the environment never reports done.
    while not finished and env.round <= 25:
        offer = scripted_price(env.round + 1)
        step_reward, finished = env.step(offer)
        shown_counter = env.last_opp_offer if env.last_opp_action == "OFFER" else ""
        print(f"[Round {env.round}] Agent OFFER {offer} -> Opponent {env.last_opp_action} {shown_counter} | Step Reward: {step_reward:.2f} | Done: {finished}")

    if env.last_opp_action == "ACCEPT":
        print(f"Final Deal Price: {env.last_opp_offer} | Final Reward: {step_reward:.2f}")
    else:
        print(f"Final Deal Price: NONE | Final Reward: {step_reward:.2f}")
|
| 105 |
+
|
| 106 |
+
# Fixed seed so the opponent's noise, and therefore the printed logs, are reproducible.
random.seed(42)
print("--- TEST LOGS ---")

# (label, opponent personality, agent value, opponent value, agent behavior);
# the agent is the buyer in every scenario.
for _label, _personality, _agent_val, _opp_val, _behavior in (
    ("Test 1A Baseline Greedy", "greedy", 800, 500, 1),
    ("Test 1B Baseline Fair", "fair", 800, 500, 1),
    ("Test 1C Baseline Impatient", "impatient", 800, 500, 1),
    ("Test 2A Extreme vs Fair", "fair", 800, 500, 2),
    ("Test 3A Gradual vs Fair", "fair", 800, 500, 3),
    ("Test 4A Edge Approx Equal", "fair", 510, 500, 3),
    ("Test 4B Edge Large Gap", "fair", 900, 200, 3),
):
    run_sim(_label, _personality, "buyer", _agent_val, _opp_val, _behavior)
|
tests/simulation.cpp
CHANGED
|
@@ -1,15 +1,38 @@
|
|
| 1 |
#include <iostream>
|
| 2 |
#include "../env/NegotiationEnv.h"
|
|
|
|
| 3 |
|
| 4 |
-
|
|
|
|
| 5 |
NegotiationEnv env;
|
|
|
|
|
|
|
| 6 |
env.reset();
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
return 0;
|
| 15 |
}
|
|
|
|
| 1 |
#include <iostream>
|
| 2 |
#include "../env/NegotiationEnv.h"
|
| 3 |
+
#include "../agents/DummyAgent.cpp"
|
| 4 |
|
| 5 |
+
int main() {
|
| 6 |
+
std::cout << "Starting Phase 2 Simulation Trace..." << std::endl;
|
| 7 |
NegotiationEnv env;
|
| 8 |
+
DummyAgent agent;
|
| 9 |
+
|
| 10 |
env.reset();
|
| 11 |
+
State state = env.getState();
|
| 12 |
+
std::cout << "Agent Role: " << state.getRole() << std::endl;
|
| 13 |
+
std::cout << "Initial Offer state: " << state.getCurrentOffer() << std::endl;
|
| 14 |
|
| 15 |
+
bool done = false;
|
| 16 |
+
double total_reward = 0;
|
| 17 |
+
|
| 18 |
+
while (!done) {
|
| 19 |
+
// The dummy agent just offers 100 statically in this demo
|
| 20 |
+
Action a = agent.act(state);
|
| 21 |
+
std::cout << "\n[Round " << state.getRound() + 1 << "]" << std::endl;
|
| 22 |
+
std::cout << " Agent Action: OFFER " << a.getPrice() << std::endl;
|
| 23 |
+
|
| 24 |
+
auto [next_state, reward, is_done] = env.step(a);
|
| 25 |
+
state = next_state;
|
| 26 |
+
done = is_done;
|
| 27 |
+
total_reward += reward;
|
| 28 |
+
|
| 29 |
+
std::cout << " Opponent Action: " << state.getLastOpponentAction();
|
| 30 |
+
if (state.getLastOpponentAction() == "OFFER") {
|
| 31 |
+
std::cout << " " << state.getLastOpponentOffer();
|
| 32 |
+
}
|
| 33 |
+
std::cout << "\n Step Reward: " << reward << ", Done: " << (done ? "true" : "false") << std::endl;
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
std::cout << "\nSimulation Ended. Final Aggregate Reward Processed: " << total_reward << std::endl;
|
| 37 |
return 0;
|
| 38 |
}
|