Spaces:

MridulNegi2005
/

negotiation-openenv

Sleeping

App Files Files Community

Atishay Jain committed on Apr 5

Commit

025f14a

1 Parent(s): 34dfc61

feat: implement Opponent class and add simulation testing framework for negotiation environment

Browse files

Files changed (8) hide show

build.bat +13 -0
env/NegotiationEnv.cpp +97 -8
env/NegotiationEnv.h +7 -3
opponent/Opponent.cpp +66 -0
opponent/Opponent.h +25 -0
tests/experiment.cpp +104 -0
tests/run_experiments.py +114 -0
tests/simulation.cpp +30 -7

build.bat ADDED Viewed

	@@ -0,0 +1,13 @@

+@echo off
+REM Builds the simulation binary with g++ and, when compilation succeeds, runs it right away.
+echo Building Meta OpenEnv C++ Project...
+REM Compiles the simulation driver together with the environment and opponent sources as C++17.
+g++ tests/simulation.cpp env/NegotiationEnv.cpp opponent/Opponent.cpp -I. -std=c++17 -o test_sim.exe
+REM ERRORLEVEL holds the exit code of g++; 0 means the build succeeded.
+if %ERRORLEVEL% EQU 0 (
+    echo Build Successful! Running simulation...
+    echo =======================================
+    .\test_sim.exe
+    echo =======================================
+) else (
+    echo Build Failed! Make sure g++ is installed and in your PATH.
+)
+REM Wait for a key press so the console output stays visible.
+pause

env/NegotiationEnv.cpp CHANGED Viewed

@@ -1,22 +1,111 @@
 #include "NegotiationEnv.h"
 NegotiationEnv::NegotiationEnv() {
-    // Initialization logic placeholder
-    agent_value = 0;
-    opponent_value = 0;
-    opponent_type = "fair";
 }
 void NegotiationEnv::reset() {
-    // Initialize episode placeholder
-    State new_state;
-    state = new_state;
 }
 std::tuple<State, double, bool> NegotiationEnv::step(Action action) {
-    // Process action placeholder
     double reward = 0.0;
     bool done = false;
     return std::make_tuple(state, reward, done);
 }

 #include "NegotiationEnv.h"
+#include "../opponent/Opponent.h"
+#include <random>
+#include <ctime>
+#include <cmath>
 NegotiationEnv::NegotiationEnv() {
+    // Seed the C library generator from the wall clock; reset() draws from std::rand().
+    const auto clock_seed = static_cast<unsigned int>(std::time(nullptr));
+    std::srand(clock_seed);
 }
 void NegotiationEnv::reset() {
+    state.setRound(0);
+    state.setMaxRounds(20);
+    agent_value = 100 + (std::rand() % 901);
+    opponent_value = 100 + (std::rand() % 901);
+    if (std::rand() % 2 == 0) {
+        state.setRole("buyer");
+    } else {
+        state.setRole("seller");
+    }
+    int t = std::rand() % 3;
+    if (t == 0) opponent_type = "greedy";
+    else if (t == 1) opponent_type = "fair";
+    else opponent_type = "impatient";
+    int initial_offer = (agent_value + opponent_value) / 2;
+    state.setCurrentOffer(initial_offer);
+    state.setLastOpponentAction("START");
+    state.setLastOpponentOffer(0);
+    std::string opp_role = (state.getRole() == "buyer") ? "seller" : "buyer";
+    opponent_strategy = std::make_unique<Opponent>(opponent_type, opponent_value, opp_role);
+}
+double NegotiationEnv::compute_reward(int deal_price) {
+    // Profit is measured against the agent's private value; its sign depends on the role.
+    const bool selling = (state.getRole() == "seller");
+    const double profit = selling ? (deal_price - agent_value) : (agent_value - deal_price);
+    // Linear time discount: the later the round, the smaller the kept share of the profit.
+    const double elapsed = static_cast<double>(state.getRound()) / state.getMaxRounds();
+    double reward = profit * (1.0 - elapsed);
+    // Flat extra penalty for closing a deal at a loss.
+    if (profit < 0) {
+        reward -= 20;
+    }
+    return reward;
 }
 std::tuple<State, double, bool> NegotiationEnv::step(Action action) {
+    // Applies one agent action and returns (state, reward, done).
+    // reset() must have run first: it creates the opponent_strategy used for OFFER actions.
+    // Reward is 0 on every non-terminal step. Terminal rewards come from compute_reward()
+    // for deals, and are -50 for a REJECT or for running out of rounds.
+    // 1. Increment round
+    int current_round = state.getRound() + 1;
+    state.setRound(current_round);
     double reward = 0.0;
     bool done = false;
+    // True only when the opponent accepts the agent's own offer on this step.
+    bool offer_accepted = false;
+    if (action.getType() == ActionType::ACCEPT) {
+        // 2. If ACCEPT: close at the offer currently on the table. After a counter-offer
+        // this equals the opponent's last price; before any counter-offer it is the
+        // opening offer from reset() rather than the 0 placeholder stored as the last
+        // opponent offer (which would hand a buyer a free deal).
+        int deal_price = state.getCurrentOffer();
+        reward = compute_reward(deal_price);
+        done = true;
+    } else if (action.getType() == ActionType::REJECT) {
+        // 3. If REJECT
+        reward = -50;
+        done = true;
+    } else if (action.getType() == ActionType::OFFER) {
+        // 4. If OFFER
+        Action opp_response = opponent_strategy->getResponse(state, action);
+        if (opp_response.getType() == ActionType::ACCEPT) {
+            int deal_price = action.getPrice();
+            reward = compute_reward(deal_price);
+            done = true;
+            offer_accepted = true;
+            // 5. Update state
+            state.setLastOpponentAction("ACCEPT");
+        } else {
+            // Opponent generates counter-offer
+            state.setCurrentOffer(opp_response.getPrice());
+            // 5. Update state
+            state.setLastOpponentAction("OFFER");
+            state.setLastOpponentOffer(opp_response.getPrice());
+        }
+    }
+    // 6. If round == max_rounds
+    if (!done && current_round >= state.getMaxRounds()) {
+        reward = -50;
+        done = true;
+    }
+    // Aggression penalty: applies only when a deal actually closes on the agent's offer,
+    // not when the episode merely times out on an OFFER step.
+    if (offer_accepted) {
+        if (std::abs(action.getPrice() - opponent_value) > 150) {
+            reward -= 2;
+        }
+    }
+    // 7. If NOT done (IMPORTANT strict rule)
+    if (!done) {
+        reward = 0;
+    }
     return std::make_tuple(state, reward, done);
 }

env/NegotiationEnv.h CHANGED Viewed

@@ -1,17 +1,21 @@
 #pragma once
 #include "State.h"
 #include "Action.h"
 #include <tuple>
 #include <string>
 class NegotiationEnv {
 private:
     State state;
-    // Hidden (internal)
     int agent_value;
     int opponent_value;
-    std::string opponent_type; // "greedy", "fair", "impatient"
 public:
     NegotiationEnv();

 #pragma once
 #include "State.h"
 #include "Action.h"
+#include "../opponent/OpponentStrategy.h"
 #include <tuple>
 #include <string>
+#include <memory>
 class NegotiationEnv {
 private:
     State state;
     int agent_value;
     int opponent_value;
+    std::string opponent_type;
+    std::unique_ptr<OpponentStrategy> opponent_strategy;
+    double compute_reward(int deal_price);
 public:
     NegotiationEnv();

opponent/Opponent.cpp ADDED Viewed

	@@ -0,0 +1,66 @@

+#include "Opponent.h"
+#include <stdexcept>
+#include <ctime>
+// Builds an opponent of the given type ("greedy", "fair" or "impatient") with a private
+// value and a role. Throws std::invalid_argument for any other type name.
+Opponent::Opponent(const std::string& type, int value, const std::string& role)
+    : type(type), opponent_value(value), opponent_role(role) {
+    // Per-type parameters: base concession rate, anchor weight, patience round, noise bound.
+    if (type == "greedy") {
+        r = 0.05; alpha = 0.7; patience = 10; epsilon = 5;
+    } else if (type == "fair") {
+        r = 0.15; alpha = 0.4; patience = 7; epsilon = 10;
+    } else if (type == "impatient") {
+        r = 0.25; alpha = 0.2; patience = 3; epsilon = 15;
+    } else {
+        throw std::invalid_argument("Unknown opponent type");
+    }
+    concession_rate = r;
+    // Seeding from the clock alone gives every opponent created within the same second
+    // an identical noise sequence, so mix in a value from std::random_device as well.
+    std::random_device entropy;
+    std::seed_seq seed{static_cast<unsigned int>(std::time(nullptr)),
+                       static_cast<unsigned int>(entropy())};
+    rng.seed(seed);
+}
+Action Opponent::getResponse(const State& state, const Action& agent_action) {
+    // Only price offers can be answered; anything else is rejected outright.
+    if (agent_action.getType() != ActionType::OFFER) {
+        return Action(ActionType::REJECT);
+    }
+    // Acceptance: a seller takes any price at or above its value, a buyer any price at or below.
+    const int offered = agent_action.getPrice();
+    const bool acceptable = (opponent_role == "seller") ? (offered >= opponent_value)
+                                                        : (offered <= opponent_value);
+    if (acceptable) {
+        return Action(ActionType::ACCEPT);
+    }
+    // Patience: once the round exceeds the patience threshold, each call raises the
+    // concession rate by 0.05, capped at 0.4.
+    if (state.getRound() > patience) {
+        concession_rate += 0.05;
+        if (concession_rate > 0.4) concession_rate = 0.4;
+    }
+    // Counter offer: move from the offer on the table toward the opponent's own value.
+    const double on_table = state.getCurrentOffer();
+    const double gap = opponent_value - on_table;
+    double counter = on_table + concession_rate * gap;
+    // Anchor effect: blend back toward the current offer with weight alpha.
+    counter = (1.0 - alpha) * counter + alpha * on_table;
+    // Noise: uniform integer jitter in [-epsilon, epsilon].
+    std::uniform_int_distribution<int> jitter(-epsilon, epsilon);
+    counter += jitter(rng);
+    // Truncate to an integer price and clamp to [100, 1000].
+    int price = static_cast<int>(counter);
+    if (price < 100) {
+        price = 100;
+    } else if (price > 1000) {
+        price = 1000;
+    }
+    return Action(ActionType::OFFER, price);
+}

opponent/Opponent.h ADDED Viewed

	@@ -0,0 +1,25 @@

+#pragma once
+#include <string>
+#include <random>
+#include "../env/State.h"
+#include "../env/Action.h"
+#include "OpponentStrategy.h"
+class Opponent : public OpponentStrategy {  // Rule-based opponent whose parameters are picked from its type name.
+private:
+    std::string type;           // "greedy", "fair" or "impatient"
+    int opponent_value;         // private valuation: acceptance threshold and target of counter-offers
+    std::string opponent_role;  // "seller" or "buyer"; decides the direction of the acceptance test
+    double r;                   // base concession rate for the chosen type
+    double alpha;               // anchor weight: how strongly a counter-offer sticks to the current offer
+    int patience;               // round number after which the concession rate starts to grow
+    int epsilon;                // counter-offers get uniform integer noise in [-epsilon, epsilon]
+    double concession_rate;     // current concession rate; starts at r
+    std::mt19937 rng;           // generator for the counter-offer noise
+public:
+    Opponent(const std::string& type, int value, const std::string& role);
+    Action getResponse(const State& state, const Action& agent_action) override;
+};

tests/experiment.cpp ADDED Viewed

	@@ -0,0 +1,104 @@

+#define private public
+#include "../env/NegotiationEnv.h"
+#undef private
+#include "../opponent/Opponent.h"
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include <memory>
+void run_simulation(std::string test_name, std::string opp_type, std::string role, int a_val, int o_val, int behavior_type) {  // Runs one scripted episode and prints a per-round trace.
+    NegotiationEnv env;
+    env.reset();
+    // Force a specific scenario by overwriting what reset() randomized (uses the private-access macro at the top of this file)
+    env.opponent_type = opp_type;
+    env.agent_value = a_val;
+    env.opponent_value = o_val;
+    env.state.setRole(role);
+    env.state.setCurrentOffer((a_val + o_val) / 2);
+    env.state.setLastOpponentOffer(0);
+    std::string opp_role = (role == "buyer") ? "seller" : "buyer";
+    env.opponent_strategy = std::make_unique<Opponent>(opp_type, o_val, opp_role);  // replaces the opponent created by reset()
+    std::cout << "\n=== " << test_name << " ===" << std::endl;
+    std::cout << "Opp Type: " << opp_type << " | Role: " << role << " | Ag_Val: " << a_val << " | Opp_Val: " << o_val << std::endl;
+    bool done = false;
+    double final_reward = 0;
+    int round = 0;
+    int action_price = 0;  // stays 0 when behavior_type is not 1, 2 or 3
+    while (!done) {
+        round = env.state.getRound() + 1;  // number of the round that the upcoming step() will play
+        if (behavior_type == 1) { // Baseline: fixed offer
+            action_price = (role == "buyer") ? 100 : 900;
+        } else if (behavior_type == 2) { // Extreme: super bad
+            action_price = (role == "buyer") ? 10 : 1500;
+        } else if (behavior_type == 3) { // Gradual improvement
+            if (role == "buyer") {
+                if (round == 1) action_price = 100;
+                else if (round == 2) action_price = a_val - 200; // e.g. 600 when a_val is 800
+                else action_price = a_val - 50; // e.g. 750 (a seller opponent valued at 500 accepts this)
+            } else { // seller
+                if (round == 1) action_price = 1000;
+                else if (round == 2) action_price = a_val + 200;
+                else action_price = a_val + 50;
+            }
+        }
+        Action agent_action(ActionType::OFFER, action_price);
+        std::cout << "[Round " << round << "] Agent OFFER " << action_price << " -> ";
+        auto [next_state, reward, is_done] = env.step(agent_action);
+        done = is_done;
+        std::cout << "Opponent " << env.state.getLastOpponentAction();
+        if (env.state.getLastOpponentAction() == "OFFER") {
+            std::cout << " " << env.state.getLastOpponentOffer();
+        }
+        std::cout << " | Reward: " << reward << " | Done: " << (done ? "True" : "False") << std::endl;
+        if (done) final_reward = reward;
+        if (round >= env.state.getMaxRounds()) break;  // extra stop at the round limit, independent of the done flag
+    }
+    if (env.state.getLastOpponentAction() == "ACCEPT") {
+        // The opponent accepted the agent's offer,
+        // so the deal price is the last price the agent proposed.
+        std::cout << "Final Deal Price: " << action_price << " | Final Reward: " << final_reward << std::endl;
+    } else {
+        std::cout << "Final Deal Price: NONE (" << env.state.getLastOpponentAction() << ") | Final Reward: " << final_reward << std::endl;
+    }
+}
+int main() {
+    std::cout << "--- EXPERIMENT LOGS ---" << std::endl;
+    // One row per scripted episode, run in the listed order.
+    struct Scenario {
+        const char* name;
+        const char* opp_type;
+        const char* role;
+        int agent_value;
+        int opp_value;
+        int behavior;  // 1 = baseline, 2 = extreme, 3 = gradual improvement
+    };
+    // In the default setup the agent is a buyer valuing the item at 800 (profit = value - price)
+    // and the opponent is a seller who accepts any price >= 500.
+    const Scenario scenarios[] = {
+        // Test set 1: baseline
+        {"TEST SET 1A (Baseline vs Greedy)", "greedy", "buyer", 800, 500, 1},
+        {"TEST SET 1B (Baseline vs Fair)", "fair", "buyer", 800, 500, 1},
+        {"TEST SET 1C (Baseline vs Impatient)", "impatient", "buyer", 800, 500, 1},
+        // Test set 2: extreme strategy
+        {"TEST SET 2A (Extreme vs Fair)", "fair", "buyer", 800, 500, 2},
+        {"TEST SET 2B (Extreme vs Impatient)", "impatient", "buyer", 800, 500, 2},
+        // Test set 3: gradual improvement
+        {"TEST SET 3A (Gradual vs Fair)", "fair", "buyer", 800, 500, 3},
+        {"TEST SET 3B (Gradual vs Impatient)", "impatient", "buyer", 800, 500, 3},
+        // Test set 4: edge cases, first very tight margins, then a very wide gap
+        {"TEST SET 4A (Edge - Approx Equal)", "fair", "buyer", 510, 500, 3},
+        {"TEST SET 4B (Edge - Large Gap)", "fair", "buyer", 900, 200, 3},
+    };
+    for (const Scenario& s : scenarios) {
+        run_simulation(s.name, s.opp_type, s.role, s.agent_value, s.opp_value, s.behavior);
+    }
+    return 0;
+}

tests/run_experiments.py ADDED Viewed

	@@ -0,0 +1,114 @@

+import random
+class Opponent:
+    def __init__(self, type_str, value, role):
+        self.type = type_str
+        self.opponent_value = value
+        self.opponent_role = role
+        if type_str == "greedy":
+            self.r, self.alpha, self.patience, self.epsilon = 0.05, 0.7, 10, 5
+        elif type_str == "fair":
+            self.r, self.alpha, self.patience, self.epsilon = 0.15, 0.4, 7, 10
+        elif type_str == "impatient":
+            self.r, self.alpha, self.patience, self.epsilon = 0.25, 0.2, 3, 15
+        self.concession_rate = self.r
+    def get_response(self, round_num, current_offer, agent_offer):
+        if self.opponent_role == "seller" and agent_offer >= self.opponent_value:
+            return "ACCEPT", agent_offer
+        if self.opponent_role == "buyer" and agent_offer <= self.opponent_value:
+            return "ACCEPT", agent_offer
+        if round_num > self.patience:
+            self.concession_rate = min(0.4, self.concession_rate + 0.05)
+        target = self.opponent_value
+        delta = target - current_offer
+        next_offer = current_offer + self.concession_rate * delta
+        next_offer = (1.0 - self.alpha) * next_offer + self.alpha * current_offer
+        next_offer += random.randint(-self.epsilon, self.epsilon)
+        next_offer = max(100, min(1000, int(next_offer)))
+        return "OFFER", next_offer
+class Env:  # Minimal model of the negotiation environment in which the agent only ever makes OFFER actions.
+    def __init__(self, opp_type, a_val, o_val, role):
+        self.agent_value = a_val
+        self.opponent_value = o_val
+        self.role = role
+        self.opp_type = opp_type
+        self.opp_role = "seller" if role == "buyer" else "buyer"  # opponent takes the opposite role
+        self.opp = Opponent(opp_type, o_val, self.opp_role)
+        self.current_offer = (a_val + o_val) // 2  # opening offer: midpoint of both valuations
+        self.max_rounds = 20
+        self.round = 0
+        self.last_opp_action = "START"
+        self.last_opp_offer = 0
+    def step(self, action_price):  # Plays one round with the agent offering action_price; returns (reward, done).
+        self.round += 1
+        aggressive = abs(action_price - self.opponent_value) > 150  # offer more than 150 away from the opponent's value
+        opp_action, opp_price = self.opp.get_response(self.round, self.current_offer, action_price)
+        done = False
+        reward = 0
+        if opp_action == "ACCEPT":
+            deal_price = action_price
+            done = True
+            self.last_opp_action = "ACCEPT"
+            self.last_opp_offer = deal_price
+            profit = deal_price - self.agent_value if self.role == "seller" else self.agent_value - deal_price
+            t_factor = 1.0 - (self.round / self.max_rounds)  # linear time discount
+            reward = profit * t_factor
+            if profit < 0: reward -= 20  # flat penalty for a losing deal
+            if aggressive: reward -= 2  # aggression penalty, applied only when a deal closes
+        else:
+            self.current_offer = opp_price
+            self.last_opp_action = "OFFER"
+            self.last_opp_offer = opp_price
+            if self.round >= self.max_rounds:
+                reward = -50  # ran out of rounds without a deal
+                done = True
+        return reward, done
+def run_sim(name, opp_type, role, a_val, o_val, b_type):
+    print(f"\n=== {name} ===")
+    print(f"Opponent Type: {opp_type} | Agent Role: {role} | Agent Value: {a_val} | Opp Value: {o_val}")
+    env = Env(opp_type, a_val, o_val, role)
+    done = False
+    while not done and env.round <= 25:
+        act_price = 0
+        rnd = env.round + 1
+        if b_type == 1:
+            act_price = 100 if role == "buyer" else 900
+        elif b_type == 2:
+            act_price = 10 if role == "buyer" else 1500
+        elif b_type == 3:
+            if role == "buyer":
+                act_price = 100 if rnd == 1 else (o_val - 100 if rnd == 2 else o_val)
+            else:
+                act_price = 1000 if rnd == 1 else (o_val + 100 if rnd == 2 else o_val)
+        r, d = env.step(act_price)
+        done = d
+        opp_val_print = env.last_opp_offer if env.last_opp_action == "OFFER" else ""
+        print(f"[Round {env.round}] Agent OFFER {act_price} -> Opponent {env.last_opp_action} {opp_val_print} | Step Reward: {r:.2f} | Done: {done}")
+    if env.last_opp_action == "ACCEPT":
+        print(f"Final Deal Price: {env.last_opp_offer} | Final Reward: {r:.2f}")
+    else:
+        print(f"Final Deal Price: NONE | Final Reward: {r:.2f}")
+random.seed(42)
+print("--- TEST LOGS ---")
+run_sim("Test 1A Baseline Greedy", "greedy", "buyer", 800, 500, 1)
+run_sim("Test 1B Baseline Fair", "fair", "buyer", 800, 500, 1)
+run_sim("Test 1C Baseline Impatient", "impatient", "buyer", 800, 500, 1)
+run_sim("Test 2A Extreme vs Fair", "fair", "buyer", 800, 500, 2)
+run_sim("Test 3A Gradual vs Fair", "fair", "buyer", 800, 500, 3)
+run_sim("Test 4A Edge Approx Equal", "fair", "buyer", 510, 500, 3)
+run_sim("Test 4B Edge Large Gap", "fair", "buyer", 900, 200, 3)

tests/simulation.cpp CHANGED Viewed

@@ -1,15 +1,38 @@
 #include <iostream>
 #include "../env/NegotiationEnv.h"
-void run_simulation() {
     NegotiationEnv env;
     env.reset();
-    // Placeholder for simulation loop
-    std::cout << "Simulation initialized." << std::endl;
-}
-int main() {
-    run_simulation();
     return 0;
 }

 #include <iostream>
 #include "../env/NegotiationEnv.h"
+#include "../agents/DummyAgent.cpp"
+int main() {  // Runs a single episode with DummyAgent against the environment and prints each round.
+    std::cout << "Starting Phase 2 Simulation Trace..." << std::endl;
     NegotiationEnv env;
+    DummyAgent agent;
     env.reset();
+    State state = env.getState();  // local copy of the state; refreshed from step() every round
+    std::cout << "Agent Role: " << state.getRole() << std::endl;
+    std::cout << "Initial Offer state: " << state.getCurrentOffer() << std::endl;
+    bool done = false;
+    double total_reward = 0;
+    while (!done) {
+        // Ask the agent for its next action (original note: the dummy agent always offers 100; DummyAgent is not visible here, TODO confirm)
+        Action a = agent.act(state);
+        std::cout << "\n[Round " << state.getRound() + 1 << "]" << std::endl;
+        std::cout << "  Agent Action: OFFER " << a.getPrice() << std::endl;  // the label is hard-coded to OFFER whatever the action type is
+        auto [next_state, reward, is_done] = env.step(a);
+        state = next_state;
+        done = is_done;
+        total_reward += reward;
+        std::cout << "  Opponent Action: " << state.getLastOpponentAction();
+        if (state.getLastOpponentAction() == "OFFER") {
+            std::cout << " " << state.getLastOpponentOffer();
+        }
+        std::cout << "\n  Step Reward: " << reward << ", Done: " << (done ? "true" : "false") << std::endl;
+    }
+    std::cout << "\nSimulation Ended. Final Aggregate Reward Processed: " << total_reward << std::endl;
     return 0;
 }