update changes
Browse files- README.md +49 -20
- __init__.py +2 -0
- inference.py +147 -63
- openenv.yaml +5 -0
README.md
CHANGED
|
@@ -11,6 +11,8 @@ pinned: false
|
|
| 11 |
|
| 12 |
An OpenEnv-compliant environment that tests AI agents on financial data analysis, market sentiment, and trading strategy evaluation.
|
| 13 |
|
|
|
|
|
|
|
| 14 |
## 🎯 Overview
|
| 15 |
|
| 16 |
Quant-Gym is a benchmark environment where AI agents can practice:
|
|
@@ -21,6 +23,7 @@ Quant-Gym is a benchmark environment where AI agents can practice:
|
|
| 21 |
|
| 22 |
**This is a research benchmark for evaluating AI reasoning in financial contexts, not a trading tool.**
|
| 23 |
|
|
|
|
| 24 |
## ๐ Environment Tasks
|
| 25 |
|
| 26 |
| Task | Description | Difficulty |
|
|
@@ -29,19 +32,24 @@ Quant-Gym is a benchmark environment where AI agents can practice:
|
|
| 29 |
| **Task 2** | Analyze news headlines and recommend Buy/Sell/Hold with explanation | Medium |
|
| 30 |
| **Task 3** | Backtest a trading strategy (momentum/mean reversion) with Sharpe ratio & drawdown | Hard |
|
| 31 |
|
|
|
|
| 32 |
## ๐๏ธ API Endpoints
|
| 33 |
|
| 34 |
| Endpoint | Method | Description |
|
| 35 |
|----------|--------|-------------|
|
| 36 |
| `/` | GET | Welcome message |
|
| 37 |
| `/health` | GET | Health check |
|
|
|
|
|
|
|
| 38 |
| `/reset` | POST | Reset environment to initial state |
|
| 39 |
-
| `/step` | POST | Execute an action
|
| 40 |
| `/state` | GET | Get current environment state |
|
| 41 |
| `/tasks` | GET | List all available tasks |
|
| 42 |
-
| `/docs` | GET | Interactive API documentation
|
|
|
|
| 43 |
|
| 44 |
## 🔧 Installation
|
|
|
|
| 45 |
### Prerequisites
|
| 46 |
- Python 3.10+
|
| 47 |
- Docker (for containerized deployment)
|
|
@@ -56,13 +64,14 @@ cd quant-gym-openenv
|
|
| 56 |
# Install dependencies
|
| 57 |
pip install -r requirements.txt
|
| 58 |
|
| 59 |
-
# Set up Hugging Face token
|
| 60 |
-
'HF_TOKEN=your_hf_token_here'
|
| 61 |
|
| 62 |
# Start the server
|
| 63 |
python -m uvicorn server.app:app --host 0.0.0.0 --port 8000 --reload
|
| 64 |
|
| 65 |
|
|
|
|
| 66 |
🎮 Action Schema
|
| 67 |
The agent can take the following actions:
|
| 68 |
|
|
@@ -102,9 +111,11 @@ json
|
|
| 102 |
"total_return": 0.18
|
| 103 |
}
|
| 104 |
}
|
| 105 |
-
๐ Running the Baseline Agent
|
| 106 |
|
| 107 |
|
|
|
|
|
|
|
|
|
|
| 108 |
# Set your Hugging Face token
|
| 109 |
export HF_TOKEN="your_hf_token_here"
|
| 110 |
|
|
@@ -112,13 +123,15 @@ export HF_TOKEN="your_hf_token_here"
|
|
| 112 |
python inference.py
|
| 113 |
Expected Output
|
| 114 |
text
|
| 115 |
-
[INFO]
|
| 116 |
-
[START] task=quant-gym env=quant-gym model=
|
| 117 |
[STEP] step=1 action=BUY 5 reward=0.15 done=false error=null
|
| 118 |
[STEP] step=2 action=GET_PRICE reward=0.05 done=false error=null
|
| 119 |
[STEP] step=3 action=SELL 5 reward=0.20 done=false error=null
|
| 120 |
...
|
| 121 |
[END] success=true steps=10 score=0.650 rewards=...
|
|
|
|
|
|
|
| 122 |
🐳 Docker Deployment
|
| 123 |
Build and run with Docker:
|
| 124 |
|
|
@@ -133,6 +146,7 @@ Then access the API at http://localhost:7860
|
|
| 133 |
๐ Hugging Face Space
|
| 134 |
Live demo: https://huggingface.co/spaces/Astocoder/quant-gym
|
| 135 |
|
|
|
|
| 136 |
๐ Project Structure
|
| 137 |
text
|
| 138 |
quant-gym-openenv/
|
|
@@ -140,26 +154,29 @@ quant-gym-openenv/
|
|
| 140 |
├── inference.py # Baseline agent script
|
| 141 |
├── models.py # Pydantic schemas
|
| 142 |
├── openenv.yaml # OpenEnv configuration
|
| 143 |
-
├── pyproject.toml
|
| 144 |
├── requirements.txt # Python dependencies
|
| 145 |
├── README.md # This file
|
|
|
|
|
|
|
|
|
|
| 146 |
├── server/
|
| 147 |
-
│   ├── app.py
|
| 148 |
-
│   ├── environment.py
|
| 149 |
│   └── data/
|
| 150 |
-
│       ├── prices.csv
|
| 151 |
-
│       └── news.json
|
| 152 |
-
└── graders/
|
| 153 |
-
    ├── task1_grader.py
|
| 154 |
-
    ├── task2_grader.py
|
| 155 |
-
    └── task3_grader.py
|
| 156 |
|
| 157 |
|
| 158 |
๐ Environment Variables
|
| 159 |
Variable Description Default
|
| 160 |
-
HF_TOKEN Hugging Face API token None
|
| 161 |
-
API_BASE_URL
|
| 162 |
-
|
| 163 |
BASE_URL Quant-Gym API URL http://localhost:8000
|
| 164 |
|
| 165 |
|
|
@@ -172,13 +189,25 @@ Reward Function: Partial progress signals for meaningful learning
|
|
| 172 |
|
| 173 |
Reproducibility: Static data ensures consistent results
|
| 174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
⚠️ Disclaimer
|
| 176 |
This is a research benchmark environment for evaluating AI agent reasoning. It does not provide financial advice or real trading recommendations. All data is for simulation purposes only.
|
| 177 |
|
|
|
|
| 178 |
๐ License
|
| 179 |
MIT License - See LICENSE file for details.
|
| 180 |
|
| 181 |
Built with: Python, FastAPI, OpenEnv, Hugging Face, Docker
|
| 182 |
|
| 183 |
|
| 184 |
-
|
|
|
|
| 11 |
|
| 12 |
An OpenEnv-compliant environment that tests AI agents on financial data analysis, market sentiment, and trading strategy evaluation.
|
| 13 |
|
| 14 |
+
|
| 15 |
+
|
| 16 |
## 🎯 Overview
|
| 17 |
|
| 18 |
Quant-Gym is a benchmark environment where AI agents can practice:
|
|
|
|
| 23 |
|
| 24 |
**This is a research benchmark for evaluating AI reasoning in financial contexts, not a trading tool.**
|
| 25 |
|
| 26 |
+
|
| 27 |
## ๐ Environment Tasks
|
| 28 |
|
| 29 |
| Task | Description | Difficulty |
|
|
|
|
| 32 |
| **Task 2** | Analyze news headlines and recommend Buy/Sell/Hold with explanation | Medium |
|
| 33 |
| **Task 3** | Backtest a trading strategy (momentum/mean reversion) with Sharpe ratio & drawdown | Hard |
|
| 34 |
|
| 35 |
+
|
| 36 |
## ๐๏ธ API Endpoints
|
| 37 |
|
| 38 |
| Endpoint | Method | Description |
|
| 39 |
|----------|--------|-------------|
|
| 40 |
| `/` | GET | Welcome message |
|
| 41 |
| `/health` | GET | Health check |
|
| 42 |
+
| `/metadata` | GET | Environment metadata |
|
| 43 |
+
| `/schema` | GET | Action/observation schemas |
|
| 44 |
| `/reset` | POST | Reset environment to initial state |
|
| 45 |
+
| `/step` | POST | Execute an action |
|
| 46 |
| `/state` | GET | Get current environment state |
|
| 47 |
| `/tasks` | GET | List all available tasks |
|
| 48 |
+
| `/docs` | GET | Interactive API documentation |
|
| 49 |
+
|
| 50 |
|
| 51 |
## 🔧 Installation
|
| 52 |
+
|
| 53 |
### Prerequisites
|
| 54 |
- Python 3.10+
|
| 55 |
- Docker (for containerized deployment)
|
|
|
|
| 64 |
# Install dependencies
|
| 65 |
pip install -r requirements.txt
|
| 66 |
|
| 67 |
+
# Set up Hugging Face token for LLM features (create .env file)
|
| 68 |
+
echo 'HF_TOKEN=your_hf_token_here' > .env
|
| 69 |
|
| 70 |
# Start the server
|
| 71 |
python -m uvicorn server.app:app --host 0.0.0.0 --port 8000 --reload
|
| 72 |
|
| 73 |
|
| 74 |
+
|
| 75 |
🎮 Action Schema
|
| 76 |
The agent can take the following actions:
|
| 77 |
|
|
|
|
| 111 |
"total_return": 0.18
|
| 112 |
}
|
| 113 |
}
|
|
|
|
| 114 |
|
| 115 |
|
| 116 |
+
|
| 117 |
+
๐ Running the Baseline Agent
|
| 118 |
+
bash
|
| 119 |
# Set your Hugging Face token
|
| 120 |
export HF_TOKEN="your_hf_token_here"
|
| 121 |
|
|
|
|
| 123 |
python inference.py
|
| 124 |
Expected Output
|
| 125 |
text
|
| 126 |
+
[INFO] Starting Quant-Gym Inference
|
| 127 |
+
[START] task=quant-gym env=quant-gym model=gpt-3.5-turbo
|
| 128 |
[STEP] step=1 action=BUY 5 reward=0.15 done=false error=null
|
| 129 |
[STEP] step=2 action=GET_PRICE reward=0.05 done=false error=null
|
| 130 |
[STEP] step=3 action=SELL 5 reward=0.20 done=false error=null
|
| 131 |
...
|
| 132 |
[END] success=true steps=10 score=0.650 rewards=...
|
| 133 |
+
|
| 134 |
+
|
| 135 |
🐳 Docker Deployment
|
| 136 |
Build and run with Docker:
|
| 137 |
|
|
|
|
| 146 |
๐ Hugging Face Space
|
| 147 |
Live demo: https://huggingface.co/spaces/Astocoder/quant-gym
|
| 148 |
|
| 149 |
+
|
| 150 |
๐ Project Structure
|
| 151 |
text
|
| 152 |
quant-gym-openenv/
|
|
|
|
| 154 |
├── inference.py # Baseline agent script
|
| 155 |
├── models.py # Pydantic schemas
|
| 156 |
├── openenv.yaml # OpenEnv configuration
|
| 157 |
+
├── pyproject.toml # Python project config
|
| 158 |
├── requirements.txt # Python dependencies
|
| 159 |
├── README.md # This file
|
| 160 |
+
├── task1_grader.py # Price fetch grader
|
| 161 |
+
├── task2_grader.py # News analysis grader
|
| 162 |
+
├── task3_grader.py # Backtest grader
|
| 163 |
├── server/
|
| 164 |
+
│   ├── app.py # FastAPI server
|
| 165 |
+
│   ├── environment.py # Trading logic
|
| 166 |
│   └── data/
|
| 167 |
+
│       ├── prices.csv # Market data
|
| 168 |
+
│       └── news.json # News headlines
|
| 169 |
+
└── graders/ # Backup grader folder
|
| 170 |
+
    ├── task1_grader.py
|
| 171 |
+
    ├── task2_grader.py
|
| 172 |
+
    └── task3_grader.py
|
| 173 |
|
| 174 |
|
| 175 |
๐ Environment Variables
|
| 176 |
Variable Description Default
|
| 177 |
+
HF_TOKEN Hugging Face API token None (optional)
|
| 178 |
+
API_BASE_URL LLM API endpoint None (judge provides)
|
| 179 |
+
API_KEY LLM API key None (judge provides)
|
| 180 |
BASE_URL Quant-Gym API URL http://localhost:8000
|
| 181 |
|
| 182 |
|
|
|
|
| 189 |
|
| 190 |
Reproducibility: Static data ensures consistent results
|
| 191 |
|
| 192 |
+
|
| 193 |
+
💡 Unique Innovation
|
| 194 |
+
Unlike traditional trading environments that only measure profit, Quant-Gym rewards explanation quality:
|
| 195 |
+
|
| 196 |
+
Agents must explain their reasoning for each trade
|
| 197 |
+
|
| 198 |
+
Graders evaluate financial terminology, logical reasoning, and detail
|
| 199 |
+
|
| 200 |
+
Promotes transparent, auditable AI decision-making
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
|
| 204 |
⚠️ Disclaimer
|
| 205 |
This is a research benchmark environment for evaluating AI agent reasoning. It does not provide financial advice or real trading recommendations. All data is for simulation purposes only.
|
| 206 |
|
| 207 |
+
|
| 208 |
๐ License
|
| 209 |
MIT License - See LICENSE file for details.
|
| 210 |
|
| 211 |
Built with: Python, FastAPI, OpenEnv, Hugging Face, Docker
|
| 212 |
|
| 213 |
|
|
|
__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Quant-Gym: Financial Analysis Environment for AI Agents"""
|
| 2 |
+
__version__ = "1.0.0"
|
inference.py
CHANGED
|
@@ -18,10 +18,10 @@ TEMPERATURE = 0.7
|
|
| 18 |
MAX_TOKENS = 200
|
| 19 |
SUCCESS_SCORE_THRESHOLD = 0.7
|
| 20 |
|
| 21 |
-
# System prompt
|
| 22 |
SYSTEM_PROMPT = textwrap.dedent(
|
| 23 |
"""
|
| 24 |
-
|
| 25 |
|
| 26 |
Available actions:
|
| 27 |
- GET_PRICE: Get current stock price
|
|
@@ -30,9 +30,18 @@ SYSTEM_PROMPT = textwrap.dedent(
|
|
| 30 |
- BACKTEST [strategy]: Backtest a strategy (momentum or mean_reversion)
|
| 31 |
- GET_NEWS: Get latest news headline
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
Respond with EXACTLY one action in format: ACTION [parameter]
|
| 34 |
Example: BUY 10
|
| 35 |
Example: GET_PRICE
|
|
|
|
|
|
|
|
|
|
| 36 |
"""
|
| 37 |
).strip()
|
| 38 |
|
|
@@ -56,11 +65,14 @@ def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> No
|
|
| 56 |
|
| 57 |
|
| 58 |
class QuantGymClient:
|
|
|
|
|
|
|
| 59 |
def __init__(self, base_url: str):
|
| 60 |
self.base_url = base_url
|
| 61 |
self.session = requests.Session()
|
| 62 |
|
| 63 |
def reset(self):
|
|
|
|
| 64 |
try:
|
| 65 |
response = self.session.post(f"{self.base_url}/reset")
|
| 66 |
return response.json()
|
|
@@ -68,31 +80,32 @@ class QuantGymClient:
|
|
| 68 |
print(f"[ERROR] Reset failed: {e}", flush=True)
|
| 69 |
return {"observation": {"price": 150, "balance": 10000, "holdings": 0, "portfolio_value": 10000}}
|
| 70 |
|
| 71 |
-
def step(self, action: str):
|
|
|
|
| 72 |
action_upper = action.upper()
|
| 73 |
|
| 74 |
if action_upper == "GET_PRICE":
|
| 75 |
payload = {"type": "GET_PRICE"}
|
|
|
|
|
|
|
| 76 |
elif action_upper.startswith("BUY"):
|
| 77 |
-
amount = 5
|
| 78 |
if " " in action_upper:
|
| 79 |
try:
|
| 80 |
amount = int(action_upper.split()[1])
|
| 81 |
except:
|
| 82 |
-
|
| 83 |
payload = {"type": "BUY", "amount": amount}
|
| 84 |
elif action_upper.startswith("SELL"):
|
| 85 |
-
amount = 5
|
| 86 |
if " " in action_upper:
|
| 87 |
try:
|
| 88 |
amount = int(action_upper.split()[1])
|
| 89 |
except:
|
| 90 |
-
|
| 91 |
payload = {"type": "SELL", "amount": amount}
|
| 92 |
elif action_upper.startswith("BACKTEST"):
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
payload = {"type": "
|
| 96 |
else:
|
| 97 |
payload = {"type": "GET_PRICE"}
|
| 98 |
|
|
@@ -103,30 +116,95 @@ class QuantGymClient:
|
|
| 103 |
print(f"[ERROR] Step failed: {e}", flush=True)
|
| 104 |
return {"observation": {"price": 150, "balance": 10000, "holdings": 0, "portfolio_value": 10000}}
|
| 105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
def close(self):
|
|
|
|
| 107 |
self.session.close()
|
| 108 |
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
def get_model_action(client: OpenAI, step: int, observation: dict, history: List[str]) -> str:
|
| 111 |
"""Get action from LLM using the judge's proxy"""
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
user_prompt = textwrap.dedent(
|
| 114 |
f"""
|
| 115 |
-
Step: {step}
|
| 116 |
-
Current price: ${observation.get('price', 'unknown')}
|
| 117 |
-
Balance: ${observation.get('balance', 'unknown')}
|
| 118 |
-
Holdings: {observation.get('holdings', 0)} shares
|
| 119 |
-
Portfolio value: ${observation.get('portfolio_value', 'unknown')}
|
| 120 |
-
Latest news: {observation.get('last_news', {}).get('headline', 'No news')}
|
| 121 |
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
"""
|
| 124 |
).strip()
|
| 125 |
|
| 126 |
try:
|
| 127 |
-
# CRITICAL: This MUST go through their proxy using BOTH env vars
|
| 128 |
completion = client.chat.completions.create(
|
| 129 |
-
model="gpt-3.5-turbo",
|
| 130 |
messages=[
|
| 131 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 132 |
{"role": "user", "content": user_prompt},
|
|
@@ -135,64 +213,61 @@ def get_model_action(client: OpenAI, step: int, observation: dict, history: List
|
|
| 135 |
max_tokens=MAX_TOKENS,
|
| 136 |
)
|
| 137 |
text = completion.choices[0].message.content or ""
|
| 138 |
-
|
|
|
|
|
|
|
| 139 |
except Exception as e:
|
| 140 |
print(f"[DEBUG] LLM error: {e}, using fallback", flush=True)
|
| 141 |
return fallback_strategy(observation)
|
| 142 |
|
| 143 |
|
| 144 |
-
def
|
| 145 |
-
|
|
|
|
|
|
|
| 146 |
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
return "BUY 5"
|
| 152 |
-
elif text.startswith("SELL"):
|
| 153 |
-
parts = text.split()
|
| 154 |
-
if len(parts) > 1 and parts[1].isdigit():
|
| 155 |
-
return f"SELL {parts[1]}"
|
| 156 |
-
return "SELL 5"
|
| 157 |
-
elif text.startswith("BACKTEST"):
|
| 158 |
-
return "BACKTEST"
|
| 159 |
-
elif text.startswith("GET_NEWS"):
|
| 160 |
-
return "GET_NEWS"
|
| 161 |
-
else:
|
| 162 |
-
return "GET_PRICE"
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
def fallback_strategy(observation: dict) -> str:
|
| 166 |
sentiment = observation.get('last_news', {}).get('sentiment', 'neutral')
|
| 167 |
if sentiment == 'positive':
|
| 168 |
-
|
| 169 |
elif sentiment == 'negative':
|
| 170 |
-
|
| 171 |
else:
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
|
| 175 |
async def main() -> None:
|
| 176 |
print("[INFO] Starting Quant-Gym Inference", flush=True)
|
|
|
|
| 177 |
|
| 178 |
# CRITICAL CHECK: Both environment variables MUST be set
|
| 179 |
if not API_BASE_URL:
|
| 180 |
-
print("[
|
| 181 |
-
print("[
|
| 182 |
-
|
|
|
|
| 183 |
|
| 184 |
if not API_KEY:
|
| 185 |
-
print("[
|
| 186 |
-
print("[
|
| 187 |
-
return
|
| 188 |
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
|
|
|
| 196 |
|
| 197 |
env = QuantGymClient(BASE_URL)
|
| 198 |
|
|
@@ -202,21 +277,27 @@ async def main() -> None:
|
|
| 202 |
success = False
|
| 203 |
final_score = 0.0
|
| 204 |
|
| 205 |
-
log_start(task=TASK_NAME, env=BENCHMARK, model="gpt-3.5-turbo")
|
| 206 |
|
| 207 |
try:
|
|
|
|
| 208 |
result = env.reset()
|
| 209 |
observation = result.get('observation', {})
|
|
|
|
| 210 |
|
| 211 |
for step in range(1, MAX_STEPS + 1):
|
| 212 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
|
|
|
|
| 214 |
result = env.step(action_str)
|
| 215 |
observation = result.get('observation', {})
|
| 216 |
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
reward = min(1.0, max(0.0, profit_reward))
|
| 220 |
|
| 221 |
done = step >= MAX_STEPS - 1
|
| 222 |
error = None
|
|
@@ -226,7 +307,8 @@ async def main() -> None:
|
|
| 226 |
|
| 227 |
log_step(step=step, action=action_str, reward=reward, done=done, error=error)
|
| 228 |
|
| 229 |
-
|
|
|
|
| 230 |
|
| 231 |
if done:
|
| 232 |
break
|
|
@@ -236,6 +318,8 @@ async def main() -> None:
|
|
| 236 |
|
| 237 |
except Exception as e:
|
| 238 |
print(f"[ERROR] {e}", flush=True)
|
|
|
|
|
|
|
| 239 |
success = False
|
| 240 |
final_score = 0.0
|
| 241 |
finally:
|
|
|
|
| 18 |
MAX_TOKENS = 200
|
| 19 |
SUCCESS_SCORE_THRESHOLD = 0.7
|
| 20 |
|
| 21 |
+
# System prompt for financial analysis
|
| 22 |
SYSTEM_PROMPT = textwrap.dedent(
|
| 23 |
"""
|
| 24 |
+
It is a financial analyst AI agent. Its goal is to analyze market data and make trading decisions.
|
| 25 |
|
| 26 |
Available actions:
|
| 27 |
- GET_PRICE: Get current stock price
|
|
|
|
| 30 |
- BACKTEST [strategy]: Backtest a strategy (momentum or mean_reversion)
|
| 31 |
- GET_NEWS: Get latest news headline
|
| 32 |
|
| 33 |
+
Strategy tips:
|
| 34 |
+
- Positive news sentiment suggests BUY
|
| 35 |
+
- Negative news sentiment suggests SELL
|
| 36 |
+
- Momentum strategy: Buy when price is rising
|
| 37 |
+
- Mean reversion: Buy when price is low relative to recent average
|
| 38 |
+
|
| 39 |
Respond with EXACTLY one action in format: ACTION [parameter]
|
| 40 |
Example: BUY 10
|
| 41 |
Example: GET_PRICE
|
| 42 |
+
Example: BACKTEST momentum
|
| 43 |
+
|
| 44 |
+
For GET_NEWS, also provide a brief explanation of your analysis.
|
| 45 |
"""
|
| 46 |
).strip()
|
| 47 |
|
|
|
|
| 65 |
|
| 66 |
|
| 67 |
class QuantGymClient:
|
| 68 |
+
"""Client for interacting with Quant-Gym environment"""
|
| 69 |
+
|
| 70 |
def __init__(self, base_url: str):
|
| 71 |
self.base_url = base_url
|
| 72 |
self.session = requests.Session()
|
| 73 |
|
| 74 |
def reset(self):
|
| 75 |
+
"""Reset environment"""
|
| 76 |
try:
|
| 77 |
response = self.session.post(f"{self.base_url}/reset")
|
| 78 |
return response.json()
|
|
|
|
| 80 |
print(f"[ERROR] Reset failed: {e}", flush=True)
|
| 81 |
return {"observation": {"price": 150, "balance": 10000, "holdings": 0, "portfolio_value": 10000}}
|
| 82 |
|
| 83 |
+
def step(self, action: str, amount: int = 0, explanation: str = "", strategy: str = ""):
|
| 84 |
+
"""Execute an action"""
|
| 85 |
action_upper = action.upper()
|
| 86 |
|
| 87 |
if action_upper == "GET_PRICE":
|
| 88 |
payload = {"type": "GET_PRICE"}
|
| 89 |
+
elif action_upper == "GET_NEWS":
|
| 90 |
+
payload = {"type": "GET_NEWS", "explanation": explanation if explanation else "Analyzing market sentiment"}
|
| 91 |
elif action_upper.startswith("BUY"):
|
|
|
|
| 92 |
if " " in action_upper:
|
| 93 |
try:
|
| 94 |
amount = int(action_upper.split()[1])
|
| 95 |
except:
|
| 96 |
+
amount = 5
|
| 97 |
payload = {"type": "BUY", "amount": amount}
|
| 98 |
elif action_upper.startswith("SELL"):
|
|
|
|
| 99 |
if " " in action_upper:
|
| 100 |
try:
|
| 101 |
amount = int(action_upper.split()[1])
|
| 102 |
except:
|
| 103 |
+
amount = 5
|
| 104 |
payload = {"type": "SELL", "amount": amount}
|
| 105 |
elif action_upper.startswith("BACKTEST"):
|
| 106 |
+
if " " in action_upper:
|
| 107 |
+
strategy = action_upper.split()[1]
|
| 108 |
+
payload = {"type": "BACKTEST", "strategy": strategy if strategy else "momentum"}
|
| 109 |
else:
|
| 110 |
payload = {"type": "GET_PRICE"}
|
| 111 |
|
|
|
|
| 116 |
print(f"[ERROR] Step failed: {e}", flush=True)
|
| 117 |
return {"observation": {"price": 150, "balance": 10000, "holdings": 0, "portfolio_value": 10000}}
|
| 118 |
|
| 119 |
+
def get_tasks(self):
|
| 120 |
+
"""Get available tasks"""
|
| 121 |
+
try:
|
| 122 |
+
response = self.session.get(f"{self.base_url}/tasks")
|
| 123 |
+
return response.json()
|
| 124 |
+
except Exception as e:
|
| 125 |
+
print(f"[ERROR] Get tasks failed: {e}", flush=True)
|
| 126 |
+
return {"tasks": []}
|
| 127 |
+
|
| 128 |
def close(self):
|
| 129 |
+
"""Close the session"""
|
| 130 |
self.session.close()
|
| 131 |
|
| 132 |
|
| 133 |
+
def parse_action_from_response(text: str) -> str:
|
| 134 |
+
"""Parse LLM response into action string"""
|
| 135 |
+
text = text.strip().upper()
|
| 136 |
+
|
| 137 |
+
if text.startswith("BUY"):
|
| 138 |
+
parts = text.split()
|
| 139 |
+
if len(parts) > 1 and parts[1].isdigit():
|
| 140 |
+
return f"BUY {parts[1]}"
|
| 141 |
+
return "BUY 5"
|
| 142 |
+
elif text.startswith("SELL"):
|
| 143 |
+
parts = text.split()
|
| 144 |
+
if len(parts) > 1 and parts[1].isdigit():
|
| 145 |
+
return f"SELL {parts[1]}"
|
| 146 |
+
return "SELL 5"
|
| 147 |
+
elif text.startswith("BACKTEST"):
|
| 148 |
+
parts = text.split()
|
| 149 |
+
if len(parts) > 1:
|
| 150 |
+
return f"BACKTEST {parts[1]}"
|
| 151 |
+
return "BACKTEST momentum"
|
| 152 |
+
elif text.startswith("GET_NEWS"):
|
| 153 |
+
return "GET_NEWS"
|
| 154 |
+
else:
|
| 155 |
+
return "GET_PRICE"
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def fallback_strategy(observation: dict) -> str:
|
| 159 |
+
"""Rule-based strategy when LLM is unavailable"""
|
| 160 |
+
sentiment = observation.get('last_news', {}).get('sentiment', 'neutral')
|
| 161 |
+
if sentiment == 'positive':
|
| 162 |
+
return "BUY 5"
|
| 163 |
+
elif sentiment == 'negative':
|
| 164 |
+
return "SELL 5"
|
| 165 |
+
else:
|
| 166 |
+
return "GET_PRICE"
|
| 167 |
+
|
| 168 |
+
|
| 169 |
def get_model_action(client: OpenAI, step: int, observation: dict, history: List[str]) -> str:
|
| 170 |
"""Get action from LLM using the judge's proxy"""
|
| 171 |
|
| 172 |
+
# If no API credentials, use fallback
|
| 173 |
+
if not API_BASE_URL or not API_KEY:
|
| 174 |
+
print("[DEBUG] No API credentials, using fallback strategy", flush=True)
|
| 175 |
+
return fallback_strategy(observation)
|
| 176 |
+
|
| 177 |
+
# Get news headline for context
|
| 178 |
+
news = observation.get('last_news', {})
|
| 179 |
+
headline = news.get('headline', 'No recent news')
|
| 180 |
+
sentiment = news.get('sentiment', 'neutral')
|
| 181 |
+
|
| 182 |
user_prompt = textwrap.dedent(
|
| 183 |
f"""
|
| 184 |
+
Step: {step} of {MAX_STEPS}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
+
Current Market Data:
|
| 187 |
+
- Price: ${observation.get('price', 'unknown')}
|
| 188 |
+
- Balance: ${observation.get('balance', 'unknown')}
|
| 189 |
+
- Holdings: {observation.get('holdings', 0)} shares
|
| 190 |
+
- Portfolio Value: ${observation.get('portfolio_value', 'unknown')}
|
| 191 |
+
|
| 192 |
+
Latest News:
|
| 193 |
+
- Headline: "{headline}"
|
| 194 |
+
- Sentiment: {sentiment}
|
| 195 |
+
|
| 196 |
+
Previous actions this episode:
|
| 197 |
+
{chr(10).join(history[-5:]) if history else "No previous actions"}
|
| 198 |
+
|
| 199 |
+
Based on this information, what is your next action?
|
| 200 |
+
Respond with EXACTLY one action in format: ACTION [parameter]
|
| 201 |
+
Examples: BUY 10, SELL 5, GET_PRICE, BACKTEST momentum, GET_NEWS
|
| 202 |
"""
|
| 203 |
).strip()
|
| 204 |
|
| 205 |
try:
|
|
|
|
| 206 |
completion = client.chat.completions.create(
|
| 207 |
+
model="gpt-3.5-turbo",
|
| 208 |
messages=[
|
| 209 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 210 |
{"role": "user", "content": user_prompt},
|
|
|
|
| 213 |
max_tokens=MAX_TOKENS,
|
| 214 |
)
|
| 215 |
text = completion.choices[0].message.content or ""
|
| 216 |
+
action = parse_action_from_response(text)
|
| 217 |
+
print(f"[DEBUG] LLM suggested: {text[:100]}... -> {action}", flush=True)
|
| 218 |
+
return action
|
| 219 |
except Exception as e:
|
| 220 |
print(f"[DEBUG] LLM error: {e}, using fallback", flush=True)
|
| 221 |
return fallback_strategy(observation)
|
| 222 |
|
| 223 |
|
| 224 |
+
def calculate_reward(observation: dict, step: int) -> float:
|
| 225 |
+
"""Calculate reward based on portfolio performance and actions"""
|
| 226 |
+
portfolio_value = observation.get('portfolio_value', 10000)
|
| 227 |
+
price = observation.get('price', 150)
|
| 228 |
|
| 229 |
+
# Profit reward (0 to 0.6)
|
| 230 |
+
profit_reward = max(0, (portfolio_value - 10000) / 10000) * 0.6
|
| 231 |
+
|
| 232 |
+
# News sentiment bonus (0 to 0.2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
sentiment = observation.get('last_news', {}).get('sentiment', 'neutral')
|
| 234 |
if sentiment == 'positive':
|
| 235 |
+
sentiment_bonus = 0.2
|
| 236 |
elif sentiment == 'negative':
|
| 237 |
+
sentiment_bonus = -0.1
|
| 238 |
else:
|
| 239 |
+
sentiment_bonus = 0.05
|
| 240 |
+
|
| 241 |
+
# Step completion bonus (0 to 0.2)
|
| 242 |
+
step_bonus = min(0.2, step / MAX_STEPS * 0.2)
|
| 243 |
+
|
| 244 |
+
reward = max(0.0, min(1.0, profit_reward + sentiment_bonus + step_bonus))
|
| 245 |
+
return reward
|
| 246 |
|
| 247 |
|
| 248 |
async def main() -> None:
|
| 249 |
print("[INFO] Starting Quant-Gym Inference", flush=True)
|
| 250 |
+
print(f"[INFO] Python version: {os.sys.version}", flush=True)
|
| 251 |
|
| 252 |
# CRITICAL CHECK: Both environment variables MUST be set
|
| 253 |
if not API_BASE_URL:
|
| 254 |
+
print("[WARNING] API_BASE_URL environment variable not set!", flush=True)
|
| 255 |
+
print("[WARNING] Using fallback strategy without LLM.", flush=True)
|
| 256 |
+
else:
|
| 257 |
+
print(f"[INFO] API_BASE_URL: {API_BASE_URL}", flush=True)
|
| 258 |
|
| 259 |
if not API_KEY:
|
| 260 |
+
print("[WARNING] API_KEY environment variable not set!", flush=True)
|
| 261 |
+
print("[WARNING] Using fallback strategy without LLM.", flush=True)
|
|
|
|
| 262 |
|
| 263 |
+
# Initialize OpenAI client if credentials available
|
| 264 |
+
client = None
|
| 265 |
+
if API_BASE_URL and API_KEY:
|
| 266 |
+
try:
|
| 267 |
+
client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
|
| 268 |
+
print("[INFO] OpenAI client initialized successfully", flush=True)
|
| 269 |
+
except Exception as e:
|
| 270 |
+
print(f"[WARNING] Failed to initialize OpenAI client: {e}", flush=True)
|
| 271 |
|
| 272 |
env = QuantGymClient(BASE_URL)
|
| 273 |
|
|
|
|
| 277 |
success = False
|
| 278 |
final_score = 0.0
|
| 279 |
|
| 280 |
+
log_start(task=TASK_NAME, env=BENCHMARK, model="gpt-3.5-turbo" if client else "fallback-rule-based")
|
| 281 |
|
| 282 |
try:
|
| 283 |
+
# Reset environment
|
| 284 |
result = env.reset()
|
| 285 |
observation = result.get('observation', {})
|
| 286 |
+
print(f"[INFO] Reset complete. Initial price: ${observation.get('price', 'unknown')}", flush=True)
|
| 287 |
|
| 288 |
for step in range(1, MAX_STEPS + 1):
|
| 289 |
+
# Get action from LLM or fallback
|
| 290 |
+
if client:
|
| 291 |
+
action_str = get_model_action(client, step, observation, history)
|
| 292 |
+
else:
|
| 293 |
+
action_str = fallback_strategy(observation)
|
| 294 |
|
| 295 |
+
# Execute action
|
| 296 |
result = env.step(action_str)
|
| 297 |
observation = result.get('observation', {})
|
| 298 |
|
| 299 |
+
# Calculate reward
|
| 300 |
+
reward = calculate_reward(observation, step)
|
|
|
|
| 301 |
|
| 302 |
done = step >= MAX_STEPS - 1
|
| 303 |
error = None
|
|
|
|
| 307 |
|
| 308 |
log_step(step=step, action=action_str, reward=reward, done=done, error=error)
|
| 309 |
|
| 310 |
+
# Update history
|
| 311 |
+
history.append(f"Step {step}: {action_str} -> reward {reward:.2f}")
|
| 312 |
|
| 313 |
if done:
|
| 314 |
break
|
|
|
|
| 318 |
|
| 319 |
except Exception as e:
|
| 320 |
print(f"[ERROR] {e}", flush=True)
|
| 321 |
+
import traceback
|
| 322 |
+
traceback.print_exc()
|
| 323 |
success = False
|
| 324 |
final_score = 0.0
|
| 325 |
finally:
|
openenv.yaml
CHANGED
|
@@ -26,6 +26,11 @@ tasks:
|
|
| 26 |
grader: "task3_grader.grade_task3"
|
| 27 |
max_score: 1.0
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
action_schema:
|
| 30 |
type: "object"
|
| 31 |
properties:
|
|
|
|
| 26 |
grader: "task3_grader.grade_task3"
|
| 27 |
max_score: 1.0
|
| 28 |
|
| 29 |
+
graders:
|
| 30 |
+
task1: "task1_grader.grade_task1"
|
| 31 |
+
task2: "task2_grader.grade_task2"
|
| 32 |
+
task3: "task3_grader.grade_task3"
|
| 33 |
+
|
| 34 |
action_schema:
|
| 35 |
type: "object"
|
| 36 |
properties:
|