updates tasks
Browse files- task1_grader.py +29 -13
- task2_grader.py +47 -19
- task3_grader.py +42 -18
task1_grader.py
CHANGED
|
@@ -1,20 +1,36 @@
|
|
| 1 |
def grade_task1(action, observation):
|
| 2 |
"""
|
| 3 |
Task 1: Fetch Market Data
|
| 4 |
-
|
| 5 |
"""
|
| 6 |
-
score = 0.
|
| 7 |
|
| 8 |
-
# Check if
|
| 9 |
-
if
|
| 10 |
-
score = 0.
|
| 11 |
-
else:
|
| 12 |
-
score = 0.55
|
| 13 |
|
| 14 |
-
#
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
def grade_task1(action, observation):
    """Grade Task 1 (fetch market data) and return a score in [0.01, 0.99].

    Points are awarded additively:

    * 0.30 -- the action's ``type`` is ``"GET_PRICE"``.
    * up to 0.40 -- the observation holds a positive numeric ``price``:
      0.30 if it lies strictly between 100 and 200, otherwise 0.20,
      plus 0.10 for having any positive price at all.
    * 0.15 -- a non-empty ``timestamp`` is present.
    * 0.05 -- a truthy ``volume`` is present.
    * 0.05 -- both ``high`` and ``low`` are truthy.

    Args:
        action: Mapping describing the agent's action, or ``None``.
        observation: Mapping holding the retrieved market data, or ``None``.

    Returns:
        The point total clamped to [0.01, 0.99] and rounded to two decimals.
    """
    action = action or {}
    observation = observation or {}
    score = 0.0

    # Correct action type (0.3 points).
    if action.get("type") == "GET_PRICE":
        score += 0.3

    # Price present and plausible (up to 0.4 points).  A missing, None or
    # non-numeric price simply earns nothing instead of raising TypeError
    # on the comparison.
    price = observation.get("price", 0)
    if isinstance(price, (int, float)) and price > 0:
        if 100 < price < 200:  # hard-coded "typical" range from the original
            score += 0.3
        else:
            score += 0.2
        score += 0.1  # bonus for having any positive price

    # Timestamp present (0.15 points).  Truthiness is used rather than len()
    # so numeric (epoch) timestamps are accepted as well as strings.
    if observation.get("timestamp"):
        score += 0.15

    # Extra market data (0.05 points each).
    if observation.get("volume"):
        score += 0.05
    if observation.get("high") and observation.get("low"):
        score += 0.05

    # Keep the score strictly between 0 and 1.
    score = max(0.01, min(0.99, score))

    return round(score, 2)
|
task2_grader.py
CHANGED
|
@@ -1,25 +1,53 @@
|
|
| 1 |
def grade_task2(action, observation):
|
| 2 |
"""
|
| 3 |
-
Task 2: News Sentiment Analysis
|
| 4 |
-
|
|
|
|
| 5 |
"""
|
| 6 |
-
score = 0.
|
| 7 |
-
|
| 8 |
-
#
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
|
|
|
| 17 |
else:
|
| 18 |
-
score = 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
if score >= 1.0:
|
| 23 |
-
score = 0.99
|
| 24 |
|
| 25 |
-
return score
|
|
|
|
| 1 |
def grade_task2(action, observation):
    """Grade Task 2 (news sentiment analysis with explanation).

    The score is a keyword heuristic over ``action["explanation"]``:

    * length: 0.30 (> 100 chars), 0.20 (> 50), 0.10 (> 20), else 0.02;
    * financial terminology: 0.05 per distinct term found, capped at 0.30;
    * reasoning connective ("because", "therefore", ...): 0.20;
    * data references: 0.05 for any digit, 0.05 for a ``%`` sign;
    * explicit recommendation ("buy", "sell", "hold", ...): 0.10.

    Keywords are matched as whole words (allowing a trailing ``s``, ``ed``
    or ``ing``) rather than as raw substrings, so "hope" no longer counts
    as "pe", "version" as "rsi", or "threshold" as "hold".

    Args:
        action: Mapping with the agent's ``explanation`` text, or ``None``.
        observation: Unused; kept so all graders share one signature.

    Returns:
        The point total clamped to [0.01, 0.99] and rounded to two decimals.
    """
    import re

    raw = action.get('explanation', '') if action else ''
    # A missing, None or non-text explanation is graded as an empty one.
    explanation = raw if isinstance(raw, str) else ''
    text = explanation.lower()

    def has_term(term):
        """Return True if *term* occurs in the explanation as a whole word."""
        pattern = r'\b' + re.escape(term) + r'(?:s|ed|ing)?\b'
        return re.search(pattern, text) is not None

    score = 0.0

    # 1. Explanation length and detail (0.02-0.3 points).
    if len(explanation) > 100:
        score += 0.3
    elif len(explanation) > 50:
        score += 0.2
    elif len(explanation) > 20:
        score += 0.1
    else:
        score += 0.02

    # 2. Financial terminology (0-0.3 points).
    financial_terms = [
        'pe', 'p/e', 'valuation', 'earnings',
        'moving average', 'trend', 'momentum', 'rsi',
        'support', 'resistance', 'breakout',
        'risk', 'volatility', 'drawdown', 'sharpe',
        'sentiment', 'market cap', 'liquidity',
    ]
    terms_found = sum(1 for term in financial_terms if has_term(term))
    score += min(0.3, terms_found * 0.05)

    # 3. Logical reasoning (0.2 points).
    reasoning_words = ['because', 'therefore', 'since', 'due to', 'based on', 'as a result']
    if any(has_term(word) for word in reasoning_words):
        score += 0.2

    # 4. Specific data references (0-0.1 points).
    if re.search(r'\d', explanation):
        score += 0.05
    if '%' in explanation:
        score += 0.05

    # 5. Clear recommendation (0.1 points).
    recommendations = ['buy', 'sell', 'hold', 'accumulate', 'reduce']
    if any(has_term(word) for word in recommendations):
        score += 0.1

    # Keep the score strictly between 0 and 1.
    score = max(0.01, min(0.99, score))

    return round(score, 2)
|
task3_grader.py
CHANGED
|
@@ -1,25 +1,49 @@
|
|
| 1 |
def grade_task3(action, observation):
|
| 2 |
"""
|
| 3 |
Task 3: Backtest Strategy
|
| 4 |
-
|
| 5 |
"""
|
| 6 |
-
score = 0.
|
| 7 |
|
| 8 |
-
#
|
| 9 |
-
|
| 10 |
-
results = observation.get('backtest_results', {})
|
| 11 |
-
if results.get('sharpe_ratio', 0) > 1.0:
|
| 12 |
-
score = 0.95
|
| 13 |
-
elif results.get('sharpe_ratio', 0) > 0.5:
|
| 14 |
-
score = 0.85
|
| 15 |
-
else:
|
| 16 |
-
score = 0.65
|
| 17 |
-
else:
|
| 18 |
-
score = 0.55
|
| 19 |
|
| 20 |
-
if
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
score = 0.99
|
| 24 |
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
def grade_task3(action, observation):
    """Grade Task 3 (backtest a strategy) and return a score in [0.01, 0.99].

    Points are awarded additively:

    * 0.30 -- ``observation["backtest_results"]`` is a non-empty mapping;
    * Sharpe ratio: 0.30 (> 1.5), 0.25 (> 1.0), 0.15 (> 0.5), 0.05 (> 0);
    * max drawdown magnitude: 0.20 (< 0.1), 0.15 (< 0.2), 0.10 (< 0.3),
      0.05 (< 0.5);
    * 0.10 -- ``action["strategy"]`` is a non-empty string, plus 0.10 if it
      names one of a fixed list of advanced strategy types.

    Args:
        action: Mapping with the agent's ``strategy`` description, or ``None``.
        observation: Mapping with a ``backtest_results`` mapping, or ``None``.

    Returns:
        The point total clamped to [0.01, 0.99] and rounded to two decimals.
    """
    score = 0.0

    # Treat a missing, None or non-mapping result set as "no backtest run"
    # instead of failing on the .get() calls below.
    backtest_results = observation.get("backtest_results", {}) if observation else {}
    if not isinstance(backtest_results, dict):
        backtest_results = {}

    # 1. Backtest was performed (0.3 points).
    if backtest_results:
        score += 0.3

    # 2. Sharpe ratio (0-0.3 points); non-numeric values earn nothing.
    sharpe = backtest_results.get("sharpe_ratio", 0)
    if not isinstance(sharpe, (int, float)):
        sharpe = 0
    if sharpe > 1.5:
        score += 0.3
    elif sharpe > 1.0:
        score += 0.25
    elif sharpe > 0.5:
        score += 0.15
    elif sharpe > 0:
        score += 0.05

    # 3. Max drawdown (0-0.2 points).  Only the magnitude is graded: a
    # drawdown reported as a negative fraction (e.g. -0.6) would otherwise
    # pass every "<" threshold and earn full marks.
    # NOTE(review): assumes drawdown is a fraction, not a percentage -- confirm.
    drawdown = backtest_results.get("max_drawdown", 1)
    if not isinstance(drawdown, (int, float)):
        drawdown = 1
    drawdown = abs(drawdown)
    if drawdown < 0.1:
        score += 0.2
    elif drawdown < 0.2:
        score += 0.15
    elif drawdown < 0.3:
        score += 0.1
    elif drawdown < 0.5:
        score += 0.05

    # 4. Strategy description (0-0.2 points); non-string values earn nothing.
    strategy = action.get("strategy", "") if action else ""
    if isinstance(strategy, str) and strategy:
        score += 0.1
        # Sophisticated strategy names get a bonus.
        advanced_strategies = ['momentum', 'mean reversion', 'arbitrage', 'pair trading', 'options']
        lowered = strategy.lower()
        if any(name in lowered for name in advanced_strategies):
            score += 0.1

    # Keep the score strictly between 0 and 1.
    score = max(0.01, min(0.99, score))

    return round(score, 2)
|