| """ |
| Training script β Train the RL policy for yield optimization. |
| """ |

import logging
import os
import sys
import json

# Make the repository root importable when this script is run directly.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from agent.main import YieldRouterAgent, setup_logging
from agent.rl_optimizer import PPOYieldOptimizer, RWAYieldEnv, Backtester


def main():
    setup_logging()
    logger = logging.getLogger("train")

    logger.info("=" * 60)
    logger.info("Dynamic RWA Yield Router - RL Training")
    logger.info("=" * 60)

    # Training horizon, overridable via the TRAIN_STEPS environment variable.
    total_timesteps = int(os.getenv("TRAIN_STEPS", "100000"))

    optimizer = PPOYieldOptimizer(
        total_timesteps=total_timesteps,
        learning_rate=3e-4,
        n_steps=2048,
        batch_size=64,
    )
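
    # Train the PPO policy. This is the long-running step; wall-clock time
    # scales with TRAIN_STEPS and the environment's per-step cost.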
    logger.info(f"\nTraining for {total_timesteps} timesteps...\n")
    optimizer.train(total_timesteps=total_timesteps)

    # Evaluate the trained policy over short fixed-length episodes.
    logger.info("\nRunning backtest...\n")
    env = RWAYieldEnv(episode_length=720)
    backtester = Backtester(optimizer, env)
    n_episodes = 10
    results = backtester.run_backtest(n_episodes=n_episodes)

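    # Summarise per-episode metrics; the keys below ("total_return", "sharpe",
    # "max_drawdown") are the fields read from Backtester.run_backtest's output.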
| if results.get("rl_agent"): |
| import numpy as np |
| returns = [r["total_return"] for r in results["rl_agent"]] |
| sharpes = [r["sharpe"] for r in results["rl_agent"]] |
| drawdowns = [r["max_drawdown"] for r in results["rl_agent"]] |
| |
| print("\n" + "=" * 60) |
| print("π BACKTEST RESULTS (10 episodes)") |
| print("=" * 60) |
| print(f" Avg Return: {np.mean(returns):+.2f}%") |
| print(f" Std Return: {np.std(returns):.2f}%") |
| print(f" Avg Sharpe: {np.mean(sharpes):.3f}") |
| print(f" Avg Max DD: {np.mean(drawdowns)*100:.2f}%") |
| print(f" Best Return: {max(returns):+.2f}%") |
| print(f" Worst Return: {min(returns):+.2f}%") |
| |
| logger.info("\nβ
Training complete!") |


if __name__ == "__main__":
    main()
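
# Example invocation (the path and filename are assumptions about the repository layout):
#   TRAIN_STEPS=50000 python scripts/train.py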