""" Training script — Train the RL policy for yield optimization. """ import logging import os import sys import json sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from agent.main import YieldRouterAgent, setup_logging from agent.rl_optimizer import PPOYieldOptimizer, RWAYieldEnv, Backtester def main(): setup_logging() logger = logging.getLogger("train") logger.info("=" * 60) logger.info("Dynamic RWA Yield Router — RL Training") logger.info("=" * 60) # Initialize optimizer total_timesteps = int(os.getenv("TRAIN_STEPS", "100000")) optimizer = PPOYieldOptimizer( total_timesteps=total_timesteps, learning_rate=3e-4, n_steps=2048, batch_size=64, ) # Train logger.info(f"\nšŸŽ“ Training for {total_timesteps} timesteps...\n") optimizer.train(total_timesteps=total_timesteps) # Backtest logger.info("\nšŸ“Š Running backtest...\n") env = RWAYieldEnv(episode_length=720) backtester = Backtester(optimizer, env) results = backtester.run_backtest(n_episodes=10) if results.get("rl_agent"): import numpy as np returns = [r["total_return"] for r in results["rl_agent"]] sharpes = [r["sharpe"] for r in results["rl_agent"]] drawdowns = [r["max_drawdown"] for r in results["rl_agent"]] print("\n" + "=" * 60) print("šŸ“Š BACKTEST RESULTS (10 episodes)") print("=" * 60) print(f" Avg Return: {np.mean(returns):+.2f}%") print(f" Std Return: {np.std(returns):.2f}%") print(f" Avg Sharpe: {np.mean(sharpes):.3f}") print(f" Avg Max DD: {np.mean(drawdowns)*100:.2f}%") print(f" Best Return: {max(returns):+.2f}%") print(f" Worst Return: {min(returns):+.2f}%") logger.info("\nāœ… Training complete!") if __name__ == "__main__": main()