| { |
| "model_info": { |
| "name": "contextflow-rl", |
| "version": "50", |
| "algorithm": "GRPO + Q-Learning", |
| "training_samples": 200, |
| "total_epochs": 5, |
| "final_loss": 0.2465 |
| }, |
| "training_history": [ |
| {"epoch": 1, "loss": 1.2456, "epsilon": 1.0, "avg_reward": 0.2}, |
| {"epoch": 2, "loss": 0.8923, "epsilon": 0.995, "avg_reward": 0.35}, |
| {"epoch": 3, "loss": 0.6541, "epsilon": 0.990, "avg_reward": 0.48}, |
| {"epoch": 4, "loss": 0.4127, "epsilon": 0.985, "avg_reward": 0.62}, |
| {"epoch": 5, "loss": 0.2465, "epsilon": 0.980, "avg_reward": 0.75} |
| ], |
| "q_network_config": { |
| "state_dim": 64, |
| "action_dim": 10, |
| "hidden_dim": 128, |
| "learning_rate": 0.001, |
| "gamma": 0.95, |
| "epsilon_start": 1.0, |
| "epsilon_end": 0.01, |
| "epsilon_decay": 0.995 |
| }, |
| "actions": [ |
| "what_is_backpropagation", |
| "why_gradient_descent", |
| "how_overfitting_works", |
| "explain_regularization", |
| "what_loss_function", |
| "how_optimization_works", |
| "explain_learning_rate", |
| "what_regularization", |
| "how_batch_norm_works", |
| "explain_softmax" |
| ], |
| "sample_predictions": [ |
| { |
| "scenario": "beginner_ml_student", |
| "state_summary": { |
| "progress": 0.3, |
| "confusion_signals": 3.0, |
| "gesture_signals": 2.0 |
| }, |
| "prediction": "what_is_backpropagation", |
| "confidence": 0.72 |
| }, |
| { |
| "scenario": "advanced_struggling", |
| "state_summary": { |
| "progress": 0.7, |
| "confusion_signals": 4.5, |
| "gesture_signals": 8.0 |
| }, |
| "prediction": "how_overfitting_works", |
| "confidence": 0.85 |
| }, |
| { |
| "scenario": "quick_learner", |
| "state_summary": { |
| "progress": 0.9, |
| "confusion_signals": 0.5, |
| "gesture_signals": 3.0 |
| }, |
| "prediction": "explain_softmax", |
| "confidence": 0.45 |
| } |
| ], |
| "limitations": [ |
| "Trained on 200 synthetic samples", |
| "Limited real-world behavioral data", |
| "No hyperparameter tuning performed", |
| "Requires MediaPipe for gesture recognition" |
| ], |
| "recommended_next_steps": [ |
| "Collect real learning session data", |
| "Increase training samples to 10000+", |
| "Add hyperparameter optimization", |
| "Implement online learning" |
| ] |
| } |
|
|