contextflow-rl / evaluation_results.json
namish10's picture
Upload evaluation_results.json with huggingface_hub
6bd4a66 verified
{
"model_info": {
"name": "contextflow-rl",
"version": "50",
"algorithm": "GRPO + Q-Learning",
"training_samples": 200,
"total_epochs": 5,
"final_loss": 0.2465
},
"training_history": [
{"epoch": 1, "loss": 1.2456, "epsilon": 1.0, "avg_reward": 0.2},
{"epoch": 2, "loss": 0.8923, "epsilon": 0.995, "avg_reward": 0.35},
{"epoch": 3, "loss": 0.6541, "epsilon": 0.990, "avg_reward": 0.48},
{"epoch": 4, "loss": 0.4127, "epsilon": 0.985, "avg_reward": 0.62},
{"epoch": 5, "loss": 0.2465, "epsilon": 0.980, "avg_reward": 0.75}
],
"q_network_config": {
"state_dim": 64,
"action_dim": 10,
"hidden_dim": 128,
"learning_rate": 0.001,
"gamma": 0.95,
"epsilon_start": 1.0,
"epsilon_end": 0.01,
"epsilon_decay": 0.995
},
"actions": [
"what_is_backpropagation",
"why_gradient_descent",
"how_overfitting_works",
"explain_regularization",
"what_loss_function",
"how_optimization_works",
"explain_learning_rate",
"what_regularization",
"how_batch_norm_works",
"explain_softmax"
],
"sample_predictions": [
{
"scenario": "beginner_ml_student",
"state_summary": {
"progress": 0.3,
"confusion_signals": 3.0,
"gesture_signals": 2.0
},
"prediction": "what_is_backpropagation",
"confidence": 0.72
},
{
"scenario": "advanced_struggling",
"state_summary": {
"progress": 0.7,
"confusion_signals": 4.5,
"gesture_signals": 8.0
},
"prediction": "how_overfitting_works",
"confidence": 0.85
},
{
"scenario": "quick_learner",
"state_summary": {
"progress": 0.9,
"confusion_signals": 0.5,
"gesture_signals": 3.0
},
"prediction": "explain_softmax",
"confidence": 0.45
}
],
"limitations": [
"Trained on 200 synthetic samples",
"Limited real-world behavioral data",
"No hyperparameter tuning performed",
"Requires MediaPipe for gesture recognition"
],
"recommended_next_steps": [
"Collect real learning session data",
"Increase training samples to 10000+",
"Add hyperparameter optimization",
"Implement online learning"
]
}