Spaces:
Runtime error
Runtime error
Update train.py
Browse files
train.py
CHANGED
|
@@ -56,8 +56,8 @@ except Exception as e:
|
|
| 56 |
MAX_SEQ_LEN = 1024
|
| 57 |
SFT_STEPS = 80 # Increased warmup for JSON format - key fix!
|
| 58 |
GRPO_STEPS = 250
|
| 59 |
-
GRPO_K =
|
| 60 |
-
GRPO_LR =
|
| 61 |
CURRICULUM_SWITCH = 0 # Start with Level 1, advance early
|
| 62 |
GRAD_CLIP = 1.0
|
| 63 |
SAVE_EVERY = 50
|
|
|
|
| 56 |
MAX_SEQ_LEN = 1024
|
| 57 |
SFT_STEPS = 80 # Increased warmup for JSON format - key fix!
|
| 58 |
GRPO_STEPS = 250
|
| 59 |
+
GRPO_K = 4
|
| 60 |
+
GRPO_LR = 5e-6 # Slightly higher LR for faster initial learning
|
| 61 |
CURRICULUM_SWITCH = 0 # Start with Level 1, advance early
|
| 62 |
GRAD_CLIP = 1.0
|
| 63 |
SAVE_EVERY = 50
|