| # okto_version: "1.2"
|
|
|
| # Test 4: Flan-T5 Complete - All Blocks
|
| # Model: google/flan-t5-base
|
| # Objective: test all the advanced blocks together
|
|
|
| # Project identifier followed by a free-text description of this script.
| PROJECT "test_flan_t5_complete"
|
| DESCRIPTION "Teste completo Flan-T5 com todos os blocos v1.2"
|
|
|
| # Execution environment: requests a GPU accelerator, 8GB minimum memory,
| # fp16 precision, the "oktoseek" backend, and sets install_missing to true.
| # NOTE(review): the base model in this file is a T5 variant, and T5
| # checkpoints are commonly reported to overflow under fp16 — confirm that
| # fp16 (rather than bf16/fp32) is intended for this test.
| ENV {
|
| accelerator: "gpu"
|
| min_memory: "8GB"
|
| precision: "fp16"
|
| backend: "oktoseek"
|
| install_missing: true
|
| }
|
|
|
| # Dataset locations: JSONL files for the training and validation splits.
| # NOTE(review): paths are relative; presumably resolved against the project
| # root — confirm against the runner.
| DATASET {
|
| train: "dataset/train.jsonl"
|
| validation: "dataset/val.jsonl"
|
| }
|
|
|
| # Base model to start from (google/flan-t5-base) with device set to "auto".
| MODEL {
|
| base: "google/flan-t5-base"
|
| device: "auto"
|
| }
|
|
|
| # Training hyperparameters: 5 epochs, batch size 16, learning rate 0.0001.
| # device is also set to "auto" here, repeating the MODEL block's setting.
| TRAIN {
|
| epochs: 5
|
| batch_size: 16
|
| learning_rate: 0.0001
|
| device: "auto"
|
| }
|
|
|
| # Monitoring: lists the metrics to track, the threshold conditions under
| # notify_if, and the log file that output is written to.
| MONITOR {
|
| metrics: [
|
| "loss",
|
| "val_loss",
|
| "accuracy",
|
| "perplexity",
|
| "gpu_usage",
|
| "ram_usage",
|
| "throughput",
|
| "latency",
|
| "confidence"
|
| ]
|
| # Threshold conditions, one per line; the two usage thresholds are written
| # as percentages.
| notify_if {
|
| loss > 2.0
|
| val_loss > 2.5
|
| gpu_usage > 90%
|
| ram_usage > 80%
|
| }
|
| log_to: "logs/training_complete.log"
|
| }
|
|
|
| # Training control logic: per-step and per-epoch hooks, a validation
| # interval, and conditional (IF / WHEN / EVERY) rules.
| # NOTE(review): several statements below appear truncated — IF / WHEN /
| # EVERY have no condition or opening brace, and SET / SAVE / LOG have no
| # arguments — so the braces in this block do not balance as written.
| # Restore the missing conditions and values from the original script.
| CONTROL {
|
| # After every training step: log the current loss.
| on_step_end {
|
| LOG loss
|
| }
|
|
|
| # After every epoch: save the model and log a completion message.
| on_epoch_end {
|
| SAVE model
|
| LOG "Epoch completed"
|
|
|
| # NOTE(review): SET LR has no target value and LOG has no message.
| IF loss > 1.5 {
|
| SET LR
|
| LOG
|
| }
|
|
|
| # NOTE(review): IF is missing its condition and opening brace; SAVE and
| # LOG are missing their arguments.
| IF
|
| SAVE
|
| LOG
|
| }
|
| }
|
|
|
| # Validation interval; presumably counted in training steps — TODO confirm.
| validate_every: 200
|
|
|
| # NOTE(review): condition, opening brace, and SET/LOG arguments missing.
| IF
|
| SET
|
| LOG
|
| }
|
|
|
| # NOTE(review): condition, opening brace, and LOG argument missing.
| IF
|
| STOP_TRAINING
|
| LOG
|
| }
|
|
|
| # NOTE(review): condition, opening brace, and SET/LOG arguments missing.
| WHEN
|
| SET
|
| LOG
|
| }
|
|
|
| # NOTE(review): interval, opening brace, and SAVE/LOG arguments missing.
| EVERY
|
| SAVE
|
| LOG
|
| }
|
| }
|
|
|
| # Stability guards: stop_if_nan and stop_if_diverges are both enabled, and
| # min_improvement is set to 0.001.
| # NOTE(review): presumably min_improvement is the smallest metric gain that
| # still counts as progress — confirm against the OktoScript specification.
| # The opening brace was missing from the block header; it is restored here
| # so the block matches the closing brace and the other blocks' syntax.
| STABILITY {
|
| stop_if_nan: true
|
| stop_if_diverges: true
|
| min_improvement: 0.001
|
| }
|
|
|
| # Export settings: output formats ("okm" and "safetensors") and the
| # destination directory for exported artifacts.
| # The opening brace was missing from the block header; it is restored here
| # so the block matches the closing brace and the other blocks' syntax.
| EXPORT {
|
| format: ["okm", "safetensors"]
|
| path: "export/"
|
| }
|
|
|
|
|
|
|
|
|
|
|