oktoscript / examples /monitor-full.okt
OktoSeek's picture
Upload 48 files
5fc8c9d verified
# okto_version: "1.2"
# Project identity: the project name, followed by a one-line description
# stating that this example exercises the MONITOR block with all metrics.
PROJECT "MonitorFullExample"
DESCRIPTION "Demonstrates complete MONITOR block with all metrics"
# Environment settings for this example: a GPU accelerator, a 16GB memory
# minimum, and fp16 precision.
# NOTE(review): this file does not say whether min_memory refers to GPU
# memory or system RAM -- confirm against the OktoScript specification.
ENV {
accelerator: "gpu"
min_memory: "16GB"
precision: "fp16"
}
# Dataset declaration: JSONL files of type "chat".
# NOTE(review): `validation` points at the same file as `train`
# (demo_train.jsonl), so the val_loss / val_accuracy metrics listed in
# MONITOR would be measured on training data. Presumably acceptable for a
# demo; point `validation` at a held-out file for a real run.
DATASET {
train: "examples/datasets/demo_train.jsonl"
validation: "examples/datasets/demo_train.jsonl"
format: "jsonl"
type: "chat"
}
# Model declaration: the name given to this model, the base model it is
# built on, and the device it targets ("cuda", the same value TRAIN uses).
MODEL {
name: "monitor-full-model"
base: "oktoseek/base-mini"
device: "cuda"
}
# Training hyperparameters: 10 epochs, batch size 32, learning rate 0.0001,
# on the "cuda" device.
# batch_size here is only the starting value: the CONTROL block in this file
# sets it to 16 at the end of an epoch if gpu_temperature exceeds 85.
TRAIN {
epochs: 10
batch_size: 32
learning_rate: 0.0001
device: "cuda"
}
# Monitoring configuration -- the focus of this example.
#   metrics          ten model-quality metrics, including the validation
#                    variants of loss and accuracy.
#   notify_if        five threshold conditions on loss, gpu_usage,
#                    gpu_temperature, val_loss and hallucination_score.
#                    NOTE(review): presumably a notification is raised when
#                    any single condition holds -- confirm against the spec.
#                    gpu_usage is written as a percentage (90%); the unit of
#                    gpu_temperature (85) is not stated, presumably degrees
#                    Celsius -- TODO confirm.
#   log_system       six system readings: GPU usage, GPU memory used/free,
#                    GPU temperature, CPU usage, RAM usage.
#   log_speed        five speed readings: tokens/s, samples/s, throughput,
#                    latency, step time.
#   refresh_interval set to 2s.
#   export_to        JSON file path under runs/logs/.
#   dashboard        enabled (true).
#   log_to           log file path under logs/ (a different directory from
#                    export_to).
MONITOR {
metrics: [
"loss",
"val_loss",
"accuracy",
"val_accuracy",
"precision",
"recall",
"f1_score",
"perplexity",
"confidence",
"hallucination_score"
]
notify_if {
loss > 2.0
gpu_usage > 90%
gpu_temperature > 85
val_loss > 2.5
hallucination_score > 0.5
}
log_system: [
"gpu_usage",
"gpu_memory_used",
"gpu_memory_free",
"gpu_temperature",
"cpu_usage",
"ram_usage"
]
log_speed: [
"tokens_per_second",
"samples_per_second",
"throughput",
"latency",
"step_time"
]
refresh_interval: 2s
export_to: "runs/logs/system.json"
dashboard: true
log_to: "logs/training.log"
}
# Control logic: one on_epoch_end hook. When gpu_temperature is above 85
# (the same threshold used in MONITOR's notify_if), it sets batch_size to 16
# -- half of the 32 configured in TRAIN -- and logs a message saying so.
CONTROL {
on_epoch_end {
IF gpu_temperature > 85 {
SET batch_size = 16
LOG "GPU temperature high, reducing batch size"
}
}
}
# Export settings: a single output format, "okm", written under the
# relative directory "export/".
EXPORT {
format: ["okm"]
path: "export/"
}