Anonymous Hunter
feat: Add robust configuration management, Docker support, initial testing, and quickstart documentation.
f21249a | # KerdosAI Default Configuration | |
| # Model Configuration: which base model to load, at which revision, and on which device | |
| base_model: "gpt2" # Base model name or path (default: gpt2) | |
| model_revision: null # Specific model revision/commit; null leaves it unpinned | |
| trust_remote_code: false # Whether to trust remote code; disabled by default | |
| device: null # Device to use (cuda/cpu); null selects auto-detection | |
| # LoRA Configuration: enable flag plus rank, alpha, dropout, and target-module selection | |
| lora: | |
| enabled: true | |
| r: 8 # LoRA rank | |
| alpha: 32 # LoRA alpha (4x the rank with these defaults) | |
| dropout: 0.1 # LoRA dropout | |
| target_modules: null # Target modules for LoRA; auto-detected if null | |
| # Quantization Configuration: off by default; other keys presumably apply only when enabled is true (TODO confirm) | |
| quantization: | |
| enabled: false | |
| bits: 4 # Bit width: 4 or 8 | |
| use_double_quant: true | |
| quant_type: "nf4" # Quantization type: nf4 or fp4 | |
| compute_dtype: "float16" # Compute dtype: float16, bfloat16, or float32 | |
| # Training Configuration: epochs and batching, optimizer settings, step intervals for logging/saving/eval, sequence length, seed, and fp16/bf16 flags (both off) | |
| training: | |
| epochs: 3 | |
| batch_size: 4 | |
| learning_rate: 0.00002 # 2e-5; decimal form avoids YAML 1.1 parsers that read a bare 2e-5 as a string | |
| warmup_steps: 100 | |
| gradient_accumulation_steps: 1 | |
| max_grad_norm: 1.0 | |
| weight_decay: 0.01 | |
| logging_steps: 10 | |
| save_steps: 100 | |
| eval_steps: 100 | |
| max_seq_length: 512 | |
| seed: 42 | |
| fp16: false | |
| bf16: false | |
| # Data Configuration: data sources (local files or a HuggingFace dataset), text column, sample cap, and preprocessing workers | |
| data: | |
| train_file: null # Path to training data; unset by default | |
| validation_file: null # Path to validation data; unset by default | |
| test_file: null # Path to test data; unset by default | |
| dataset_name: null # HuggingFace dataset name; unset by default | |
| dataset_config: null # Dataset configuration; unset by default | |
| text_column: "text" # Column name for text data | |
| max_samples: null # Limit on the number of samples (null for all) | |
| preprocessing_num_workers: 4 | |
| # Deployment Configuration: serving type and server settings. NOTE(review): host 0.0.0.0 binds every interface; timeout unit is not stated (presumably seconds - confirm) | |
| deployment: | |
| type: "rest" # One of: rest, docker, or kubernetes | |
| host: "0.0.0.0" | |
| port: 8000 | |
| workers: 1 | |
| max_batch_size: 8 | |
| timeout: 60 | |
| # Monitoring Configuration: enabled by default; W&B project/entity are unset and tensorboard_dir points at ./runs | |
| monitoring: | |
| enabled: true | |
| wandb_project: null # W&B project name; unset by default | |
| wandb_entity: null # W&B entity/team name; unset by default | |
| tensorboard_dir: "./runs" | |
| log_model: false | |
| # Output Configuration: relative paths for outputs and checkpoints, plus an optional HuggingFace cache directory | |
| output_dir: "./output" | |
| checkpoint_dir: "./checkpoints" | |
| cache_dir: null # HuggingFace cache directory; unset by default | |