Anonymous Hunter

feat: Add robust configuration management, Docker support, initial testing, and quickstart documentation.

f21249a 3 months ago

1.84 kB

	# KerdosAI Default Configuration

	# Model Configuration
	# Base model name or path
	base_model: "gpt2"
	# Specific model revision/commit
	model_revision: null
	# Whether to trust remote code
	trust_remote_code: false
	# Device to use (cuda/cpu), null for auto-detection
	device: null

	# LoRA Configuration
	# Child keys are indented so they nest under `lora`; at the parent's column
	# they parse as top-level keys and `lora` itself becomes null.
	lora:
	  enabled: true
	  r: 8  # LoRA rank
	  alpha: 32  # LoRA alpha
	  dropout: 0.1  # LoRA dropout
	  target_modules: null  # Auto-detect if null

	# Quantization Configuration
	# Child keys are indented so they nest under `quantization`; at the parent's
	# column they parse as top-level keys and `quantization` itself becomes null.
	quantization:
	  enabled: false
	  bits: 4  # 4 or 8
	  use_double_quant: true
	  quant_type: "nf4"  # nf4 or fp4
	  compute_dtype: "float16"  # float16, bfloat16, or float32

	# Training Configuration
	# Child keys are indented so they nest under `training`; at the parent's
	# column they parse as top-level keys and `training` itself becomes null.
	training:
	  epochs: 3
	  batch_size: 4
	  # 2e-5, written in decimal form because exponent literals are typed
	  # inconsistently across YAML parsers.
	  learning_rate: 0.00002
	  warmup_steps: 100
	  gradient_accumulation_steps: 1
	  max_grad_norm: 1.0
	  weight_decay: 0.01
	  logging_steps: 10
	  save_steps: 100
	  eval_steps: 100
	  max_seq_length: 512
	  seed: 42
	  # NOTE(review): presumably mutually exclusive mixed-precision switches;
	  # confirm against the consumer before enabling both.
	  fp16: false
	  bf16: false

	# Data Configuration
	# Child keys are indented so they nest under `data`; at the parent's column
	# they parse as top-level keys and `data` itself becomes null.
	data:
	  train_file: null  # Path to training data
	  validation_file: null  # Path to validation data
	  test_file: null  # Path to test data
	  dataset_name: null  # HuggingFace dataset name
	  dataset_config: null  # Dataset configuration
	  text_column: "text"  # Column name for text data
	  max_samples: null  # Limit number of samples (null for all)
	  preprocessing_num_workers: 4

	# Deployment Configuration
	# Child keys are indented so they nest under `deployment`; at the parent's
	# column they parse as top-level keys and `deployment` itself becomes null.
	deployment:
	  type: "rest"  # rest, docker, or kubernetes
	  # NOTE(review): presumably the bind address; "0.0.0.0" would listen on
	  # every interface. Confirm this is intended as a default.
	  host: "0.0.0.0"
	  port: 8000
	  workers: 1
	  max_batch_size: 8
	  timeout: 60

	# Monitoring Configuration
	# Child keys are indented so they nest under `monitoring`; at the parent's
	# column they parse as top-level keys and `monitoring` itself becomes null.
	monitoring:
	  enabled: true
	  wandb_project: null  # W&B project name
	  wandb_entity: null  # W&B entity/team name
	  tensorboard_dir: "./runs"
	  log_model: false

	# Output Configuration
	# Top-level path settings; the two directory defaults are relative paths.
	output_dir: "./output"
	checkpoint_dir: "./checkpoints"
	# HuggingFace cache directory
	cache_dir: null