"""
neural_config.py — Configuration and hyperparameters for MLX LoRA training.
"""
|
|
| from dataclasses import dataclass, field |
| from pathlib import Path |
| import json |
| import os |
|
|
|
|
@dataclass
class NeuralConfig:
    """Training hyperparameters and daemon configuration for MLX LoRA training.

    Plain-data config object: all fields are JSON-serializable, so the whole
    instance round-trips through ``save()`` / ``load()`` via ``__dict__``.
    Paths containing ``~`` are lazily expanded by ``resolve_paths()``.
    """

    # --- Daemon networking ---
    daemon_port: int = 8766
    daemon_host: str = "0.0.0.0"  # NOTE(review): binds all interfaces — confirm intended

    # --- Model identity ---
    model_key: str = ""
    model_path: str = ""
    model_architecture: str = ""

    # --- LoRA adapter shape ---
    lora_rank: int = 32
    lora_alpha: float = 32.0
    lora_targets: list = field(default_factory=lambda: ["q_proj", "v_proj", "out_proj", "down_proj"])
    lora_dropout: float = 0.0
    lora_num_layers: int = -1  # -1 presumably means "all layers" — confirm against trainer

    # --- Training schedule ---
    training_backend: str = "mlx"
    learning_rate: float = 5e-4
    min_learning_rate: float = 5e-5
    cosine_period_steps: int = 5000
    warmup_fraction: float = 0.1
    steps_per_cycle: int = 1
    batch_size: int = 0  # 0 presumably means auto-select batch size — confirm against trainer
    epochs_per_cycle: int = 1
    train_epochs: int = 15
    early_stop_loss: float = 0.8
    early_stop_patience: int = 2
    max_seq_len: int = 512
    gradient_clip: float = 1.0
    warmup_steps: int = 10
    auto_train: bool = True
    replay_ratio: float = 0.3

    # --- Adam optimizer ---
    adam_beta1: float = 0.9
    adam_beta2: float = 0.999
    adam_eps: float = 1e-8
    weight_decay: float = 0.0

    # --- Data buffers ---
    rolling_buffer_size: int = 100
    replay_buffer_size: int = 500
    min_response_tokens: int = 10

    # --- ANE (Apple Neural Engine) tuning ---
    ane_compile_budget: int = 110
    ane_min_tensor_dim: int = 16
    ane_seq_len: int = 16

    # --- Filesystem layout ---
    base_dir: str = "~/.jarvis/fine-tune"
    adapter_dir: str = ""   # derived in resolve_paths() when empty
    replay_path: str = ""   # derived in resolve_paths() when empty
    auto_save_interval: int = 10

    # --- LM Studio integration ---
    lms_cli_path: str = ""  # auto-detected in resolve_paths() when empty
    lms_api_url: str = "http://localhost:1234"

    @property
    def lora_scaling(self) -> float:
        """LoRA scaling factor (alpha / rank).

        Returns 0.0 when ``lora_rank`` is 0 instead of raising
        ZeroDivisionError — rank 0 means no LoRA contribution.
        """
        if not self.lora_rank:
            return 0.0
        return self.lora_alpha / self.lora_rank

    def resolve_paths(self) -> None:
        """Expand ``~`` in base_dir and derive any unset dynamic paths.

        Idempotent: safe to call repeatedly. Also auto-detects the LM Studio
        CLI binary from well-known install locations if not set explicitly.
        """
        self.base_dir = str(Path(self.base_dir).expanduser())
        if not self.adapter_dir:
            # Per-model adapter directory; fall back to "default" when no model key.
            key = self.model_key or "default"
            self.adapter_dir = str(Path(self.base_dir) / "adapters" / key)
        if not self.replay_path:
            self.replay_path = str(Path(self.base_dir) / "replay.jsonl")

        if not self.lms_cli_path:
            candidates = [
                Path.home() / ".lmstudio" / "bin" / "lms",
                Path("/usr/local/bin/lms"),
            ]
            for c in candidates:
                if c.exists():
                    self.lms_cli_path = str(c)
                    break

    def ensure_dirs(self) -> None:
        """Create required directories (base and adapter dirs)."""
        self.resolve_paths()
        Path(self.base_dir).mkdir(parents=True, exist_ok=True)
        Path(self.adapter_dir).mkdir(parents=True, exist_ok=True)

    def save(self, path: str = "") -> None:
        """Save config to JSON at *path* (default: <base_dir>/config.json).

        BUGFIX: resolve paths *before* building the default path. The original
        computed ``Path(self.base_dir) / "config.json"`` first, so the default
        ``~/.jarvis/fine-tune`` produced a relative path with a literal ``~``
        directory instead of the user's home.
        """
        self.resolve_paths()
        path = path or str(Path(self.base_dir) / "config.json")
        Path(path).parent.mkdir(parents=True, exist_ok=True)
        with open(path, "w") as f:
            json.dump(self.__dict__, f, indent=2)

    @classmethod
    def load(cls, path: str) -> "NeuralConfig":
        """Load config from JSON, ignoring keys this class does not define."""
        with open(path) as f:
            data = json.load(f)
        cfg = cls()
        for k, v in data.items():
            # Skip unknown keys so stale config files from older versions load.
            if hasattr(cfg, k):
                setattr(cfg, k, v)
        cfg.resolve_paths()
        return cfg

    def to_dict(self) -> dict:
        """Convert to a plain dict for API responses (adds derived lora_scaling)."""
        self.resolve_paths()
        d = self.__dict__.copy()
        d["lora_scaling"] = self.lora_scaling
        return d

    def update_from_dict(self, data: dict) -> None:
        """Update config from an API request, restricted to an allow-list.

        Keys outside *allowed* (e.g. paths, ports, model identity) are
        silently ignored so clients cannot retarget the daemon's filesystem
        or network settings.
        """
        allowed = {
            "learning_rate", "min_learning_rate", "cosine_period_steps",
            "warmup_fraction", "steps_per_cycle", "lora_rank", "lora_alpha",
            "lora_targets", "lora_num_layers", "training_backend",
            "auto_train", "replay_ratio", "gradient_clip", "warmup_steps",
            "rolling_buffer_size", "min_response_tokens", "auto_save_interval",
            "max_seq_len", "lora_dropout", "weight_decay",
            "epochs_per_cycle", "train_epochs",
            "early_stop_loss", "early_stop_patience",
        }
        for k, v in data.items():
            if k in allowed and hasattr(self, k):
                setattr(self, k, v)
|