"""Pipeline configuration dataclass for ContextForge v3.0."""
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class PipelineConfig:
    """Configuration for the ContextForge pipeline.

    Every field has a default, so ``PipelineConfig()`` can be constructed
    as-is; override only what differs. Construction performs no checks --
    call :meth:`validate` explicitly to verify the values.

    Usage:
        config = PipelineConfig(
            model_id="Qwen/Qwen3-235B-A22B",
            vram_budget_tokens=50_000_000,
        )
        config.validate()
        pipeline = Pipeline(config=config)
    """

    # Model configuration
    model_id: str = "Qwen/Qwen3-235B-A22B"

    # LSHTokenMatcher configuration
    block_size: int = 16  # vLLM PagedAttention block size
    hamming_threshold: int = 8  # <8 bits different = high confidence

    # VRAMAwareCache configuration
    vram_budget_tokens: int = 50_000_000  # ~3GB for 64-layer model

    # FAISS configuration
    faiss_dim: int = 384  # all-MiniLM-L6-v2 embedding dimension
    faiss_nlist: int = 100  # IVF cluster count (sqrt of expected entries)

    # Compression configuration
    compression_min_tokens: int = 512
    compression_emergency_threshold: float = 0.85  # VRAM pressure threshold

    # VRAM monitoring
    vram_check_interval: float = 2.0  # seconds between VRAM pressure checks

    # Anchor pool (KV offset alignment)
    anchor_pool_max_size: int = 20  # max anchors before LFU pruning

    def validate(self) -> None:
        """Check that every configured value is within its usable range.

        Integer fields are checked against a lower bound first, in
        declaration order, followed by ``model_id`` and the float fields.

        Raises:
            ValueError: For the first field found outside its allowed range.
        """
        # (field name, inclusive minimum). The message format is shared so
        # every integer bound reports the same way.
        integer_minimums = (
            ("block_size", 1),
            ("hamming_threshold", 1),
            ("vram_budget_tokens", 1000),
            ("faiss_dim", 1),
            ("faiss_nlist", 1),
            ("compression_min_tokens", 0),
            ("anchor_pool_max_size", 1),
        )
        for name, minimum in integer_minimums:
            value = getattr(self, name)
            if value < minimum:
                raise ValueError(f"{name} must be >= {minimum}, got {value}")

        if not isinstance(self.model_id, str) or not self.model_id.strip():
            raise ValueError(
                f"model_id must be a non-empty string, got {self.model_id!r}"
            )

        # NOTE(review): treated as a fraction of VRAM in (0, 1], based on the
        # 0.85 default -- confirm against the consumer of this value.
        # Written as `not (a < x <= b)` so that NaN is rejected as well.
        if not 0.0 < self.compression_emergency_threshold <= 1.0:
            raise ValueError(
                "compression_emergency_threshold must be in (0, 1], "
                f"got {self.compression_emergency_threshold}"
            )

        # `not (x > 0)` also rejects NaN, which `x <= 0` would let through.
        if not self.vram_check_interval > 0:
            raise ValueError(
                f"vram_check_interval must be > 0, got {self.vram_check_interval}"
            )