Spaces:
Running
Running
File size: 4,755 Bytes
dc71cad e63f982 dc71cad | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | """
configs/settings.py
───────────────────
Centralised, validated configuration using Pydantic-Settings.
All values come from environment variables or .env file.
"""
from pathlib import Path
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application configuration read from environment variables or ``.env``.

    Each field is aliased to the upper-case environment variable that feeds
    it and falls back to the default declared here when that variable is not
    set. Unknown variables are ignored (``extra="ignore"``).

    Values may also be passed to the constructor, either by alias
    (``Settings(LLM_MODEL="...")``) or by field name
    (``Settings(llm_model="...")``).
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
        # Every field below carries an alias. Without this flag a keyword
        # passed by field name matches no alias, is treated as an extra, and
        # is then silently discarded by extra="ignore" — the default wins.
        populate_by_name=True,
    )

    # ── LLM ──────────────────────────────────────────────────────────────
    openai_api_key: str = Field(default="", alias="OPENAI_API_KEY")
    groq_api_key: str = Field(default="", alias="GROQ_API_KEY")
    llm_provider: str = Field(default="openai", alias="LLM_PROVIDER")  # openai | groq | gemini | ollama
    llm_model: str = Field(default="gpt-4o", alias="LLM_MODEL")
    llm_max_tokens: int = Field(default=4096, alias="LLM_MAX_TOKENS")
    llm_temperature: float = Field(default=0.2, alias="LLM_TEMPERATURE")

    # ── SWE-bench ────────────────────────────────────────────────────────
    swebench_dataset: str = Field(
        default="princeton-nlp/SWE-bench_Lite", alias="SWEBENCH_DATASET"
    )
    swebench_split: str = Field(default="test", alias="SWEBENCH_SPLIT")
    results_dir: Path = Field(default=Path("./results"), alias="RESULTS_DIR")

    # ── Sandbox ──────────────────────────────────────────────────────────
    sandbox_image: str = Field(
        default="code-agent-sandbox:latest", alias="SANDBOX_IMAGE"
    )
    # NOTE(review): the unit of the timeout is not visible here (presumably
    # seconds) — confirm against the sandbox runner.
    sandbox_timeout: int = Field(default=60, alias="SANDBOX_TIMEOUT")
    sandbox_memory_limit: str = Field(default="2g", alias="SANDBOX_MEMORY_LIMIT")
    sandbox_cpu_limit: float = Field(default=2.0, alias="SANDBOX_CPU_LIMIT")
    sandbox_network: str = Field(default="none", alias="SANDBOX_NETWORK")

    # ── Caching ──────────────────────────────────────────────────────────
    redis_url: str = Field(default="redis://localhost:6379/0", alias="REDIS_URL")
    diskcache_dir: Path = Field(default=Path("./.cache/diskcache"), alias="DISKCACHE_DIR")

    # ── MLflow ───────────────────────────────────────────────────────────
    mlflow_tracking_uri: str = Field(default="./mlruns", alias="MLFLOW_TRACKING_URI")
    mlflow_experiment_name: str = Field(
        default="code-agent-baseline", alias="MLFLOW_EXPERIMENT_NAME"
    )

    # ── Retrieval ────────────────────────────────────────────────────────
    embedding_model: str = Field(
        default="text-embedding-3-small", alias="EMBEDDING_MODEL"
    )
    bm25_top_k: int = Field(default=20, alias="BM25_TOP_K")
    retrieval_top_k: int = Field(default=5, alias="RETRIEVAL_TOP_K")
    # The three alpha defaults sum to 1.0 (0.4 + 0.4 + 0.2); nothing here
    # enforces that for overridden values.
    rrf_alpha_bm25: float = Field(default=0.4, alias="RRF_ALPHA_BM25")
    rrf_alpha_embed: float = Field(default=0.4, alias="RRF_ALPHA_EMBED")
    rrf_alpha_ppr: float = Field(default=0.2, alias="RRF_ALPHA_PPR")

    # ── Agent Loop ───────────────────────────────────────────────────────
    max_attempts: int = Field(default=3, alias="MAX_ATTEMPTS")
    max_file_tokens: int = Field(default=2000, alias="MAX_FILE_TOKENS")

    # ── API ──────────────────────────────────────────────────────────────
    # NOTE(review): if api_host is used as the bind address, the default
    # "0.0.0.0" listens on every interface — confirm this is intended.
    api_host: str = Field(default="0.0.0.0", alias="API_HOST")
    api_port: int = Field(default=8000, alias="API_PORT")
    # Default broker uses Redis database 1; redis_url above defaults to
    # database 0.
    celery_broker_url: str = Field(
        default="redis://localhost:6379/1", alias="CELERY_BROKER_URL"
    )

    def ensure_dirs(self) -> None:
        """Create ``results_dir`` and ``diskcache_dir`` if they don't exist.

        Missing parent directories are created as well; directories that
        already exist are left untouched.
        """
        for directory in (self.results_dir, self.diskcache_dir):
            directory.mkdir(parents=True, exist_ok=True)
# Singleton — import this everywhere.
# Instantiated once, when this module is first imported, so the environment
# variables and the .env file are read at import time.
settings = Settings()
|