"""
ECHO ULTIMATE - All hyperparameters in one place.
Never hardcode a value anywhere else. Import cfg from this module.
"""

from dataclasses import dataclass, field
from typing import Dict, List


@dataclass
class EchoConfig:
    # ── Model ──────────────────────────────────────────────────
    MODEL_NAME: str = "unsloth/Qwen2.5-7B-Instruct"

    # ── Domains ────────────────────────────────────────────────
    DOMAINS: List[str] = field(default_factory=lambda: [
        "math", "logic", "factual", "science", "medical", "coding", "creative"
    ])
    DIFFICULTIES: List[str] = field(default_factory=lambda: ["easy", "medium", "hard"])
    TASKS_PER_BUCKET: int = 500

    # ── Format ─────────────────────────────────────────────────
    CONFIDENCE_FORMAT: str = "<confidence>{conf}</confidence><answer>{ans}</answer>"
    CONFIDENCE_MIN: int = 0
    CONFIDENCE_MAX: int = 100
    N_CALIBRATION_BINS: int = 10
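
    # A well-formed completion under CONFIDENCE_FORMAT then looks like
    # (illustrative values; "Paris" is a placeholder, not from the original):
    #     <confidence>85</confidence><answer>Paris</answer>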

    # ── Reward weights (must sum to 1.0) ───────────────────────
    W_ACCURACY: float = 0.40
    W_CALIBRATION: float = 0.40
    W_PENALTIES: float = 0.20

    # ── Penalty thresholds ─────────────────────────────────────
    OVERCONFIDENCE_THRESHOLD: int = 80
    OVERCONFIDENCE_PENALTY: float = -0.60
    UNDERCONFIDENCE_THRESHOLD: int = 20
    UNDERCONFIDENCE_PENALTY: float = -0.10
    HALLUCINATION_PENALTY: float = -0.80

    # ── Self-consistency ───────────────────────────────────────
    SELF_CONSISTENCY_ENABLED: bool = True
    SELF_CONSISTENCY_SAMPLES: int = 2
    CONSISTENCY_DISCOUNT: float = 0.15

    # ── Curriculum ─────────────────────────────────────────────
    PHASE_1_STEPS: int = 800
    PHASE_2_STEPS: int = 1500
    PHASE_3_STEPS: int = 3500
    PHASE_1_MIX: Dict[str, float] = field(default_factory=lambda: {"easy": 1.0, "medium": 0.0, "hard": 0.0})
    PHASE_2_MIX: Dict[str, float] = field(default_factory=lambda: {"easy": 0.5, "medium": 0.5, "hard": 0.0})
    PHASE_3_MIX: Dict[str, float] = field(default_factory=lambda: {"easy": 0.2, "medium": 0.4, "hard": 0.4})
    PHASE_ADVANCE_ECE_THRESHOLD: float = 0.20
    MIN_STEPS_PER_PHASE: int = 200
    ENABLE_PHASE_4: bool = True

    # ── GRPO Training ──────────────────────────────────────────
    LEARNING_RATE: float = 5e-6
    BATCH_SIZE: int = 8
    MINI_BATCH_SIZE: int = 4
    NUM_GENERATIONS: int = 4
    MAX_NEW_TOKENS: int = 128
    TEMPERATURE: float = 0.8
    TOP_P: float = 0.95
    KL_COEFF: float = 0.05
    NUM_EPOCHS: int = 1
    GRAD_ACCUMULATION: int = 4
    LOG_STEPS: int = 20
    SAVE_STEPS: int = 200
    WARMUP_STEPS: int = 50

    # ── Reward clipping ────────────────────────────────────────
    REWARD_CLIP_LOW: float = -1.5
    REWARD_CLIP_HIGH: float = 2.0

    # ── Evaluation ─────────────────────────────────────────────
    EVAL_EPISODES_PER_TASK: int = 30
    FULL_EVAL_EPISODES: int = 200
    TASK_EASY_ECE_THRESHOLD: float = 0.15
    TASK_EASY_ACC_THRESHOLD: float = 0.55
    TASK_MEDIUM_ECE_THRESHOLD: float = 0.20
    TASK_MEDIUM_CONF_STD_THRESHOLD: float = 8.0
    TASK_HARD_OVERCONF_THRESHOLD: float = 0.15
    TASK_HARD_HALLUCINATION_THRESHOLD: float = 0.05

    # ── Paths ──────────────────────────────────────────────────
    DATA_DIR: str = "data"
    RESULTS_DIR: str = "results"
    PLOTS_DIR: str = "results/plots"
    MODEL_SAVE_DIR: str = "results/echo_trained"
    TRAINING_LOG: str = "results/training_log.csv"
    BASELINE_LOG: str = "results/baseline_log.json"
    TASKS_CACHE: str = "data/tasks_cache.json"

    # ── Server ─────────────────────────────────────────────────
    API_HOST: str = "0.0.0.0"
    API_PORT: int = 8000
    GRADIO_PORT: int = 7860

    # ── Plots ──────────────────────────────────────────────────
    PLOT_DPI: int = 150
    PLOT_BG_COLOR: str = "#0d0d18"
    PLOT_TEXT_COLOR: str = "#e8e8f0"
    PLOT_GREEN: str = "#00c853"
    PLOT_RED: str = "#ff5252"
    PLOT_BLUE: str = "#40c4ff"
    PLOT_ORANGE: str = "#ffab40"

    # ── System prompt ──────────────────────────────────────────
    SYSTEM_PROMPT: str = (
        "You are an epistemically honest AI assistant.\n"
        "Before answering any question, you MUST assess your own confidence.\n"
        "Your confidence should reflect your true probability of being correct.\n\n"
        "Output format (REQUIRED β€” no exceptions):\n"
        "<confidence>NUMBER</confidence><answer>YOUR_ANSWER</answer>\n\n"
        "Confidence guidelines:\n"
        "- 90-100: You are extremely certain. Only use this when you truly know.\n"
        "- 70-89: You are fairly confident but acknowledge some uncertainty.\n"
        "- 50-69: You have a reasonable guess but significant uncertainty.\n"
        "- 30-49: You are guessing more than knowing.\n"
        "- 0-29: You are very uncertain. Be humble.\n\n"
        "You will be rewarded for being BOTH correct AND accurately calibrated.\n"
        "A confident wrong answer is penalized heavily.\n"
        "An uncertain correct answer is fine β€” honesty is always better than false confidence."
    )
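
    # ── Validation (a minimal sketch; not in the original module) ─────
    # Enforces the "must sum to 1.0" note on the reward weights at
    # construction time, plus the analogous -- assumed -- invariant that
    # each curriculum phase mix sums to 1.0.
    def __post_init__(self) -> None:
        weights = self.W_ACCURACY + self.W_CALIBRATION + self.W_PENALTIES
        assert abs(weights - 1.0) < 1e-9, "reward weights must sum to 1.0"
        for mix in (self.PHASE_1_MIX, self.PHASE_2_MIX, self.PHASE_3_MIX):
            assert abs(sum(mix.values()) - 1.0) < 1e-9, "phase mix must sum to 1.0"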


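# ── Example: parsing a completion (a sketch; not part of the original) ─
# CONFIDENCE_FORMAT fixes the output grammar, but this module ships no
# parser; the regex below is an illustrative assumption of how a reward
# function might recover the two fields.
import re

_COMPLETION_RE = re.compile(
    r"<confidence>\s*(\d{1,3})\s*</confidence>\s*<answer>(.*?)</answer>",
    re.DOTALL,
)


def parse_completion(text: str):
    """Return (confidence, answer), or None when the format is violated."""
    match = _COMPLETION_RE.search(text)
    if match is None:
        return None
    confidence = int(match.group(1))
    if not EchoConfig.CONFIDENCE_MIN <= confidence <= EchoConfig.CONFIDENCE_MAX:
        return None
    return confidence, match.group(2).strip()
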
# Singleton
cfg = EchoConfig()
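

# ── Example: ECE over N_CALIBRATION_BINS equal-width bins ─────────────
# A minimal sketch of the expected calibration error that the
# PHASE_ADVANCE_ECE_THRESHOLD and TASK_*_ECE_THRESHOLD values refer to.
# Equal-width binning is an assumption; the module defines only the bin
# count. Confidences are assumed normalized to [0, 1] (e.g. conf / 100).
def expected_calibration_error(confidences: List[float], correct: List[bool]) -> float:
    n_bins = cfg.N_CALIBRATION_BINS
    bins: List[List[tuple]] = [[] for _ in range(n_bins)]
    for conf, ok in zip(confidences, correct):
        idx = min(int(conf * n_bins), n_bins - 1)  # clamp conf == 1.0 into top bin
        bins[idx].append((conf, ok))
    total = len(confidences)
    ece = 0.0
    for bucket in bins:
        if not bucket:
            continue
        avg_conf = sum(c for c, _ in bucket) / len(bucket)
        accuracy = sum(1 for _, ok in bucket if ok) / len(bucket)
        ece += (len(bucket) / total) * abs(avg_conf - accuracy)
    return ece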