Spaces:
Sleeping
Sleeping
File size: 6,297 Bytes
acb327b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 | """
ECHO ULTIMATE — All hyperparameters in one place.
Never hardcode a value anywhere else. Import cfg from this module.
"""
from dataclasses import dataclass, field
from typing import Dict, List
@dataclass
class EchoConfig:
    """Single container for every tunable value used by the project.

    All fields carry defaults, so ``EchoConfig()`` yields the stock
    configuration. List and dict fields are created through
    ``default_factory``, so each instance owns its own copies and mutating
    one instance never leaks into another.

    Nothing in this class validates the values: constraints mentioned in the
    comments (e.g. reward weights summing to 1.0) are conventions that the
    defaults satisfy but that are not enforced here.
    """

    # ── Model ──────────────────────────────────────────────────
    MODEL_NAME: str = "unsloth/Qwen2.5-7B-Instruct"

    # ── Domains ────────────────────────────────────────────────
    DOMAINS: List[str] = field(default_factory=lambda: [
        "math", "logic", "factual", "science", "medical", "coding", "creative"
    ])
    # These labels are also the keys of the PHASE_*_MIX dicts below.
    DIFFICULTIES: List[str] = field(default_factory=lambda: ["easy", "medium", "hard"])
    # NOTE(review): presumably one bucket per (domain, difficulty) pair — confirm.
    TASKS_PER_BUCKET: int = 500

    # ── Format ─────────────────────────────────────────────────
    # str.format template with two placeholders: {conf} and {ans}.
    CONFIDENCE_FORMAT: str = "<confidence>{conf}</confidence><answer>{ans}</answer>"
    CONFIDENCE_MIN: int = 0
    CONFIDENCE_MAX: int = 100
    N_CALIBRATION_BINS: int = 10

    # ── Reward weights (must sum to 1.0) ───────────────────────
    # The defaults sum to 1.0; the constraint is not checked by this class.
    W_ACCURACY: float = 0.40
    W_CALIBRATION: float = 0.40
    W_PENALTIES: float = 0.20

    # ── Penalty thresholds ─────────────────────────────────────
    # Thresholds share the 0-100 integer type of CONFIDENCE_MIN/MAX;
    # penalties are negative floats.
    OVERCONFIDENCE_THRESHOLD: int = 80
    OVERCONFIDENCE_PENALTY: float = -0.60
    UNDERCONFIDENCE_THRESHOLD: int = 20
    UNDERCONFIDENCE_PENALTY: float = -0.10
    HALLUCINATION_PENALTY: float = -0.80

    # ── Self-consistency ───────────────────────────────────────
    SELF_CONSISTENCY_ENABLED: bool = True
    SELF_CONSISTENCY_SAMPLES: int = 2
    CONSISTENCY_DISCOUNT: float = 0.15

    # ── Curriculum ─────────────────────────────────────────────
    # NOTE(review): unclear from this file whether the step counts are
    # per-phase budgets or cumulative boundaries — confirm against the trainer.
    PHASE_1_STEPS: int = 800
    PHASE_2_STEPS: int = 1500
    PHASE_3_STEPS: int = 3500
    # Each mix maps a difficulty label to a fraction; every default sums to 1.0.
    PHASE_1_MIX: Dict[str, float] = field(
        default_factory=lambda: {"easy": 1.0, "medium": 0.0, "hard": 0.0}
    )
    PHASE_2_MIX: Dict[str, float] = field(
        default_factory=lambda: {"easy": 0.5, "medium": 0.5, "hard": 0.0}
    )
    PHASE_3_MIX: Dict[str, float] = field(
        default_factory=lambda: {"easy": 0.2, "medium": 0.4, "hard": 0.4}
    )
    PHASE_ADVANCE_ECE_THRESHOLD: float = 0.20
    MIN_STEPS_PER_PHASE: int = 200
    # No PHASE_4_STEPS / PHASE_4_MIX is defined in this class; whatever
    # phase 4 uses must come from elsewhere.
    ENABLE_PHASE_4: bool = True

    # ── GRPO Training ──────────────────────────────────────────
    LEARNING_RATE: float = 5e-6
    BATCH_SIZE: int = 8
    MINI_BATCH_SIZE: int = 4
    NUM_GENERATIONS: int = 4
    MAX_NEW_TOKENS: int = 128
    TEMPERATURE: float = 0.8
    TOP_P: float = 0.95
    KL_COEFF: float = 0.05
    NUM_EPOCHS: int = 1
    GRAD_ACCUMULATION: int = 4
    LOG_STEPS: int = 20
    SAVE_STEPS: int = 200
    WARMUP_STEPS: int = 50

    # ── Reward clipping ────────────────────────────────────────
    REWARD_CLIP_LOW: float = -1.5
    REWARD_CLIP_HIGH: float = 2.0

    # ── Evaluation ─────────────────────────────────────────────
    EVAL_EPISODES_PER_TASK: int = 30
    FULL_EVAL_EPISODES: int = 200
    TASK_EASY_ECE_THRESHOLD: float = 0.15
    TASK_EASY_ACC_THRESHOLD: float = 0.55
    TASK_MEDIUM_ECE_THRESHOLD: float = 0.20
    TASK_MEDIUM_CONF_STD_THRESHOLD: float = 8.0
    TASK_HARD_OVERCONF_THRESHOLD: float = 0.15
    TASK_HARD_HALLUCINATION_THRESHOLD: float = 0.05

    # ── Paths ──────────────────────────────────────────────────
    # All paths are relative string literals. The "results/..." and "data/..."
    # entries are NOT derived from RESULTS_DIR / DATA_DIR, so overriding a
    # directory field does not move the files listed beneath it.
    DATA_DIR: str = "data"
    RESULTS_DIR: str = "results"
    PLOTS_DIR: str = "results/plots"
    MODEL_SAVE_DIR: str = "results/echo_trained"
    TRAINING_LOG: str = "results/training_log.csv"
    BASELINE_LOG: str = "results/baseline_log.json"
    TASKS_CACHE: str = "data/tasks_cache.json"

    # ── Server ─────────────────────────────────────────────────
    # "0.0.0.0" is the IPv4 wildcard address.
    # NOTE(review): confirm listening on every interface is intended.
    API_HOST: str = "0.0.0.0"
    API_PORT: int = 8000
    GRADIO_PORT: int = 7860

    # ── Plots ──────────────────────────────────────────────────
    # Colours are "#rrggbb" hex strings.
    PLOT_DPI: int = 150
    PLOT_BG_COLOR: str = "#0d0d18"
    PLOT_TEXT_COLOR: str = "#e8e8f0"
    PLOT_GREEN: str = "#00c853"
    PLOT_RED: str = "#ff5252"
    PLOT_BLUE: str = "#40c4ff"
    PLOT_ORANGE: str = "#ffab40"

    # ── System prompt ──────────────────────────────────────────
    # The output-format line uses the same <confidence>/<answer> tags as
    # CONFIDENCE_FORMAT; keep the two in sync when editing either.
    # The dashes in this text are real em dashes (U+2014); they had been
    # garbled to a stray Greek beta by a wrong-charset decode.
    SYSTEM_PROMPT: str = (
        "You are an epistemically honest AI assistant.\n"
        "Before answering any question, you MUST assess your own confidence.\n"
        "Your confidence should reflect your true probability of being correct.\n\n"
        "Output format (REQUIRED — no exceptions):\n"
        "<confidence>NUMBER</confidence><answer>YOUR_ANSWER</answer>\n\n"
        "Confidence guidelines:\n"
        "- 90-100: You are extremely certain. Only use this when you truly know.\n"
        "- 70-89: You are fairly confident but acknowledge some uncertainty.\n"
        "- 50-69: You have a reasonable guess but significant uncertainty.\n"
        "- 30-49: You are guessing more than knowing.\n"
        "- 0-29: You are very uncertain. Be humble.\n\n"
        "You will be rewarded for being BOTH correct AND accurately calibrated.\n"
        "A confident wrong answer is penalized heavily.\n"
        "An uncertain correct answer is fine — honesty is always better than false confidence."
    )
# Singleton: the shared, module-level EchoConfig instance built from the
# defaults. Other modules are meant to import this object (see the module
# docstring) rather than construct their own or hardcode values.
cfg = EchoConfig()
|