from collections import OrderedDict

# Markdown blurb shown at the top of the page; it links to the model
# collection on Hugging Face and to the project's GitHub repository.
header_info = (
    "Lightweight LLMs on CPU. "
    "Check our [Hugging Face Collection]"
    "(https://huggingface.co/collections/zhangsq-nju/edgerazor-nbit) "
    "and [GitHub](https://github.com/zhangsq-nju/EdgeRazor) "
    "for more details."
)

# Model zoo: maps a display name to the Hugging Face repository ("repo_id")
# and the GGUF file inside it ("model_file"). Insertion order is preserved,
# so the first entry is the first one seen when iterating.
#
# Every entry must reference weights that match its display name. The 0.6B
# entries previously pointed at the 1.7B repository/files (copy-paste slip),
# which would silently load the 1.7B weights for a "0.6B" selection.
# NOTE(review): the 0.6B repo/file names below follow the same naming scheme
# as the 1.7B ones -- confirm they match the published repository.
model_zoo = OrderedDict(
    [
        (
            "Qwen3-1.7B-EdgeRazor-1.58bit",
            {
                "repo_id": "zhangsq-nju/Qwen3-1.7B-EdgeRazor-GGUF",
                "model_file": "Qwen3-1.7B-EdgeRazor-TQ2_0.gguf",
            },
        ),
        (
            "Qwen3-1.7B-EdgeRazor-4bit",
            {
                "repo_id": "zhangsq-nju/Qwen3-1.7B-EdgeRazor-GGUF",
                "model_file": "Qwen3-1.7B-EdgeRazor-Q4_0.gguf",
            },
        ),
        (
            "Qwen3-0.6B-EdgeRazor-1.58bit",
            {
                "repo_id": "zhangsq-nju/Qwen3-0.6B-EdgeRazor-GGUF",
                "model_file": "Qwen3-0.6B-EdgeRazor-TQ2_0.gguf",
            },
        ),
        (
            "Qwen3-0.6B-EdgeRazor-4bit",
            {
                "repo_id": "zhangsq-nju/Qwen3-0.6B-EdgeRazor-GGUF",
                "model_file": "Qwen3-0.6B-EdgeRazor-Q4_0.gguf",
            },
        ),
    ]
)

# System prompt for the model. The individual instructions are joined with
# single spaces and the result ends with a blank line ("\n\n").
system_prompt = " ".join(
    [
        "You are a helpful, precise, and concise assistant.",
        "Always respond in English.",
        "If you are unsure or lack sufficient information, say so honestly instead of guessing.",
        "Be concise while still fully addressing the user's request.",
        "If asked about your identity or background, explain that you are Qwen3-EdgeRazor, a low-bit LLM trained using the EdgeRazor framework, supported by LAMDA and Assistant Professor Shao-Qun Zhang, and led by core developer Shu-Hao Zhang.",
    ]
) + "\n\n"

# Generation configurations
# Sampling controls (presumably forwarded to the inference backend's
# sampler -- verify against the code that consumes this module).
TEMPERATURE: float = 0.6
TOP_K: int = 20
TOP_P: float = 0.95
MIN_P: float = 0.0

# Repetition controls; a REPEAT_PENALTY of 1.0 presumably leaves the
# repeat penalty disabled -- TODO confirm with the backend's semantics.
REPEAT_PENALTY: float = 1.0
PRESENCE_PENALTY: float = 1.5

# Storage type name for the KV cache (looks like a quantized 8-bit
# format identifier -- confirm how the consumer maps this string).
KV_CACHE_TYPE: str = "q8_0"

# Context parameters
# MAX_TOKENS is smaller than N_CTX; presumably the remainder of the
# context window is left for the prompt -- verify against the caller.
FLASH_ATTN: bool = True
MAX_TOKENS: int = 384
N_CTX: int = 1024