| from collections import OrderedDict |
|
|
| |
# Markdown blurb (presumably rendered at the top of a demo UI) linking to the
# project's Hugging Face collection and GitHub repository.
header_info = "Lightweight LLMs on CPU. Check our [Hugging Face Collection](https://huggingface.co/collections/zhangsq-nju/edgerazor-nbit) and [GitHub](https://github.com/zhangsq-nju/EdgeRazor) for more details."
|
|
| |
# Registry of selectable models, in display order.
# Key: human-readable label shown to the user.
# Value: Hugging Face repo id plus the GGUF file to fetch from it.
#
# NOTE(review): the two "Qwen3-0.6B" entries previously pointed at the 1.7B
# repo and 1.7B GGUF files — an apparent copy-paste bug that would load the
# wrong model under a 0.6B label. Fixed to the 0.6B names by analogy with the
# 1.7B entries; confirm "zhangsq-nju/Qwen3-0.6B-EdgeRazor-GGUF" exists on HF.
model_zoo = OrderedDict(
    [
        (
            "Qwen3-1.7B-EdgeRazor-1.58bit",
            {
                "repo_id": "zhangsq-nju/Qwen3-1.7B-EdgeRazor-GGUF",
                "model_file": "Qwen3-1.7B-EdgeRazor-TQ2_0.gguf",
            },
        ),
        (
            "Qwen3-1.7B-EdgeRazor-4bit",
            {
                "repo_id": "zhangsq-nju/Qwen3-1.7B-EdgeRazor-GGUF",
                "model_file": "Qwen3-1.7B-EdgeRazor-Q4_0.gguf",
            },
        ),
        (
            "Qwen3-0.6B-EdgeRazor-1.58bit",
            {
                "repo_id": "zhangsq-nju/Qwen3-0.6B-EdgeRazor-GGUF",
                "model_file": "Qwen3-0.6B-EdgeRazor-TQ2_0.gguf",
            },
        ),
        (
            "Qwen3-0.6B-EdgeRazor-4bit",
            {
                "repo_id": "zhangsq-nju/Qwen3-0.6B-EdgeRazor-GGUF",
                "model_file": "Qwen3-0.6B-EdgeRazor-Q4_0.gguf",
            },
        ),
    ]
)
|
|
| |
# Default system prompt: behavioral instructions plus a fixed self-identity
# statement for the assistant. Assembled from sentence fragments so each
# instruction stays on its own line; the final string is unchanged.
_SYSTEM_PROMPT_PARTS = (
    "You are a helpful, precise, and concise assistant. ",
    "Always respond in English. ",
    "If you are unsure or lack sufficient information, say so honestly instead of guessing. ",
    "Be concise while still fully addressing the user's request. ",
    "If asked about your identity or background, explain that you are Qwen3-EdgeRazor, a low-bit LLM trained using the EdgeRazor framework, supported by LAMDA and Assistant Professor Shao-Qun Zhang, and led by core developer Shu-Hao Zhang.\n\n",
)
system_prompt = "".join(_SYSTEM_PROMPT_PARTS)
|
|
| |
# Sampling hyperparameters. Names follow llama.cpp conventions — presumably
# consumed by a llama-cpp-python Llama instance; confirm against the caller.
# Values match Qwen3's published recommended decoding settings — check the
# model card before tuning.
KV_CACHE_TYPE = "q8_0"  # 8-bit quantized KV cache, trades accuracy for CPU memory
TEMPERATURE = 0.6       # softmax temperature for sampling
MIN_P = 0.00            # min-p filtering disabled
REPEAT_PENALTY = 1.0    # 1.0 = repetition penalty disabled (deliberate, not a default leftover)
PRESENCE_PENALTY = 1.5  # penalizes tokens already present, to curb repetition instead
TOP_K = 20              # sample only from the 20 most likely tokens
TOP_P = 0.95            # nucleus-sampling cumulative-probability cutoff
|
|
| |
# Context and generation limits, sized for lightweight CPU inference.
N_CTX = 1024       # context window in tokens (prompt + generated output)
MAX_TOKENS = 384   # hard cap on tokens generated per response
FLASH_ATTN = True  # request the flash-attention kernel — backend may ignore if unsupported
|
|