---
# Model card metadata for hadadxyz/Qwen3-4B-Diversity.
# This block must stay fenced by `---` lines so it is read as metadata and
# kept separate from the prose that follows.

# Model this checkpoint was fine-tuned from.
base_model:
- Qwen/Qwen3-4B
tags:
- distillation
- distilled
- sft
- peft
- qwen3
# Datasets used for training (row counts are listed in the prose below).
datasets:
- ianncity/KIMI-K2.5-550000x
- Jackrong/Qwen3.5-reasoning-700x
- nohurry/Opus-4.6-Reasoning-3000x-filtered
- TeichAI/claude-4.5-opus-high-reasoning-250x
- TeichAI/gemini-3-pro-preview-high-reasoning-250x
- TeichAI/claude-haiku-4.5-high-reasoning-1700x
- TeichAI/gpt-5.2-high-reasoning-250x
- Roman1111111/gemini-3.1-pro-hard-high-reasoning
- Jackrong/glm-4.7-multiturn-CoT
- bmeyer2025/glm5-reasoning-traces
- TeichAI/claude-sonnet-4.5-high-reasoning-250x
- TeichAI/deepseek-v3.2-speciale-openr1-math-3k
- TeichAI/deepseek-v3.2-speciale-OpenCodeReasoning-3k
- TeichAI/deepseek-v3.2-speciale-1000x
- TeichAI/gpt-5-codex-1000x
# Evaluation results: one entry per `dataset.name`, each reporting a single
# `acc` (accuracy) metric on cais/mmlu.
model-index:
- name: hadadxyz/Qwen3-4B-Diversity
  results:
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu
      type: cais/mmlu
    metrics:
    - type: acc
      value: 67.8
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Humanities
      type: cais/mmlu
    metrics:
    - type: acc
      value: 57.9
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Formal Logic
      type: cais/mmlu
    metrics:
    - type: acc
      value: 58.7
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu High School European History
      type: cais/mmlu
    metrics:
    - type: acc
      value: 78.2
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu High School Us History
      type: cais/mmlu
    metrics:
    - type: acc
      value: 84.8
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu High School World History
      type: cais/mmlu
    metrics:
    - type: acc
      value: 83.1
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu International Law
      type: cais/mmlu
    metrics:
    - type: acc
      value: 77.7
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Jurisprudence
      type: cais/mmlu
    metrics:
    - type: acc
      value: 78.7
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Logical Fallacies
      type: cais/mmlu
    metrics:
    - type: acc
      value: 82.8
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Moral Disputes
      type: cais/mmlu
    metrics:
    - type: acc
      value: 71.1
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Moral Scenarios
      type: cais/mmlu
    metrics:
    - type: acc
      value: 28.4
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Philosophy
      type: cais/mmlu
    metrics:
    - type: acc
      value: 73.3
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Prehistory
      type: cais/mmlu
    metrics:
    - type: acc
      value: 76.2
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Professional Law
      type: cais/mmlu
    metrics:
    - type: acc
      value: 47.4
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu World Religions
      type: cais/mmlu
    metrics:
    - type: acc
      value: 78.4
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Other
      type: cais/mmlu
    metrics:
    - type: acc
      value: 72.1
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Business Ethics
      type: cais/mmlu
    metrics:
    - type: acc
      value: 73
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Clinical Knowledge
      type: cais/mmlu
    metrics:
    - type: acc
      value: 75.5
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu College Medicine
      type: cais/mmlu
    metrics:
    - type: acc
      value: 71.1
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Global Facts
      type: cais/mmlu
    metrics:
    - type: acc
      value: 41
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Human Aging
      type: cais/mmlu
    metrics:
    - type: acc
      value: 67.7
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Management
      type: cais/mmlu
    metrics:
    - type: acc
      value: 84.5
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Marketing
      type: cais/mmlu
    metrics:
    - type: acc
      value: 85.5
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Medical Genetics
      type: cais/mmlu
    metrics:
    - type: acc
      value: 75
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Miscellaneous
      type: cais/mmlu
    metrics:
    - type: acc
      value: 79.7
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Nutrition
      type: cais/mmlu
    metrics:
    - type: acc
      value: 74.8
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Professional Accounting
      type: cais/mmlu
    metrics:
    - type: acc
      value: 55
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Professional Medicine
      type: cais/mmlu
    metrics:
    - type: acc
      value: 71.7
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Virology
      type: cais/mmlu
    metrics:
    - type: acc
      value: 53
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Social Sciences
      type: cais/mmlu
    metrics:
    - type: acc
      value: 78.4
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Econometrics
      type: cais/mmlu
    metrics:
    - type: acc
      value: 64
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu High School Geography
      type: cais/mmlu
    metrics:
    - type: acc
      value: 84.3
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu High School Government And Politics
      type: cais/mmlu
    metrics:
    - type: acc
      value: 87
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu High School Macroeconomics
      type: cais/mmlu
    metrics:
    - type: acc
      value: 74.6
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu High School Microeconomics
      type: cais/mmlu
    metrics:
    - type: acc
      value: 80.7
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu High School Psychology
      type: cais/mmlu
    metrics:
    - type: acc
      value: 87.2
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Human Sexuality
      type: cais/mmlu
    metrics:
    - type: acc
      value: 75.6
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Professional Psychology
      type: cais/mmlu
    metrics:
    - type: acc
      value: 71.2
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Public Relations
      type: cais/mmlu
    metrics:
    - type: acc
      value: 71.8
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Security Studies
      type: cais/mmlu
    metrics:
    - type: acc
      value: 74.3
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Sociology
      type: cais/mmlu
    metrics:
    - type: acc
      value: 84.1
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Us Foreign Policy
      type: cais/mmlu
    metrics:
    - type: acc
      value: 81
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Stem
      type: cais/mmlu
    metrics:
    - type: acc
      value: 68.1
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Abstract Algebra
      type: cais/mmlu
    metrics:
    - type: acc
      value: 45
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Anatomy
      type: cais/mmlu
    metrics:
    - type: acc
      value: 61.5
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Astronomy
      type: cais/mmlu
    metrics:
    - type: acc
      value: 78.9
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu College Biology
      type: cais/mmlu
    metrics:
    - type: acc
      value: 83.3
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu College Chemistry
      type: cais/mmlu
    metrics:
    - type: acc
      value: 54
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu College Computer Science
      type: cais/mmlu
    metrics:
    - type: acc
      value: 69
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu College Mathematics
      type: cais/mmlu
    metrics:
    - type: acc
      value: 58
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu College Physics
      type: cais/mmlu
    metrics:
    - type: acc
      value: 53.9
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Computer Security
      type: cais/mmlu
    metrics:
    - type: acc
      value: 80
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Conceptual Physics
      type: cais/mmlu
    metrics:
    - type: acc
      value: 77
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Electrical Engineering
      type: cais/mmlu
    metrics:
    - type: acc
      value: 76.6
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Elementary Mathematics
      type: cais/mmlu
    metrics:
    - type: acc
      value: 65.6
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu High School Biology
      type: cais/mmlu
    metrics:
    - type: acc
      value: 86.1
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu High School Chemistry
      type: cais/mmlu
    metrics:
    - type: acc
      value: 70.4
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu High School Computer Science
      type: cais/mmlu
    metrics:
    - type: acc
      value: 86
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu High School Mathematics
      type: cais/mmlu
    metrics:
    - type: acc
      value: 42.6
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu High School Physics
      type: cais/mmlu
    metrics:
    - type: acc
      value: 62.9
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu High School Statistics
      type: cais/mmlu
    metrics:
    - type: acc
      value: 71.3
      name: accuracy
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Mmlu Machine Learning
      type: cais/mmlu
    metrics:
    - type: acc
      value: 57.1
      name: accuracy
pipeline_tag: text-generation
library_name: transformers
license: apache-2.0
license_link: https://huggingface.co/hadadxyz/Qwen3-4B-Diversity/blob/main/LICENSE
---
Introduction
Qwen3-4B-Diversity is a fine-tuned language model based on Qwen/Qwen3-4B that has been trained on a diverse collection of high-quality reasoning datasets. This model combines knowledge distilled from various state-of-the-art AI systems to provide enhanced reasoning capabilities across multiple domains including mathematics, coding, general problem-solving, and multi-turn conversations.
Training Configuration
The model was trained using supervised fine-tuning techniques with parameter-efficient methods to optimize performance while maintaining computational efficiency. Key training parameters include:
| Parameter | Value |
|---|---|
| Number of Epochs | 2 |
| Context Length | 40,960 |
Hardware and Resources
| Resource | Specification |
|---|---|
| GPU | A100-80GB |
| Training Duration | Approximately 17 hours |
| Estimated Cost | $27 to $30 |
Training Data
| Dataset | Rows Used | Model |
|---|---|---|
| ianncity/KIMI-K2.5-550000x (General-Distillation) | 1,000 | Kimi K2.5 |
| Jackrong/Qwen3.5-reasoning-700x | 633 | Qwen3.5 |
| nohurry/Opus-4.6-Reasoning-3000x-filtered | 2,326 | Claude Opus 4.6 |
| TeichAI/claude-4.5-opus-high-reasoning-250x | 250 | Claude Opus 4.5 |
| TeichAI/gemini-3-pro-preview-high-reasoning-250x | 248 | Gemini 3 Pro |
| TeichAI/claude-haiku-4.5-high-reasoning-1700x | 1,688 | Claude Haiku 4.5 |
| TeichAI/gpt-5.2-high-reasoning-250x | 249 | GPT-5.2 |
| Roman1111111/gemini-3.1-pro-hard-high-reasoning | 3,150 | Gemini 3.1 Pro |
| Jackrong/glm-4.7-multiturn-CoT | 5,090 | GLM-4.7 |
| bmeyer2025/glm5-reasoning-traces | 1,744 | GLM-5 |
| TeichAI/claude-sonnet-4.5-high-reasoning-250x | 247 | Claude Sonnet 4.5 |
| TeichAI/deepseek-v3.2-speciale-openr1-math-3k | 3,317 | DeepSeek V3.2-Speciale |
| TeichAI/deepseek-v3.2-speciale-OpenCodeReasoning-3k | 2,953 | DeepSeek V3.2-Speciale |
| TeichAI/deepseek-v3.2-speciale-1000x | 991 | DeepSeek V3.2-Speciale |
| TeichAI/gpt-5-codex-1000x | 991 | GPT-5 Codex |
| Total | 24,877 | Combined diverse reasoning dataset |
Model Capabilities
This model excels in several key areas:
Advanced Reasoning: The model can break down complex problems into steps and provide detailed reasoning processes.
Mathematical Problem Solving: Enhanced capabilities for mathematical reasoning and problem-solving through dedicated math-focused datasets.
Code Generation and Understanding: Improved coding abilities from multiple code-reasoning datasets including DeepSeek and GPT-5 Codex data.
Multi-Turn Conversations: Better handling of extended dialogues and context-aware responses.
Domain Versatility: Exposure to reasoning patterns from various AI systems provides flexibility across different domains and task types.
Usage
Quick Demo
For a quick demo at no cost, you can run the model in Google Colab.
Ollama (Local)
# Model page: https://ollama.com/hadad/qwen3-4bd
# Listed tags and their sizes:
#   hadad/qwen3-4bd:Q8_0 | 4.3GB
#   hadad/qwen3-4bd:BF16 | 8.1GB
# Optional explicit pull of the Q8_0 tag (left commented out):
# ollama pull hadad/qwen3-4bd:Q8_0
# Run the Q8_0 tag.
ollama run hadad/qwen3-4bd:Q8_0
If you use Ollama and need tool or function calling, we recommend using the OpenAI-compatible API provided by Ollama, which is the more capable option for that use case.
Refer to the Ollama documentation.
Python (Local)
# Example: load Qwen3-4B-Diversity with Hugging Face Transformers, generate a
# reply to a single user prompt, and split the output into its thinking part
# and its final answer.
#
# pip install transformers==4.56.2
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "hadadxyz/Qwen3-4B-Diversity"

# Token id of the closing thinking tag (</think>).
THINK_END_TOKEN_ID = 151668

# load the tokenizer and the model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto",
)

# prepare the model input
prompt = "Give me a short introduction to large language model."
messages = [
    {"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=True,  # Switches between thinking and non-thinking modes. Default is True.
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# conduct text completion
generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=32768,
)
# Drop the prompt tokens so only the newly generated ids remain.
output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()

# parsing thinking content
try:
    # Reverse search: `index` ends up one past the LAST </think> token, so the
    # closing tag itself falls into the thinking slice.
    index = len(output_ids) - output_ids[::-1].index(THINK_END_TOKEN_ID)
except ValueError:
    # No </think> token was generated: treat the whole output as the answer.
    index = 0

thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

print("thinking content:", thinking_content)
print("content:", content)
Inference Parameters
For optimal results, we recommend the following generation parameters:
Thinking
| Parameter | Recommended Value | Description |
|---|---|---|
| temperature | 0.6 | Controls randomness in generation |
| top_p | 0.95 | Nucleus sampling threshold |
| top_k | 20 | Top-k sampling parameter |
| min_p | 0 | Minimum probability threshold |
Non-Thinking
| Parameter | Recommended Value | Description |
|---|---|---|
| temperature | 0.7 | Controls randomness in generation |
| top_p | 0.8 | Nucleus sampling threshold |
| top_k | 20 | Top-k sampling parameter |
| min_p | 0 | Minimum probability threshold |
Citation
If you use this model in your research or applications, please cite it using the entry below, and also cite the Qwen3 base model as described on its model card:
@misc{qwen3-4b-diversity,
author = {hadadxyz},
title = {Qwen3-4B-Diversity},
year = {2026},
url = {https://huggingface.co/hadadxyz/Qwen3-4B-Diversity}
}
Acknowledgments
This model was made possible through the combination of multiple high-quality datasets from the community. We acknowledge and thank all dataset creators and the Qwen team for providing the excellent base model.
