Text Generation
Transformers
Safetensors
PyTorch
English
mistral3
image-text-to-text
reasoning
coding
math
science
instruction-tuned
mistral
conversational
Instructions to use Surpem/Supertron2-24B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Surpem/Supertron2-24B with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="Surpem/Supertron2-24B")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("Surpem/Supertron2-24B")
model = AutoModelForImageTextToText.from_pretrained("Surpem/Supertron2-24B")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Inference
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use Surpem/Supertron2-24B with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "Surpem/Supertron2-24B"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "Surpem/Supertron2-24B",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker
docker model run hf.co/Surpem/Supertron2-24B
- SGLang
How to use Surpem/Supertron2-24B with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "Surpem/Supertron2-24B" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "Surpem/Supertron2-24B",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "Surpem/Supertron2-24B" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "Surpem/Supertron2-24B",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
- Docker Model Runner
How to use Surpem/Supertron2-24B with Docker Model Runner:
docker model run hf.co/Surpem/Supertron2-24B
Delete training_report.json
Browse files
- training_report.json +0 -37
training_report.json
DELETED
|
```diff
@@ -1,37 +0,0 @@
-{
-  "model_name": "Supertron2-24B",
-  "base_model": "mistralai/Devstral-Small-2-24B-Instruct-2512",
-  "training_base_model": "akoumpa/Devstral-Small-2-24B-Instruct-2512-BF16",
-  "run_dir": "/artifacts/20260518-164838",
-  "adapter_dir": "/artifacts/20260518-164838/adapter",
-  "merged_dir": null,
-  "train_minutes": 60,
-  "elapsed_seconds": 181.40671366200002,
-  "global_step": 600,
-  "train_loss": 1.570858365794023,
-  "parameter_count": 24034167808,
-  "trainable_parameter_count": 22806528,
-  "attention_impl": "flash_attention_2",
-  "resumed_from": null,
-  "use_4bit": false,
-  "torch_compile": false,
-  "gradient_checkpointing": true,
-  "lora": {
-    "r": 16,
-    "alpha": 32,
-    "dropout": 0.03
-  },
-  "datasets": {
-    "SWE-bench/SWE-smith": 2000,
-    "SWE-bench/SWE-bench": 2000,
-    "ise-uiuc/Magicoder-OSS-Instruct-75K": 6000,
-    "m-a-p/CodeFeedback-Filtered-Instruction": 6000,
-    "Open-Orca/OpenOrca": 5000,
-    "HuggingFaceH4/ultrachat_200k": 5000,
-    "gsm8k": 4000,
-    "allenai/sciq": 3000,
-    "supertron2-identity": 1000
-  },
-  "pushed_to_hub": false,
-  "hub_repo_id": "Surpem/Supertron2-24B"
-}
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|