Instructions to use kai-os/Carnice-V2-27b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use kai-os/Carnice-V2-27b with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="kai-os/Carnice-V2-27b")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("kai-os/Carnice-V2-27b")
model = AutoModelForImageTextToText.from_pretrained("kai-os/Carnice-V2-27b")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use kai-os/Carnice-V2-27b with vLLM:

Install from pip and serve the model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "kai-os/Carnice-V2-27b"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "kai-os/Carnice-V2-27b",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Use Docker

docker model run hf.co/kai-os/Carnice-V2-27b

SGLang

How to use kai-os/Carnice-V2-27b with SGLang:

Install from pip and serve the model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "kai-os/Carnice-V2-27b" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "kai-os/Carnice-V2-27b",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "kai-os/Carnice-V2-27b" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "kai-os/Carnice-V2-27b",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Docker Model Runner
How to use kai-os/Carnice-V2-27b with Docker Model Runner:
```
docker model run hf.co/kai-os/Carnice-V2-27b
```

Carnice-V2-27b/benchmarks/scripts/make_benchmark_card.py

kai-os

Add files using upload-large-folder tool

31a7782 verified 13 days ago

raw

history blame contribute delete

9.17 kB

	from __future__ import annotations

	import json
	from pathlib import Path

	import matplotlib.pyplot as plt
	import numpy as np
	from matplotlib.patches import FancyBboxPatch


	# Parent of the directory that contains this script; every input and output
	# location below is resolved relative to it.
	ROOT = Path(__file__).resolve().parents[1]
	# Destination for the rendered benchmark card images.
	FIGURES = ROOT.joinpath("figures")
	# Destination for the aggregated metrics file.
	DATA = ROOT.joinpath("data")


	def load_json(path: Path) -> dict:
	    """Open ``path`` as UTF-8 text and return the JSON value it contains."""
	    with path.open(encoding="utf-8") as handle:
	        return json.load(handle)


	def first_result(root: Path, pattern: str) -> Path:
	    """Return the first path, in sorted order, that matches ``pattern`` under ``root``.

	    Args:
	        root: Directory the glob pattern is evaluated against.
	        pattern: Glob pattern handed to ``Path.glob``, relative to ``root``.

	    Returns:
	        The smallest matching path.

	    Raises:
	        FileNotFoundError: If nothing under ``root`` matches ``pattern``. The
	            message names both the pattern and the directory that was searched.
	    """
	    # min() selects the same path as sorted(...)[0] without building the whole list.
	    match = min(root.glob(pattern), default=None)
	    if match is None:
	        raise FileNotFoundError(f"no file matching {pattern!r} under {root}")
	    return match


	def pct(value: float) -> str:
	    """Format a fraction as a percentage with one decimal place, e.g. 0.5 -> "50.0%"."""
	    scaled = value * 100
	    return format(scaled, ".1f") + "%"


	def add_gradient_bar(ax, patch, top_color: str, bottom_color: str) -> None:
	    """Fill the rectangle of ``patch`` with a vertical colour ramp.

	    A 256-row image running from ``bottom_color`` (lowest row) to ``top_color``
	    is drawn over the patch's rectangle and clipped to the patch; the patch's
	    own face and edge colours are then set to fully transparent white.
	    Patches whose height is zero or negative are left untouched.

	    Args:
	        ax: Object providing ``imshow`` (presumably a matplotlib Axes).
	        patch: Object providing the rectangle getters, ``get_zorder`` and the
	            face/edge colour setters (presumably a bar Rectangle).
	        top_color: ``#rrggbb`` colour used at the top of the ramp.
	        bottom_color: ``#rrggbb`` colour used at the bottom of the ramp.
	    """
	    left, base = patch.get_x(), patch.get_y()
	    span, height = patch.get_width(), patch.get_height()
	    if height <= 0:
	        return

	    def to_unit_rgb(code: str) -> np.ndarray:
	        # Split the six hex digits into three channel pairs scaled to 0..1.
	        digits = code.lstrip("#")
	        pairs = (digits[0:2], digits[2:4], digits[4:6])
	        return np.array([int(pair, 16) / 255 for pair in pairs])

	    top_rgb = to_unit_rgb(top_color)
	    bottom_rgb = to_unit_rgb(bottom_color)
	    # One pixel wide, 256 tall: row 0 is the bottom colour, the last row the top colour.
	    ramp = np.linspace(bottom_rgb, top_rgb, 256).reshape(256, 1, 3)

	    image_options = {
	        "extent": [left, left + span, base, base + height],
	        "origin": "lower",
	        "aspect": "auto",
	        "clip_path": patch,
	        "clip_on": True,
	        # Draw the ramp just above the patch it replaces.
	        "zorder": patch.get_zorder() + 0.1,
	    }
	    ax.imshow(ramp, **image_options)

	    transparent = (1, 1, 1, 0)
	    patch.set_facecolor(transparent)
	    patch.set_edgecolor(transparent)


	def main() -> None:
	    """Collect the benchmark numbers, write ``metrics.json`` and render the card.

	    Reads the IFEval results for the base model and the adapter, the held-out
	    validation summary and the BFCL multi-turn scores from ``ROOT / "raw"``,
	    stores the combined values in ``DATA / "metrics.json"``, then draws a
	    two-panel figure (IFEval accuracies on the left, relative loss and
	    perplexity reduction on the right) and saves it as PNG and SVG under
	    ``FIGURES``. The PNG path is printed at the end.

	    Raises:
	        FileNotFoundError: If an expected input file is missing.
	        KeyError: If an input file lacks one of the keys read below.
	    """
	    FIGURES.mkdir(parents=True, exist_ok=True)
	    DATA.mkdir(parents=True, exist_ok=True)

	    remote = ROOT / "raw" / "remote_benchmarks"
	    bfcl_scores = ROOT / "raw" / "bfcl_scores"

	    # ---- Load raw benchmark outputs -------------------------------------
	    # NOTE(review): the reviewed copy had "ifeval_base//results_.json" here,
	    # which looks like a "*...*" pair stripped by markdown rendering. The
	    # wildcards are restored so the glob can locate the results file inside
	    # the run sub-directory - confirm against the on-disk layout.
	    base_ifeval_path = first_result(remote, "ifeval_base/*/results_*.json")
	    adapter_ifeval_path = first_result(remote, "ifeval_adapter/*/results_*.json")

	    base_ifeval = load_json(base_ifeval_path)["results"]["ifeval"]
	    adapter_ifeval = load_json(adapter_ifeval_path)["results"]["ifeval"]
	    validation_summary = load_json(remote / "qwen36_carnice_benchmark_summary_20260425.json")[
	        "training_format_validation"
	    ]

	    bfcl_base = load_json(
	        bfcl_scores / "qwen36-base-local-FC" / "multi_turn" / "BFCL_v4_multi_turn_base_score.json"
	    )
	    bfcl_adapter_path = (
	        bfcl_scores
	        / "qwen36-carnice-v1-local-FC"
	        / "multi_turn"
	        / "BFCL_v4_multi_turn_base_score.json"
	    )
	    # Only the first non-blank line of the adapter score file is parsed
	    # (presumably the file is JSON-lines with the summary first - verify).
	    bfcl_adapter = json.loads(
	        next(line for line in bfcl_adapter_path.read_text(encoding="utf-8").splitlines() if line.strip())
	    )

	    # ---- Aggregate and persist metrics ----------------------------------
	    # Output name -> key used inside the IFEval results files.
	    ifeval_keys = {
	        "prompt_strict": "prompt_level_strict_acc,none",
	        "prompt_loose": "prompt_level_loose_acc,none",
	        "instruction_strict": "inst_level_strict_acc,none",
	        "instruction_loose": "inst_level_loose_acc,none",
	    }

	    metrics = {
	        "run": "qwen36_short_public_ab_20260425_155339",
	        "model": {
	            "base": "Qwen/Qwen3.6-27B",
	            "carnice_sft": "qwen36_carnice_direct_v1b_lora_8192_split_200step",
	        },
	        "note": "All plotted values are raw measured values from the included benchmark files.",
	        "ifeval_limit_20": {
	            "base": {name: base_ifeval[key] for name, key in ifeval_keys.items()},
	            "carnice_sft": {name: adapter_ifeval[key] for name, key in ifeval_keys.items()},
	        },
	        "heldout_training_format_validation": validation_summary,
	        "bfcl_multi_turn_base_limit_2": {
	            "base": bfcl_base,
	            "carnice_sft": {
	                "accuracy": bfcl_adapter["accuracy"],
	                "correct_count": bfcl_adapter["correct_count"],
	                "total_count": bfcl_adapter["total_count"],
	            },
	        },
	        "source_files": {
	            "ifeval_base": str(base_ifeval_path.relative_to(ROOT)),
	            "ifeval_carnice_sft": str(adapter_ifeval_path.relative_to(ROOT)),
	            "bfcl_scores": "raw/bfcl_scores/",
	            "validation": "raw/remote_benchmarks/qwen36_carnice_benchmark_summary_20260425.json",
	        },
	    }
	    (DATA / "metrics.json").write_text(json.dumps(metrics, indent=2) + "\n", encoding="utf-8")

	    # ---- Values to plot -------------------------------------------------
	    labels = [
	        ("Prompt strict", "prompt_strict"),
	        ("Prompt loose", "prompt_loose"),
	        ("Instruction strict", "instruction_strict"),
	        ("Instruction loose", "instruction_loose"),
	    ]
	    base_vals = [metrics["ifeval_limit_20"]["base"][key] for _, key in labels]
	    carnice_vals = [metrics["ifeval_limit_20"]["carnice_sft"][key] for _, key in labels]

	    base_loss = validation_summary["base_eval_loss"]
	    carnice_loss = validation_summary["adapter_eval_loss"]
	    base_ppl = validation_summary["base_eval_perplexity"]
	    carnice_ppl = validation_summary["adapter_eval_perplexity"]
	    # Relative improvement of the adapter over the base model.
	    loss_reduction = (base_loss - carnice_loss) / base_loss
	    ppl_reduction = (base_ppl - carnice_ppl) / base_ppl

	    # ---- Figure scaffolding ---------------------------------------------
	    plt.rcParams.update(
	        {
	            "font.family": "DejaVu Sans",
	            "figure.facecolor": "#ffffff",
	            "axes.facecolor": "#ffffff",
	            "savefig.facecolor": "#ffffff",
	            "text.color": "#0f1115",
	            "axes.labelcolor": "#0f1115",
	            "xtick.color": "#0f1115",
	            "ytick.color": "#4b5563",
	            "axes.edgecolor": "#a6a6a6",
	        }
	    )

	    fig = plt.figure(figsize=(12.93, 6.55), dpi=200)
	    # Axes rectangles in figure coordinates: [left, bottom, width, height].
	    left = [0.060, 0.21, 0.60, 0.66]
	    right = [0.735, 0.29, 0.235, 0.56]

	    # Rounded background cards, one behind each panel.
	    for box_x, box_y, box_w, box_h in [(0.018, 0.04, 0.66, 0.92), (0.705, 0.04, 0.275, 0.92)]:
	        fig.add_artist(
	            FancyBboxPatch(
	                (box_x, box_y),
	                box_w,
	                box_h,
	                boxstyle="round,pad=0.016,rounding_size=0.025",
	                transform=fig.transFigure,
	                linewidth=1.0,
	                edgecolor="#e4e7ec",
	                facecolor="#ffffff",
	                zorder=-10,
	            )
	        )

	    # ---- Left panel: grouped IFEval accuracy bars -----------------------
	    ax = fig.add_axes(left)
	    x = np.arange(len(labels)) * 1.55
	    width = 0.46
	    base_bars = ax.bar(x - width / 2, base_vals, width=width, color="#d8c6ef", zorder=3)
	    carnice_bars = ax.bar(x + width / 2, carnice_vals, width=width, color="#cbd9f7", zorder=3)
	    for patch in base_bars:
	        add_gradient_bar(ax, patch, "#d8c5ef", "#efe7fa")
	    for patch in carnice_bars:
	        add_gradient_bar(ax, patch, "#c8d7f6", "#eef4ff")

	    # Limits are set after the gradient images have been added to the axes.
	    ax.set_xlim(x[0] - 0.70, x[-1] + 0.70)
	    ax.set_ylim(0.75, 1.005)
	    yticks = np.arange(0.75, 1.01, 0.05)
	    ax.set_yticks(yticks)
	    ax.set_yticklabels([f"{int(round(v * 100))}%" for v in yticks], fontsize=10)
	    ax.grid(axis="y", color="#dfe3ea", linewidth=1.0, linestyle=(0, (2.2, 2.2)), zorder=0)
	    ax.spines[["top", "right", "left"]].set_visible(False)
	    ax.spines["bottom"].set_color("#9ca3af")
	    ax.tick_params(axis="y", length=0, pad=8)
	    ax.tick_params(axis="x", length=0, pad=10)

	    # One tick per bar ("Base" / "Carnice SFT") plus a group label placed
	    # below the visible y-range, hence clip_on=False.
	    tick_positions = []
	    tick_labels = []
	    for center, (label, _) in zip(x, labels):
	        tick_positions.extend([center - width / 2, center + width / 2])
	        tick_labels.extend(["Base", "Carnice SFT"])
	        ax.text(center, 0.718, label, ha="center", va="top", fontsize=13, clip_on=False)
	    ax.set_xticks(tick_positions)
	    ax.set_xticklabels(tick_labels, fontsize=9)

	    # Percentage annotation above every bar.
	    for bars, values in [(base_bars, base_vals), (carnice_bars, carnice_vals)]:
	        for patch, value in zip(bars, values):
	            ax.text(
	                patch.get_x() + patch.get_width() / 2,
	                value + 0.006,
	                pct(value),
	                ha="center",
	                va="bottom",
	                fontsize=13,
	                fontweight="bold",
	                color="#0f1115",
	            )

	    # ---- Right panel: relative loss / perplexity reduction --------------
	    ax2 = fig.add_axes(right)
	    reductions = [loss_reduction, ppl_reduction]
	    reduction_x = [0, 1.35]
	    bars = ax2.bar(reduction_x, reductions, width=0.68, color=["#d8c5ef", "#ffb0a2"], zorder=3)
	    add_gradient_bar(ax2, bars[0], "#d8c5ef", "#efe7fa")
	    add_gradient_bar(ax2, bars[1], "#ffaaa0", "#ffd9d0")
	    ax2.set_xlim(-0.70, 2.05)
	    ax2.set_ylim(0, 0.38)
	    ax2.axis("off")

	    for x_pos, value, label in zip(reduction_x, reductions, ["Loss\nreduction", "Perplexity\nreduction"]):
	        ax2.text(x_pos, value + 0.017, pct(value), ha="center", va="bottom", fontsize=17, fontweight="bold")
	        ax2.text(x_pos, -0.017, label, ha="center", va="top", fontsize=11, clip_on=False)

	    fig.text(0.845, 0.145, f"Validation loss: {base_loss:.3f} \u2192 {carnice_loss:.3f}",
	             ha="center", va="center", fontsize=10, color="#667085")
	    fig.text(0.845, 0.092, f"Validation perplexity: {base_ppl:.3f} \u2192 {carnice_ppl:.3f}",
	             ha="center", va="center", fontsize=10, color="#667085")

	    # ---- Save -----------------------------------------------------------
	    png_path = FIGURES / "qwen36_carnice_sft_benchmark_card.png"
	    svg_path = FIGURES / "qwen36_carnice_sft_benchmark_card.svg"
	    for path in [png_path, svg_path]:
	        fig.savefig(path)
	    # Release the figure so it does not linger in pyplot's global registry.
	    plt.close(fig)

	    print(png_path)


	# Generate the benchmark card only when this file is run as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	main()