#!/usr/bin/env bash
# Short public A/B benchmark driver: runs a BFCL multi_turn_base subset and
# IFEval against a locally served model twice — once with a LoRA adapter
# attached, once with the plain base model — then writes a combined summary.
#
# Env overrides:
#   IFEVAL_LIMIT  number of IFEval examples per run (default: 20)
set -euo pipefail

# Fixed paths and run configuration; treated as constants by every function
# below, so mark them readonly to catch accidental reassignment.
readonly ROOT=/home/ubuntu/hermes-glm5-stagea-pilot
readonly MODEL_VENV=/home/ubuntu/qwen36-stagea-venv
readonly BFCL_VENV=/home/ubuntu/bfcl-venv
readonly BFCL_DIR=$ROOT/bfcl-eval-src/berkeley-function-call-leaderboard
readonly ADAPTER_DIR=$ROOT/outputs/qwen36_carnice_direct_v1b_lora_8192_split_200step/adapter
readonly LOGDIR=$ROOT/benchmarks/logs
STAMP=$(date -u +%Y%m%d_%H%M%S)
readonly STAMP
readonly RUN_NAME=qwen36_short_public_ab_$STAMP
readonly PORT=8030
readonly API_KEY=local-key
readonly IFEVAL_LIMIT=${IFEVAL_LIMIT:-20}

mkdir -p "$LOGDIR/$RUN_NAME" "$ROOT/benchmarks/$RUN_NAME"

# Mirror all driver stdout+stderr into a persistent per-run log.
exec > >(tee -a "$LOGDIR/$RUN_NAME/driver.log") 2>&1

echo "run_name=$RUN_NAME"
echo "started=$(date -u --iso-8601=seconds)"
echo "ifeval_limit=$IFEVAL_LIMIT"
|
|
# Best-effort teardown of the model-server tmux session; never fails even if
# the session (or tmux itself) is absent.
cleanup() {
  if ! tmux kill-session -t qwen36_short_ab_server 2>/dev/null; then
    :  # nothing to kill — that is fine
  fi
}
# Ensure the server session is torn down on every exit path.
trap cleanup EXIT
|
|
# Poll the local server's /health endpoint until it answers.
# Globals: PORT (read).
# Returns: 0 once healthy; 1 after 240 attempts (2s apart, ~8 minutes).
wait_for_server() {
  local attempt
  for (( attempt = 0; attempt < 240; attempt++ )); do
    curl -fsS "http://127.0.0.1:$PORT/health" >/dev/null 2>&1 && return 0
    sleep 2
  done
  echo "server did not become healthy" >&2
  return 1
}
|
|
# Launch the model server in a detached tmux session and block until healthy.
# Arguments:
#   $1 - label: "adapter" (attach the LoRA adapter) or "base"
#   $2 - model name exposed by the OpenAI-compatible API
# Globals (read): MODEL_VENV ROOT ADAPTER_DIR LOGDIR RUN_NAME PORT API_KEY
# Outputs: readiness timestamp + loader/attachment diagnostics from the log.
start_server() {
  local label="$1"
  local served_name="$2"
  # Kept as a plain string: the original empty array expanded via
  # ${adapter_arg[*]} is an "unbound variable" error under `set -u` on
  # bash <= 4.3, and the whole command line is handed to tmux as one shell
  # string anyway (paths here contain no whitespace).
  local adapter_arg=""

  cleanup  # kill any previous server session before starting a new one
  if [[ "$label" == "adapter" ]]; then
    adapter_arg="--adapter-dir $ADAPTER_DIR"
  fi

  local server_log="$LOGDIR/$RUN_NAME/server_${label}.log"
  tmux new-session -d -s qwen36_short_ab_server \
    "source $MODEL_VENV/bin/activate && cd $ROOT && CUDA_VISIBLE_DEVICES=0 python serve_qwen35_hermes_openai.py --repo-root $ROOT --base-model Qwen/Qwen3.6-27B $adapter_arg --served-model-name $served_name --host 127.0.0.1 --port $PORT --api-key $API_KEY --max-new-tokens 512 --temperature 0.0 --precision bf16 > $server_log 2>&1"
  wait_for_server
  echo "server_${label}_ready=$(date -u --iso-8601=seconds)"
  # Surface load/attachment diagnostics; absence of these lines is not fatal.
  grep -E "MODEL_LOADER|LORA_ATTACHMENT_SUMMARY" "$server_log" || true
}
|
|
# Restrict BFCL generation to two multi_turn_base cases by writing the
# subset file the harness reads, then archive a copy next to the run logs.
# Globals (read): BFCL_DIR LOGDIR RUN_NAME
write_bfcl_subset() {
  cd "$BFCL_DIR"
  # Static content, so a heredoc suffices; bytes match the JSON the previous
  # python3 json.dumps(..., indent=2) emitted.
  cat > test_case_ids_to_generate.json <<'JSON'
{
  "multi_turn_base": [
    "multi_turn_base_0",
    "multi_turn_base_1"
  ]
}
JSON
  cp test_case_ids_to_generate.json "$LOGDIR/$RUN_NAME/bfcl_test_case_ids_to_generate.json"
}
|
|
# Run BFCL generation + evaluation for one registered model against the
# locally served OpenAI-compatible endpoint.
# Arguments:
#   $1 - BFCL model registry name
#   $2 - label used to name the per-step log files
# Globals (read): BFCL_VENV BFCL_DIR PORT API_KEY LOGDIR RUN_NAME
run_bfcl_model() {
  local registry="$1"
  local label="$2"

  source "$BFCL_VENV/bin/activate"
  cd "$BFCL_DIR"
  # Point the harness at the local server.
  export BFCL_PROJECT_ROOT="$BFCL_DIR"
  export REMOTE_OPENAI_BASE_URL="http://127.0.0.1:$PORT/v1"
  export REMOTE_OPENAI_API_KEY="$API_KEY"
  export REMOTE_OPENAI_TOKENIZER_PATH="Qwen/Qwen3.6-27B"
  export LOCAL_SERVER_ENDPOINT=127.0.0.1
  export LOCAL_SERVER_PORT=$PORT

  local generate_flags=(
    --model "$registry"
    --run-ids
    --skip-server-setup
    --include-input-log
    --allow-overwrite
    --num-threads 1
    --temperature 0.0
    --result-dir "result_$RUN_NAME"
  )
  bfcl generate "${generate_flags[@]}" \
    > "$LOGDIR/$RUN_NAME/bfcl_generate_${label}.log" 2>&1

  local evaluate_flags=(
    --model "$registry"
    --test-category multi_turn_base
    --partial-eval
    --result-dir "result_$RUN_NAME"
    --score-dir "score_$RUN_NAME"
  )
  bfcl evaluate "${evaluate_flags[@]}" \
    > "$LOGDIR/$RUN_NAME/bfcl_evaluate_${label}.log" 2>&1
}
|
|
# BFCL A/B pass: adapter-backed server first, then the base model, both over
# the same generated subset.
run_bfcl_ab() {
  printf 'bfcl_start=%s\n' "$(date -u --iso-8601=seconds)"
  write_bfcl_subset

  # A: server with the LoRA adapter attached.
  start_server adapter qwen36-carnice-v1-local
  run_bfcl_model qwen36-carnice-v1-local-FC adapter

  # B: plain base model.
  start_server base qwen36-base-local
  run_bfcl_model qwen36-base-local-FC base

  printf 'bfcl_done=%s\n' "$(date -u --iso-8601=seconds)"
}
|
|
# Run lm-eval's IFEval task for one model configuration.
# Arguments:
#   $1 - label for output/log naming ("adapter" or "base")
#   $2 - lm-eval --model_args string (HF model spec)
# Globals (read): ROOT RUN_NAME LOGDIR MODEL_VENV IFEVAL_LIMIT
run_ifeval_model() {
  local label="$1"
  local model_args="$2"
  local out="$ROOT/benchmarks/$RUN_NAME/ifeval_${label}"
  local log="$LOGDIR/$RUN_NAME/ifeval_${label}.log"

  source "$MODEL_VENV/bin/activate"
  cd "$ROOT"
  export TOKENIZERS_PARALLELISM=false

  local eval_flags=(
    --model hf
    --model_args "$model_args"
    --tasks ifeval
    --batch_size 1
    --apply_chat_template
    --limit "$IFEVAL_LIMIT"
    --output_path "$out"
    --log_samples
  )
  CUDA_VISIBLE_DEVICES=0 lm_eval "${eval_flags[@]}" > "$log" 2>&1
}
|
|
# IFEval A/B pass: LoRA-adapter configuration first, then the base model.
run_ifeval_ab() {
  printf 'ifeval_start=%s\n' "$(date -u --iso-8601=seconds)"

  run_ifeval_model adapter \
    "pretrained=Qwen/Qwen3.6-27B,peft=$ADAPTER_DIR,trust_remote_code=True,dtype=bfloat16,enable_thinking=False"

  run_ifeval_model base \
    "pretrained=Qwen/Qwen3.6-27B,trust_remote_code=True,dtype=bfloat16,enable_thinking=False"

  printf 'ifeval_done=%s\n' "$(date -u --iso-8601=seconds)"
}
|
|
# Collect BFCL scores and IFEval results for this run into summary.json.
# Globals (read): MODEL_VENV ROOT RUN_NAME — root/run are handed to the
# Python summarizer via env so it no longer has to re-derive them.
summarize() {
  source "$MODEL_VENV/bin/activate" || true
  cd "$ROOT"
  # Pass the run identity explicitly; the Python side keeps the old
  # "latest log dir" glob as a fallback for standalone use.
  SUMMARY_ROOT="$ROOT" SUMMARY_RUN_NAME="$RUN_NAME" python3 - <<'PY'
import csv
import json
import os
from pathlib import Path

root = Path(os.environ.get("SUMMARY_ROOT", "/home/ubuntu/hermes-glm5-stagea-pilot"))
run = os.environ.get("SUMMARY_RUN_NAME") or sorted(
    (root / "benchmarks" / "logs").glob("qwen36_short_public_ab_*")
)[-1].name
bench = root / "benchmarks" / run
bfcl = root / "bfcl-eval-src/berkeley-function-call-leaderboard" / f"score_{run}"

# Training-format validation is optional context; a missing or unparseable
# file must not abort the whole summary (previously an uncaught exception).
try:
    training_validation = json.loads(
        (root / "benchmarks/qwen36_carnice_benchmark_summary_20260425.json").read_text()
    ).get("training_format_validation")
except Exception as exc:
    training_validation = {"error": str(exc)}

summary = {
    "run_name": run,
    "training_format_validation": training_validation,
    "bfcl": {},
    "ifeval": {},
}

# Overall leaderboard table, if the evaluator produced one.
overall = bfcl / "data_overall.csv"
if overall.exists():
    with overall.open(newline="", encoding="utf-8") as f:
        summary["bfcl"]["overall_rows"] = list(csv.DictReader(f))

# Per-category score files are JSONL; single-line files are unwrapped.
for score in sorted(bfcl.glob("**/*_score.json")):
    rel = str(score.relative_to(bfcl))
    try:
        lines = [json.loads(line) for line in score.read_text().splitlines() if line.strip()]
        summary["bfcl"][rel] = lines if len(lines) != 1 else lines[0]
    except Exception as exc:
        summary["bfcl"][rel] = {"error": str(exc), "raw": score.read_text()[:1000]}

# Latest lm-eval results file per label (glob yields nothing if a run failed).
for label in ["adapter", "base"]:
    for result_file in (bench / f"ifeval_{label}").glob("**/results_*.json"):
        try:
            summary["ifeval"][label] = json.loads(result_file.read_text())
        except Exception as exc:
            summary["ifeval"][label] = {"error": str(exc), "path": str(result_file)}

out = bench / "summary.json"
out.write_text(json.dumps(summary, indent=2) + "\n", encoding="utf-8")
print(out)
PY
}
|
|
# Main sequence: BFCL A/B, explicit server teardown, IFEval A/B, summary.
run_bfcl_ab
cleanup
run_ifeval_ab
summarize

printf 'completed=%s\n' "$(date -u --iso-8601=seconds)"
|
|