Instructions to use kai-os/Carnice-V2-27b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use kai-os/Carnice-V2-27b with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="kai-os/Carnice-V2-27b")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("kai-os/Carnice-V2-27b")
model = AutoModelForImageTextToText.from_pretrained("kai-os/Carnice-V2-27b")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use kai-os/Carnice-V2-27b with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "kai-os/Carnice-V2-27b"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "kai-os/Carnice-V2-27b",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Use Docker

docker model run hf.co/kai-os/Carnice-V2-27b

SGLang

How to use kai-os/Carnice-V2-27b with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "kai-os/Carnice-V2-27b" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "kai-os/Carnice-V2-27b",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "kai-os/Carnice-V2-27b" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "kai-os/Carnice-V2-27b",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Docker Model Runner
How to use kai-os/Carnice-V2-27b with Docker Model Runner:
```
docker model run hf.co/kai-os/Carnice-V2-27b
```

Carnice-V2-27b

File size: 6,041 Bytes

31a7782

#!/usr/bin/env bash
# Short A/B benchmark driver: configuration and logging setup.
# Everything below runs with strict error handling (abort on failed command,
# unset variable, or failed pipeline stage).
set -euo pipefail

# --- Fixed locations on the benchmark host --------------------------------
ROOT="/home/ubuntu/hermes-glm5-stagea-pilot"
MODEL_VENV="/home/ubuntu/qwen36-stagea-venv"
BFCL_VENV="/home/ubuntu/bfcl-venv"
BFCL_DIR="${ROOT}/bfcl-eval-src/berkeley-function-call-leaderboard"
ADAPTER_DIR="${ROOT}/outputs/qwen36_carnice_direct_v1b_lora_8192_split_200step/adapter"
LOGDIR="${ROOT}/benchmarks/logs"

# --- Per-run identity: a UTC timestamp keeps run directories distinct ------
STAMP="$(date -u +%Y%m%d_%H%M%S)"
RUN_NAME="qwen36_short_public_ab_${STAMP}"

# --- Local inference server settings ---------------------------------------
PORT=8030
API_KEY="local-key"

# Sample limit handed to the IFEval run; overridable from the environment.
: "${IFEVAL_LIMIT:=20}"

# One directory for logs and one for benchmark outputs, both keyed by run name.
mkdir -p \
  "${LOGDIR}/${RUN_NAME}" \
  "${ROOT}/benchmarks/${RUN_NAME}"

# From here on, stdout and stderr are both mirrored into driver.log via tee.
exec > >(tee -a "${LOGDIR}/${RUN_NAME}/driver.log") 2>&1

printf '%s\n' "run_name=${RUN_NAME}"
printf '%s\n' "started=$(date -u --iso-8601=seconds)"
printf '%s\n' "ifeval_limit=${IFEVAL_LIMIT}"

#######################################
# Tear down the tmux session that hosts the local inference server.
# Best-effort: a missing session (or missing tmux) is not an error, so the
# function always returns 0 and tmux's stderr is discarded.
#######################################
cleanup() {
  local -r session_name=qwen36_short_ab_server
  tmux kill-session -t "$session_name" 2>/dev/null || :
}
# Run the teardown on every exit path of the script.
trap cleanup EXIT

#######################################
# Poll the local server's /health endpoint until a probe succeeds.
# Globals:   PORT (read)
# Arguments: $1 - maximum number of probes (optional, default 240)
#            $2 - seconds to sleep after each failed probe (optional, default 2)
# Outputs:   "server did not become healthy" on stderr when every probe fails
# Returns:   0 as soon as one probe succeeds, 1 once all probes have failed
#######################################
wait_for_server() {
  local -r max_attempts="${1:-240}"
  local -r interval="${2:-2}"
  local attempt

  # Arithmetic loop instead of `seq`: no external process, and the bound can
  # come from the caller.
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    # -f makes curl fail on HTTP error statuses; output is irrelevant here.
    if curl -fsS "http://127.0.0.1:$PORT/health" >/dev/null 2>&1; then
      return 0
    fi
    sleep "$interval"
  done
  echo "server did not become healthy" >&2
  return 1
}

#######################################
# (Re)start the local OpenAI-compatible server inside a detached tmux session
# and block until its health endpoint responds.
# Globals:   ROOT, MODEL_VENV, ADAPTER_DIR, LOGDIR, RUN_NAME, PORT, API_KEY (read)
# Arguments: $1 - label; "adapter" adds --adapter-dir "$ADAPTER_DIR", any other
#                 value serves the base model only. Also names the log file.
#            $2 - value passed as --served-model-name
# Outputs:   a "server_<label>_ready=<timestamp>" line plus any
#            MODEL_LOADER / LORA_ATTACHMENT_SUMMARY lines found in the server
#            log; on health-check failure, the tail of the server log on stderr
# Returns:   0 when the server is healthy, non-zero otherwise
#######################################
start_server() {
  local label="$1"
  local served_name="$2"
  local server_log="$LOGDIR/$RUN_NAME/server_${label}.log"
  local quoted_cmd shell_cmd
  local -a server_cmd=(
    python serve_qwen35_hermes_openai.py
    --repo-root "$ROOT"
    --base-model Qwen/Qwen3.6-27B
  )

  if [[ "$label" == "adapter" ]]; then
    server_cmd+=(--adapter-dir "$ADAPTER_DIR")
  fi
  server_cmd+=(
    --served-model-name "$served_name"
    --host 127.0.0.1
    --port "$PORT"
    --api-key "$API_KEY"
    --max-new-tokens 512
    --temperature 0.0
    --precision bf16
  )

  # tmux takes the command as ONE string that a shell re-parses, so every
  # word is shell-quoted with %q. Plain interpolation would split paths that
  # contain whitespace, and expanding an empty array unquoted trips `set -u`
  # on bash releases older than 4.4.
  printf -v quoted_cmd '%q ' "${server_cmd[@]}"
  printf -v shell_cmd \
    'source %q && cd %q && CUDA_VISIBLE_DEVICES=0 %s> %q 2>&1' \
    "$MODEL_VENV/bin/activate" "$ROOT" "$quoted_cmd" "$server_log"

  # Make sure no previous server session is still holding the port.
  cleanup
  tmux new-session -d -s qwen36_short_ab_server "$shell_cmd"

  if ! wait_for_server; then
    # The server's output only lives in its own log file; surface the end of
    # it so the failure reason is visible to whoever reads this script's output.
    tail -n 40 "$server_log" >&2 || true
    return 1
  fi
  echo "server_${label}_ready=$(date -u --iso-8601=seconds)"
  # Informational only: absence of these markers must not fail the run.
  grep -E "MODEL_LOADER|LORA_ATTACHMENT_SUMMARY" "$server_log" || true
}

#######################################
# Write the BFCL test-case selection file and archive a copy with the run logs.
# The selection contains two "multi_turn_base" cases (ids 0 and 1).
# Globals:   BFCL_DIR, LOGDIR, RUN_NAME (read)
# Side effects: changes the working directory to BFCL_DIR; creates
#               test_case_ids_to_generate.json there and copies it to
#               $LOGDIR/$RUN_NAME/bfcl_test_case_ids_to_generate.json
#######################################
write_bfcl_subset() {
  local -r ids_file=test_case_ids_to_generate.json

  cd "$BFCL_DIR"
  python3 - <<'PY'
import json
import pathlib

# Category name -> explicit list of test-case ids to generate.
case_ids = [f"multi_turn_base_{index}" for index in range(2)]
payload = json.dumps({"multi_turn_base": case_ids}, indent=2) + "\n"
pathlib.Path("test_case_ids_to_generate.json").write_text(payload, encoding="utf-8")
PY
  cp "$ids_file" "$LOGDIR/$RUN_NAME/bfcl_test_case_ids_to_generate.json"
}

#######################################
# Run `bfcl generate` followed by `bfcl evaluate` for one model entry against
# the already-running local server.
# Globals:   BFCL_VENV, BFCL_DIR, PORT, API_KEY, LOGDIR, RUN_NAME (read)
# Exports:   BFCL_PROJECT_ROOT, REMOTE_OPENAI_BASE_URL, REMOTE_OPENAI_API_KEY,
#            REMOTE_OPENAI_TOKENIZER_PATH, LOCAL_SERVER_ENDPOINT,
#            LOCAL_SERVER_PORT
# Arguments: $1 - model name passed to bfcl via --model
#            $2 - label used only in the log file names
# Side effects: activates the BFCL virtualenv and changes directory to
#               BFCL_DIR; each bfcl step's output goes to its own log file.
#######################################
run_bfcl_model() {
  local registry="$1"
  local label="$2"

  source "$BFCL_VENV/bin/activate"
  cd "$BFCL_DIR"

  # Point the BFCL client at the locally served model.
  export \
    BFCL_PROJECT_ROOT="$BFCL_DIR" \
    REMOTE_OPENAI_BASE_URL="http://127.0.0.1:$PORT/v1" \
    REMOTE_OPENAI_API_KEY="$API_KEY" \
    REMOTE_OPENAI_TOKENIZER_PATH="Qwen/Qwen3.6-27B" \
    LOCAL_SERVER_ENDPOINT=127.0.0.1 \
    LOCAL_SERVER_PORT="$PORT"

  local step_logs="$LOGDIR/$RUN_NAME"
  local results="result_$RUN_NAME"
  local scores="score_$RUN_NAME"

  # NOTE(review): --run-ids presumably makes bfcl read the
  # test_case_ids_to_generate.json file in this directory; confirm against
  # the bfcl CLI.
  local -a generate_args=(
    --model "$registry"
    --run-ids
    --skip-server-setup
    --include-input-log
    --allow-overwrite
    --num-threads 1
    --temperature 0.0
    --result-dir "$results"
  )
  local -a evaluate_args=(
    --model "$registry"
    --test-category multi_turn_base
    --partial-eval
    --result-dir "$results"
    --score-dir "$scores"
  )

  bfcl generate "${generate_args[@]}" \
    > "$step_logs/bfcl_generate_${label}.log" 2>&1

  bfcl evaluate "${evaluate_args[@]}" \
    > "$step_logs/bfcl_evaluate_${label}.log" 2>&1
}

#######################################
# BFCL A/B pass: write the test-case subset once, then for the adapter
# variant and the base variant (in that order) start the server and run the
# BFCL generate/evaluate steps against it.
# Outputs: bfcl_start= / bfcl_done= timestamp lines on stdout.
#######################################
run_bfcl_ab() {
  local variant label served_name

  echo "bfcl_start=$(date -u --iso-8601=seconds)"
  write_bfcl_subset

  # Each entry is "<label>:<served model name>"; the BFCL model entry is the
  # served name with an "-FC" suffix.
  for variant in adapter:qwen36-carnice-v1-local base:qwen36-base-local; do
    label=${variant%%:*}
    served_name=${variant#*:}
    start_server "$label" "$served_name"
    run_bfcl_model "${served_name}-FC" "$label"
  done

  echo "bfcl_done=$(date -u --iso-8601=seconds)"
}

#######################################
# Run the IFEval task through lm_eval's "hf" model backend for one variant.
# Globals:   ROOT, RUN_NAME, LOGDIR, MODEL_VENV, IFEVAL_LIMIT (read)
# Exports:   TOKENIZERS_PARALLELISM=false
# Arguments: $1 - label used in the output directory and log file names
#            $2 - string passed verbatim to lm_eval --model_args
# Side effects: activates the model virtualenv, changes directory to ROOT,
#               writes results under $ROOT/benchmarks/$RUN_NAME/ifeval_<label>
#               and all lm_eval output to $LOGDIR/$RUN_NAME/ifeval_<label>.log
#######################################
run_ifeval_model() {
  local label="$1"
  local model_args="$2"
  local out="$ROOT/benchmarks/$RUN_NAME/ifeval_${label}"
  local log="$LOGDIR/$RUN_NAME/ifeval_${label}.log"
  local -a eval_args=(
    --model hf
    --model_args "$model_args"
    --tasks ifeval
    --batch_size 1
    --apply_chat_template
    --limit "$IFEVAL_LIMIT"
    --output_path "$out"
    --log_samples
  )

  source "$MODEL_VENV/bin/activate"
  cd "$ROOT"
  export TOKENIZERS_PARALLELISM=false

  # Restrict the evaluation to GPU 0 for this command only.
  CUDA_VISIBLE_DEVICES=0 lm_eval "${eval_args[@]}" > "$log" 2>&1
}

#######################################
# IFEval A/B pass: evaluate the adapter variant first, then the base model.
# Both use the same base checkpoint and loader options; the adapter variant
# additionally passes peft=$ADAPTER_DIR.
# Globals: ADAPTER_DIR (read)
# Outputs: ifeval_start= / ifeval_done= timestamp lines on stdout.
#######################################
run_ifeval_ab() {
  local -r checkpoint="pretrained=Qwen/Qwen3.6-27B"
  local -r loader_opts="trust_remote_code=True,dtype=bfloat16,enable_thinking=False"

  echo "ifeval_start=$(date -u --iso-8601=seconds)"

  run_ifeval_model adapter "${checkpoint},peft=${ADAPTER_DIR},${loader_opts}"
  run_ifeval_model base "${checkpoint},${loader_opts}"

  echo "ifeval_done=$(date -u --iso-8601=seconds)"
}

#######################################
# Collect this run's BFCL scores and IFEval results into one summary.json.
# Globals:   MODEL_VENV, ROOT, RUN_NAME (read)
# Outputs:   the path of the written summary file on stdout; a warning on
#            stderr when the baseline summary file cannot be read
# Side effects: changes directory to ROOT; writes
#               $ROOT/benchmarks/$RUN_NAME/summary.json
#
# The repo root and run name are handed to the embedded Python through the
# environment, so the summary always describes THIS run rather than whichever
# run directory happens to sort last on disk.
#######################################
summarize() {
  # Best-effort activation: if it fails, whatever python3 is on PATH is used.
  source "$MODEL_VENV/bin/activate" || true
  cd "$ROOT"
  SUMMARY_ROOT="$ROOT" SUMMARY_RUN_NAME="$RUN_NAME" python3 - <<'PY'
import csv
import json
import os
import sys
from pathlib import Path

root = Path(os.environ["SUMMARY_ROOT"])
run = os.environ["SUMMARY_RUN_NAME"]
bench = root / "benchmarks" / run
bfcl = root / "bfcl-eval-src/berkeley-function-call-leaderboard" / f"score_{run}"

# The baseline file is auxiliary context only: a missing or malformed file
# must not discard the results of a finished benchmark run.
baseline = root / "benchmarks/qwen36_carnice_benchmark_summary_20260425.json"
try:
    training_format_validation = json.loads(baseline.read_text()).get(
        "training_format_validation"
    )
except (OSError, ValueError) as exc:
    print(f"warning: could not read {baseline}: {exc}", file=sys.stderr)
    training_format_validation = None

summary = {
    "run_name": run,
    "training_format_validation": training_format_validation,
    "bfcl": {},
    "ifeval": {},
}

overall = bfcl / "data_overall.csv"
if overall.exists():
    with overall.open(newline="", encoding="utf-8") as f:
        summary["bfcl"]["overall_rows"] = list(csv.DictReader(f))
for score in sorted(bfcl.glob("**/*_score.json")):
    rel = str(score.relative_to(bfcl))
    try:
        # Score files are parsed line by line (one JSON document per line);
        # a single-line file is stored unwrapped.
        lines = [json.loads(line) for line in score.read_text().splitlines() if line.strip()]
        summary["bfcl"][rel] = lines if len(lines) != 1 else lines[0]
    except Exception as exc:
        summary["bfcl"][rel] = {"error": str(exc), "raw": score.read_text()[:1000]}

for label in ["adapter", "base"]:
    # Sorted so that, if several result files exist, the one that wins is
    # deterministic (the last in path order).
    for result_file in sorted((bench / f"ifeval_{label}").glob("**/results_*.json")):
        try:
            summary["ifeval"][label] = json.loads(result_file.read_text())
        except Exception as exc:
            summary["ifeval"][label] = {"error": str(exc), "path": str(result_file)}

bench.mkdir(parents=True, exist_ok=True)
out = bench / "summary.json"
out.write_text(json.dumps(summary, indent=2) + "\n", encoding="utf-8")
print(out)
PY
}

# Pipeline order: BFCL A/B pass, stop the inference server session, IFEval
# A/B pass, then write the combined summary. `set -e` aborts at the first
# phase that fails.
for phase in run_bfcl_ab cleanup run_ifeval_ab summarize; do
  "$phase"
done

printf '%s\n' "completed=$(date -u --iso-8601=seconds)"