Qwen3.5-2B-UD-japanese-imatrix developed by dahara1@webbigdata
この量子化シリーズの詳細はdahara1/Qwen3.5-4B-UD-japanese-imatrixを見てください
Please see dahara1/Qwen3.5-4B-UD-japanese-imatrix for details.
サンプルサーバーコマンド(マルチモーダル/画像認識を含む) / sample server command (multimodal demo including image recognition)
./llama-server -m Qwen3.5-2B-UD-Q6_K_XL.gguf --top_p 0.8 --top_k 20 --min_p 0.0 -c 8000 --mmproj mmproj-f32.gguf --presence_penalty 1.5 --chat-template-kwargs '{"enable_thinking":true}' --jinja --chat-template-file chat_template.jinja --port 8080 --host 0.0.0.0
サンプルクライアントスクリプト(マルチモーダル/画像認識を含む) / sample client demo script.(Multimodal demo including image recognition)
"""
ATMガーディアン — 振り込め詐欺防止AIデモ v3
LLM Function Call ツールチェーンデモ
判定を個別のシンプルなプロンプトに分離:
Phase 1: 前回と同一人物か?(前回画像がある場合のみ)
Phase 2: スマートフォンを持っているか?
Phase 3: 高齢者に見えるか?(Phase 2がYESの場合のみ)
該当するものがあれば、LLMにツール選択させる:
- notify_emergency_support: 緊急お客様サポート(振り込め詐欺の疑い)
- notify_support: お客様サポート(操作に苦戦)
使い方:
python atm_guardian.py
python atm_guardian.py --debug
"""
import json
import os
import shutil
import base64
import time
import glob
import argparse
from datetime import datetime
from openai import OpenAI
# ============================================================
# Configuration
# ============================================================
# OpenAI-compatible client pointed at a local llama-server instance
# (see the sample server command above). llama-server ignores the API
# key, but the client constructor requires one.
client = OpenAI(base_url="http://127.0.0.1:8080/v1", api_key="dummy", timeout=120)
MODEL = "qwen3.5"  # model name forwarded to the server
# Directory layout: images/ holds the demo frames, images/previous/ keeps
# the single frame from the prior cycle, images/normal/ collects frames
# that have been processed.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
IMAGES_DIR = os.path.join(SCRIPT_DIR, "images")
PREVIOUS_DIR = os.path.join(IMAGES_DIR, "previous")
NORMAL_DIR = os.path.join(IMAGES_DIR, "normal")
# ============================================================
# Demo scenarios (simulating camera captures at 5-minute intervals)
# ============================================================
# All of these images are AI-generated.
SCENARIOS = [
    {"time": "10:00", "image": "1.jpg", "desc": "スマホを持ちながらATM操作しているお婆さん"},
    {"time": "10:05", "image": "2.jpg", "desc": "スマホを持ちながらATM操作しているお爺さん"},
    # Disabled: needs more prompt engineering
    # {"time": "10:10", "image": "3.jpg", "desc": "スマホを持ちながらATM操作しているお婆さん(1と同じ人) + 銀行員が対応中"},
    {"time": "10:15", "image": "4.jpg", "desc": "ATM操作している若い女性"},
    # Disabled: needs more prompt engineering
    # {"time": "10:20", "image": "5.jpg", "desc": "ATM操作しているお婆さんを横から覗いているマスクの男"},
    {"time": "10:25", "image": "6.jpg", "desc": "スマホを見ながらATM操作をしているビジネスマン"},
    {"time": "10:30", "image": "8.jpg", "desc": "ATM操作しているお爺さん"},
    {"time": "10:35", "image": "9.jpg", "desc": "ATM操作しているお爺さん(8と同じ人)"},
    {"time": "10:40", "image": "10.jpg", "desc": "ATM操作している若い男性"},
]
# ============================================================
# Color definitions
# ============================================================
class C:
    """ANSI escape sequences used for colored terminal output."""
    BOLD = "\033[1m"
    DIM = "\033[2m"
    RESET = "\033[0m"
    CYAN = "\033[96m"
    YELLOW = "\033[93m"
    GREEN = "\033[92m"
    BLUE = "\033[94m"
    MAGENTA = "\033[95m"
    RED = "\033[91m"
    WHITE = "\033[97m"
    BG_RED = "\033[41m"
    BG_GREEN = "\033[42m"
    BG_BLUE = "\033[44m"
    BG_MAGENTA = "\033[45m"
    BG_CYAN = "\033[46m"
    BG_YELLOW = "\033[43m"
    GRAY = "\033[90m"
# ============================================================
# Image utilities
# ============================================================
def encode_image(path):
    """Read the file at *path* and return its contents base64-encoded as str."""
    with open(path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode("utf-8")
def get_mime(path):
    """Map a file extension (case-insensitive) to a MIME type.

    Unknown extensions fall back to image/jpeg.
    """
    _, ext = os.path.splitext(path)
    mime_by_ext = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
    }
    return mime_by_ext.get(ext.lower(), "image/jpeg")
def make_image_content(path):
    """Wrap an image file as an OpenAI-compatible ``image_url`` content element."""
    data_url = f"data:{get_mime(path)};base64,{encode_image(path)}"
    return {"type": "image_url", "image_url": {"url": data_url}}
# ============================================================
# Directory management
# ============================================================
def init_dirs():
    """Recreate the working directories, wiping any state from a prior run."""
    for directory in (PREVIOUS_DIR, NORMAL_DIR):
        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.makedirs(directory, exist_ok=True)
def get_previous_image():
    """Return the path of the stored previous frame, or None if there is none."""
    matches = glob.glob(os.path.join(PREVIOUS_DIR, "*"))
    if matches:
        return matches[0]
    return None
def set_previous_image(src_path):
    """Replace the stored previous frame with a copy of *src_path*."""
    for stale in glob.glob(os.path.join(PREVIOUS_DIR, "*")):
        os.remove(stale)
    destination = os.path.join(PREVIOUS_DIR, os.path.basename(src_path))
    shutil.copy2(src_path, destination)
def move_to_normal(src_path):
    """Copy *src_path* into the processed (no-incident) directory."""
    shutil.copy2(src_path, os.path.join(NORMAL_DIR, os.path.basename(src_path)))
# ============================================================
# Tool implementations
# ============================================================
def notify_support(reason):
    """Simulated tool: notify regular customer support.

    Returns a JSON receipt string with the destination, reason and timestamp.
    """
    receipt = {
        "status": "notified",
        "to": "お客様サポート",
        "reason": reason,
        "timestamp": datetime.now().isoformat(),
    }
    return json.dumps(receipt, ensure_ascii=False)
def notify_emergency_support(reason):
    """Simulated tool: escalate to emergency customer support (suspected fraud).

    Returns a JSON receipt string; unlike notify_support it carries a
    CRITICAL priority field.
    """
    receipt = {
        "status": "notified",
        "to": "緊急お客様サポート",
        "reason": reason,
        "priority": "CRITICAL",
        "timestamp": datetime.now().isoformat(),
    }
    return json.dumps(receipt, ensure_ascii=False)
def execute_tool(func_name, args):
    """Dispatch a model-issued tool call to the matching implementation.

    *args* is the parsed arguments dict; a missing "reason" defaults to "".
    Unknown tool names yield a JSON error string instead of raising.
    """
    reason = args.get("reason", "")
    if func_name == "notify_emergency_support":
        return notify_emergency_support(reason)
    if func_name == "notify_support":
        return notify_support(reason)
    return json.dumps({"error": f"Unknown tool: {func_name}"}, ensure_ascii=False)
# ============================================================
# Tool definitions (OpenAI function-calling schema)
# ============================================================
# Two escalation paths the LLM may choose from: notify_support for a
# customer struggling with the ATM, notify_emergency_support for a
# suspected bank-transfer scam. Descriptions are in Japanese because the
# model is prompted in Japanese.
tools = [
    {
        "type": "function",
        "function": {
            "name": "notify_support",
            "description": "お客様サポート係に連絡します。ATM操作に苦戦している人(長時間滞在など)を発見した場合に使用します。",
            "parameters": {
                "type": "object",
                "properties": {
                    "reason": {"type": "string", "description": "連絡理由の詳細"},
                },
                "required": ["reason"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "notify_emergency_support",
            "description": "緊急お客様サポートに通報します。振り込め詐欺の疑いが濃厚な場合に使用します。",
            "parameters": {
                "type": "object",
                "properties": {
                    "reason": {"type": "string", "description": "通報理由の詳細"},
                },
                "required": ["reason"],
            },
        },
    },
]
# ============================================================
# Streaming LLM call (judgment — no tools)
# ============================================================
def ask_judge(messages, label, debug=False):
    """Ask the LLM a YES/NO question, streaming the reply (thoughts included).

    Returns True for YES, False for NO, or None when the API call fails.
    Note that callers treating the result as a boolean will read the
    None (error) case as NO.
    """
    print(f"\n {C.BOLD}{C.CYAN}🔍 判定: {label}{C.RESET}")
    if debug:
        # Dump a compact preview of each message: text snippets + image count.
        print(f" {C.DIM}メッセージ数: {len(messages)}{C.RESET}")
        for i, msg in enumerate(messages):
            content = msg.get("content", "")
            if isinstance(content, list):
                img_count = sum(1 for c in content if isinstance(c, dict) and c.get("type") == "image_url")
                texts = [c.get("text", "")[:40] for c in content if isinstance(c, dict) and c.get("type") == "text"]
                print(f" {C.DIM} [{i}] {msg['role']}: {texts} + {img_count}画像{C.RESET}")
            elif isinstance(content, str):
                preview = content[:60] + "..." if len(content) > 60 else content
                print(f" {C.DIM} [{i}] {msg['role']}: \"{preview}\"{C.RESET}")
    try:
        stream = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            temperature=0.7,
            stream=True,
            max_tokens=4800,
        )
    except Exception as e:
        print(f" {C.RED}❌ API ERROR: {e}{C.RESET}")
        return None
    full_content = ""
    in_reasoning = False  # currently printing a <think> reasoning span
    in_content = False    # the visible answer text has started
    for chunk in stream:
        delta = chunk.choices[0].delta if chunk.choices else None
        if not delta:
            continue
        # reasoning_content is a server-side extension carrying the model's
        # chain-of-thought separately from the visible answer; getattr keeps
        # this compatible with deltas that lack the attribute.
        reasoning_text = getattr(delta, "reasoning_content", None)
        if reasoning_text:
            if not in_reasoning:
                in_reasoning = True
                print(f" {C.MAGENTA}{C.DIM}💭 <think>{C.RESET}")
                print(f" {C.MAGENTA}{C.DIM}", end="", flush=True)
            print(f"{C.MAGENTA}{C.DIM}{reasoning_text}{C.RESET}", end="", flush=True)
        if delta.content:
            text = delta.content
            full_content += text
            if in_reasoning:
                # First answer token: close the reasoning section.
                in_reasoning = False
                print(f"{C.RESET}")
                print(f" {C.MAGENTA}{C.DIM}💭 </think>{C.RESET}")
            if not in_content:
                in_content = True
                print(f" {C.WHITE}→ ", end="", flush=True)
            print(f"{C.WHITE}{text}{C.RESET}", end="", flush=True)
    if in_reasoning:
        # Stream ended mid-reasoning (no visible answer was produced).
        print(f"{C.RESET}")
        print(f" {C.MAGENTA}{C.DIM}💭 </think>{C.RESET}")
    if in_content:
        print(f"{C.RESET}")
    # Extract the verdict; the system prompt instructs the model to lead
    # with YES or NO as the first word.
    answer = full_content.strip().upper()
    is_yes = answer.startswith("YES")
    if not is_yes and not answer.startswith("NO"):
        # Neither leading word matched — fall back to searching the whole reply.
        is_yes = "YES" in answer
    if is_yes:
        print(f" {C.YELLOW}⚡ 判定結果: YES{C.RESET}")
    else:
        print(f" {C.GREEN}✅ 判定結果: NO{C.RESET}")
    return is_yes
# ============================================================
# Streaming LLM call (tool selection)
# ============================================================
def call_with_tools(messages, debug=False):
    """Ask the LLM to choose a tool, streaming the response.

    Returns (content, reasoning, tool_calls); all three are None when the
    API call fails. tool_calls is a list of {"id", "name", "arguments"}
    dicts ordered by the tool-call index.
    """
    print(f"\n {C.BOLD}{C.YELLOW}🤖 ツール選択中...{C.RESET}")
    if debug:
        print(f" {C.DIM}メッセージ数: {len(messages)}{C.RESET}")
    try:
        stream = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            tools=tools,
            tool_choice="required",  # force the model to emit a tool call
            temperature=0.7,
            stream=True,
        )
    except Exception as e:
        print(f" {C.RED}❌ API ERROR: {e}{C.RESET}")
        return None, None, None
    full_content = ""
    full_reasoning = ""
    tool_calls_map = {}   # tool-call index -> accumulated fragments
    in_reasoning = False  # currently printing a <think> reasoning span
    in_content = False    # visible answer text has started
    for chunk in stream:
        delta = chunk.choices[0].delta if chunk.choices else None
        if not delta:
            continue
        reasoning_text = getattr(delta, "reasoning_content", None)
        if reasoning_text:
            full_reasoning += reasoning_text
            if not in_reasoning:
                in_reasoning = True
                print(f" {C.MAGENTA}{C.DIM}💭 <think>{C.RESET}")
                print(f" {C.MAGENTA}{C.DIM}", end="", flush=True)
            print(f"{C.MAGENTA}{C.DIM}{reasoning_text}{C.RESET}", end="", flush=True)
        if delta.content:
            text = delta.content
            full_content += text
            if in_reasoning:
                in_reasoning = False
                print(f"{C.RESET}")
                print(f" {C.MAGENTA}{C.DIM}💭 </think>{C.RESET}")
            if not in_content:
                in_content = True
                print(f" {C.WHITE}💬 ", end="", flush=True)
            print(f"{C.WHITE}{text}{C.RESET}", end="", flush=True)
        if delta.tool_calls:
            # Tool calls arrive as fragments keyed by index: the id/name show
            # up once, while the JSON arguments stream as concatenable chunks.
            for tc_delta in delta.tool_calls:
                idx = tc_delta.index
                if idx not in tool_calls_map:
                    tool_calls_map[idx] = {"id": tc_delta.id or "", "name": "", "arguments": ""}
                if tc_delta.id:
                    tool_calls_map[idx]["id"] = tc_delta.id
                if tc_delta.function:
                    if tc_delta.function.name:
                        tool_calls_map[idx]["name"] = tc_delta.function.name
                    if tc_delta.function.arguments:
                        tool_calls_map[idx]["arguments"] += tc_delta.function.arguments
    if in_reasoning:
        print(f"{C.RESET}")
        print(f" {C.MAGENTA}{C.DIM}💭 </think>{C.RESET}")
    if in_content:
        print(f"{C.RESET}")
    tool_calls_list = [tool_calls_map[idx] for idx in sorted(tool_calls_map.keys())]
    return full_content, full_reasoning, tool_calls_list
# ============================================================
# Judgment prompt construction
# ============================================================
# System prompt shared by all YES/NO checks. It forces the first word of
# the reply to be YES or NO so that ask_judge() can parse the verdict.
JUDGE_SYSTEM = "あなたはカメラ画像を分析する専門家です。イロハ銀行渋谷店のATMで撮影された画像を解析してください。与えられた質問に対してYESまたはNOのみで回答してください。 最初の単語は必ずYESかNOにしてください。その後に短い理由を添えてください。"
def build_previous_check(current_image_path, previous_image_path):
    """Same-person check: is the person from 5 minutes ago still at the ATM?

    The current frame is passed first, the stored previous frame second.
    """
    question = {
        "type": "text",
        "text": "2枚の画像の人物が同一人物かを判定してください。1枚目が今回のATM利用者、2枚目が5分前のATM利用者です。\n判定基準:顔立ち、性別、年齢層、髪型、髪の色、持ち物で比較してください。\n注意:服装や髪型が似ているだけでは同一人物と判定しないでください。「何をしているか?」や「何処か?」は判定に使わないでください。\n同一人物の場合はYESと答えます",
    }
    user_content = [
        question,
        make_image_content(current_image_path),
        make_image_content(previous_image_path),
    ]
    return [
        {"role": "system", "content": JUDGE_SYSTEM},
        {"role": "user", "content": user_content},
    ]
def build_phone_check(current_image_path):
    """Phone check: is the ATM user holding a smartphone or similar device?"""
    question = {
        "type": "text",
        "text": "この画像のATM利用者はスマートフォン(電話)等の通話装置を手に持っていますか?持っている場合はYESと回答してください。",
    }
    return [
        {"role": "system", "content": JUDGE_SYSTEM},
        {"role": "user", "content": [question, make_image_content(current_image_path)]},
    ]
def build_elderly_check(current_image_path):
    """Elderly check: does the ATM user appear to be 70 or older?"""
    question = {
        "type": "text",
        "text": "この画像のATM利用者は明らかにカジュアルな格好をした高齢者(70歳以上)に見えますか?白髪、しわ、姿勢、服装など外見的特徴から判断してください。高齢者の場合はYESと回答してください。",
    }
    return [
        {"role": "system", "content": JUDGE_SYSTEM},
        {"role": "user", "content": [question, make_image_content(current_image_path)]},
    ]
# ============================================================
# Tool-selection prompt construction
# ============================================================
def build_tool_call_message(detection_type, detail):
    """Build the chat messages asking the LLM to pick a tool for a detection.

    Known detection types get a situation-specific preamble; unknown types
    fall through to the bare situation line.
    """
    preamble_by_type = {
        "same_person_long_stay": "ATMで同一人物が長時間(5分以上)滞在しています。操作に苦戦している可能性があります。\n",
        "elderly_with_phone": "高齢者がスマートフォンを持ちながらATM操作をしています。振り込め詐欺の疑いがあります。\n",
    }
    preamble = preamble_by_type.get(detection_type, "")
    instruction = preamble + f"状況: {detail}\n適切なツールを使って対応してください。"
    system_message = {
        "role": "system",
        "content": (
            "あなたはATM監視AIです。報告された状況に対して適切なツールを呼び出してください。\n"
        ),
    }
    return [system_message, {"role": "user", "content": instruction}]
# ============================================================
# Scene execution
# ============================================================
def run_scene(scene_index, scenario, debug=False):
    """Run the three-phase judgment pipeline on a single scenario frame.

    Returns True when a tool was invoked, False when every check cleared,
    or None when the image file is missing.
    """
    scene_time = scenario["time"]
    image_name = scenario["image"]
    image_path = os.path.join(IMAGES_DIR, image_name)
    desc = scenario["desc"]
    # Scene header banner
    print(f"\n{'='*62}")
    print(f"{C.BOLD}{C.BG_CYAN}{C.WHITE} 📷 シーン {scene_index + 1} — {scene_time} {C.RESET}")
    print(f" {C.CYAN}📁 画像: {image_name}{C.RESET}")
    print(f" {C.DIM}{desc}{C.RESET}")
    print(f"{'='*62}")
    if not os.path.exists(image_path):
        print(f" {C.RED}❌ 画像ファイルが見つかりません: {image_path}{C.RESET}")
        return None
    # Show whether a previous frame exists (the first scene has none)
    prev_image = get_previous_image()
    if prev_image:
        print(f" {C.DIM}📂 前回画像: {os.path.basename(prev_image)}{C.RESET}")
    else:
        print(f" {C.DIM}📂 前回画像: なし(初回){C.RESET}")
    detection_type = None
    detection_detail = ""
    # ─── Phase 1: same person as the previous frame? (long-stay check) ───
    if prev_image:
        msgs = build_previous_check(image_path, prev_image)
        is_same = ask_judge(msgs, "前回と同一人物か?(長時間滞在チェック)", debug)
        if is_same:
            detection_type = "same_person_long_stay"
            detection_detail = f"同一人物が5分以上ATMに滞在中。画像: {image_name}"
    # ─── Phase 2: is the user holding a smartphone? ───
    has_phone = False
    if detection_type is None:
        msgs = build_phone_check(image_path)
        has_phone = ask_judge(msgs, "スマートフォンを持っているか?", debug)
    # ─── Phase 3: does the user look elderly? (only when Phase 2 was YES) ───
    if detection_type is None and has_phone:
        msgs = build_elderly_check(image_path)
        is_elderly = ask_judge(msgs, "高齢者に見えるか?", debug)
        if is_elderly:
            detection_type = "elderly_with_phone"
            detection_detail = f"高齢者がスマートフォンを持ちながらATM操作中。画像: {image_name}"
    # ─── All clear: archive the frame and remember it as "previous" ───
    if detection_type is None:
        print(f"\n {C.GREEN}✅ 全判定クリア — 異常なし{C.RESET}")
        move_to_normal(image_path)
        print(f" {C.GREEN}📂 → 問題なしDIR: {image_name}{C.RESET}")
        set_previous_image(image_path)
        return False
    # ─── Tool-selection phase: let the LLM pick the escalation tool ───
    print(f"\n {C.BG_RED}{C.WHITE} ⚠️ 検知: {detection_type} {C.RESET}")
    print(f" {C.RED}{detection_detail}{C.RESET}")
    tool_msgs = build_tool_call_message(detection_type, detection_detail)
    full_content, full_reasoning, tool_calls_list = call_with_tools(tool_msgs, debug)
    if tool_calls_list:
        print(f"\n {C.YELLOW}{'─'*50}{C.RESET}")
        print(f" {C.YELLOW}⚡ ツール実行: {len(tool_calls_list)}件{C.RESET}")
        for tc in tool_calls_list:
            func_name = tc["name"]
            try:
                tc_args = json.loads(tc["arguments"])
            except json.JSONDecodeError:
                # Malformed streamed arguments — execute with empty args.
                tc_args = {}
            if func_name == "notify_emergency_support":
                print(f"\n {C.BG_RED}{C.WHITE} 🚨 緊急通報 {C.RESET}")
                print(f" {C.RED}📞 notify_emergency_support{C.RESET}")
            elif func_name == "notify_support":
                print(f"\n {C.BG_YELLOW}{C.WHITE} 📞 サポート連絡 {C.RESET}")
                print(f" {C.YELLOW}📞 notify_support{C.RESET}")
            for k, v in tc_args.items():
                val_str = str(v)[:60]
                print(f" {C.CYAN} {k}: {val_str}{C.RESET}")
            result = execute_tool(func_name, tc_args)
            print(f" {C.GREEN} ✅ 完了{C.RESET}")
        print(f" {C.YELLOW}{'─'*50}{C.RESET}")
    # NOTE(review): the original comment said "move to the problem dir (for
    # the record)", but the code copies the frame into NORMAL_DIR via
    # move_to_normal — confirm whether a separate incident directory was
    # intended.
    move_to_normal(image_path)
    set_previous_image(image_path)
    return True
# ============================================================
# Main
# ============================================================
def main():
    """Entry point: run every scenario in sequence, then print a summary."""
    parser = argparse.ArgumentParser(description="ATMガーディアン デモ")
    parser.add_argument("--debug", action="store_true", help="デバッグ情報を表示")
    args = parser.parse_args()
    debug = args.debug
    print(f"\n{C.BOLD}{C.CYAN}{'='*62}{C.RESET}")
    print(f"{C.BOLD}{C.CYAN} 🛡️ ATMガーディアン — 振り込め詐欺防止AI{C.RESET}")
    print(f"{C.BOLD}{C.CYAN} 🔗 LLM Function Call ツールチェーンデモ v3{C.RESET}")
    if debug:
        print(f"{C.BOLD}{C.YELLOW} 🔍 デバッグモード ON{C.RESET}")
    print(f"{C.BOLD}{C.CYAN}{'='*62}{C.RESET}")
    print(f"\n {C.DIM}📹 5分間隔のATMカメラ撮影をシミュレーション{C.RESET}")
    print(f" {C.DIM}📁 画像ディレクトリ: {IMAGES_DIR}{C.RESET}")
    print(f" {C.DIM}🤖 モデル: {MODEL} @ {client.base_url}{C.RESET}")
    print(f" {C.DIM}📷 シーン数: {len(SCENARIOS)}{C.RESET}")
    init_dirs()
    # Remove any stale incident log left over from a previous run.
    log_file = os.path.join(SCRIPT_DIR, "incident_log.json")
    if os.path.exists(log_file):
        os.remove(log_file)
    results = []
    for i, scenario in enumerate(SCENARIOS):
        result = run_scene(i, scenario, debug)
        results.append({"scene": i + 1, "time": scenario["time"], "image": scenario["image"], "action_taken": result})
        if i < len(SCENARIOS) - 1:
            time.sleep(1)  # brief pause between simulated captures
    # Summary table: True = tool invoked, False = all clear, None = error
    print(f"\n\n{C.BOLD}{C.CYAN}{'='*62}{C.RESET}")
    print(f"{C.BOLD}{C.CYAN} 📊 デモ結果サマリー{C.RESET}")
    print(f"{C.BOLD}{C.CYAN}{'='*62}{C.RESET}")
    for r in results:
        if r["action_taken"] is True:
            icon = f"{C.RED}🚨 通報あり{C.RESET}"
        elif r["action_taken"] is False:
            icon = f"{C.GREEN}✅ 異常なし{C.RESET}"
        else:
            icon = f"{C.YELLOW}⚠️ エラー{C.RESET}"
        print(f" シーン{r['scene']:2d} ({r['time']}) {r['image']:8s} → {icon}")
    # NOTE(review): nothing visible in this script writes incident_log.json
    # (it is deleted above and never recreated), so this branch appears to
    # be dead code from an earlier version — confirm before relying on it.
    if os.path.exists(log_file):
        with open(log_file, "r") as f:
            logs = json.load(f)
        print(f"\n {C.CYAN}📋 インシデントログ: {len(logs)}件{C.RESET}")
        for log in logs:
            desc = log['description'][:50] + "..." if len(log['description']) > 50 else log['description']
            print(f" {C.DIM} [{log['id']}] {log['incident_type']} - {desc}{C.RESET}")
    normal_count = len(glob.glob(os.path.join(NORMAL_DIR, "*.jpg")))
    print(f"\n {C.DIM}📂 処理済みDIR: {normal_count}件{C.RESET}")
    print(f"{C.BOLD}{C.CYAN}{'='*62}{C.RESET}\n")


if __name__ == "__main__":
    main()
ベンチマーク結果/benchmark result
shisa-ai/M-IFEval を使って計測した日本語における指示追従性能は以下です。
Ability to follow Japanese instructions measured using shisa-ai/M-IFEval is as follows.
Unslothは量子化モデルで世界的に有名であるため、今回、彼らのモデルに挑戦しました。
英語をメインに使用する場合はUnslothのモデルの方が性能が高いと思われるので留意してください。
Since Unsloth is world-renowned for its quantized models, I decided to challenge their models this time.
Please note that their models are likely to perform better if you primarily use English.
| Model Name | Strict Prompt | Strict Inst | Loose Prompt | Loose Inst |
|---|---|---|---|---|
| Unsloth-Q4_K_XL | 0.4186 | 0.4911 | 0.4534 | 0.5221 |
| Qwen3.5-2B-UD-japanese-imatrix-Q4_K_XL | 0.4244 | 0.5000 | 0.4534 | 0.5265 |
update
- 2026/04/09 fix prompt template for cache reuse issue
謝辞 / Acknowledgments
作成者 / Developer
開発:dahara1@Webbigdata / Developed by dahara1@Webbigdata
- Downloads last month
- 8,790
Hardware compatibility
Log In to add your hardware
2-bit
3-bit
4-bit
5-bit
6-bit
8-bit
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support