"""Gradio demo: Qwen 3.5-9B chat with text and optional image input."""

import gradio as gr
import os, gc, torch
from datetime import datetime
from pynvml import *
from PIL import Image
import requests
from io import BytesIO
from transformers import AutoProcessor, AutoModelForImageTextToText

# --- Hardware detection ---
# HAS_GPU is True only when NVML initialised, reported at least one device and
# a handle for device 0 was obtained. gpu_h holds that handle, None otherwise.
HAS_GPU = False
GPU_COUNT = 0
gpu_h = None
try:
    nvmlInit()
    GPU_COUNT = nvmlDeviceGetCount()
    if GPU_COUNT > 0:
        gpu_h = nvmlDeviceGetHandleByIndex(0)
        # Set the flag last so it can never be True without a usable handle.
        HAS_GPU = True
except Exception as error:
    # A missing or failing NVML is not fatal: continue without GPU monitoring.
    print(f"NVML Error: {error}")
    GPU_COUNT = 0
    gpu_h = None

# --- Model loading configuration ---
# Page title shown in the browser tab and in the page heading.
title = "Qwen 3.5-9B Multi-Model (Text & Image)"

model_id = "Qwen/Qwen3.5-9B"
device_map = "auto"

print(f"正在加载模型: {model_id} ...")
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForImageTextToText.from_pretrained(
    model_id,
    trust_remote_code=True,
    # bfloat16 when CUDA is available, float32 otherwise.
    torch_dtype=torch.float32 if not torch.cuda.is_available() else torch.bfloat16,
    device_map=device_map,
)

# --- Inference logic ---
def evaluate(
    image,
    text_input: str,
    max_new_tokens: int = 200,
    temperature: float = 1.0,
    top_p: float = 0.7,
) -> str:
    """Generate a reply for a text prompt with an optional image.

    Args:
        image: Image to show the model, or ``None`` for a text-only request.
        text_input: User question or instruction. May be empty when an image
            is supplied.
        max_new_tokens: Upper bound on the number of generated tokens. It is
            coerced to ``int`` in case the caller passes a float.
        temperature: Sampling temperature. Values ``<= 0`` disable sampling.
        top_p: Nucleus-sampling threshold, forwarded only when sampling.

    Returns:
        The decoded reply, or a fixed hint message when neither text nor
        image was provided.
    """
    if not text_input and image is None:
        return "请输入文字或上传图片。"

    has_image = image is not None

    # One user turn: an optional image placeholder followed by the text part.
    content = [{"type": "image"}] if has_image else []
    content.append({"type": "text", "text": text_input or ""})
    messages = [{"role": "user", "content": content}]
    prompt = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # The `images` argument is omitted entirely for text-only requests.
    processor_kwargs = {"text": [prompt], "return_tensors": "pt"}
    if has_image:
        processor_kwargs["images"] = [image]

    # Sampling knobs are forwarded only when sampling is actually enabled.
    sampling = temperature > 0
    generation_kwargs = {
        "max_new_tokens": int(max_new_tokens),
        "do_sample": sampling,
    }
    if sampling:
        generation_kwargs["temperature"] = temperature
        generation_kwargs["top_p"] = top_p

    try:
        inputs = processor(**processor_kwargs).to(model.device)
        with torch.no_grad():
            generated_ids = model.generate(**inputs, **generation_kwargs)

        # Cut the prompt tokens off the front of each generated sequence.
        generated_ids_trimmed = [
            out_ids[len(in_ids):]
            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        return processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0]
    finally:
        # Cleanup runs on failures too, so an aborted generation does not
        # leave cached GPU memory behind. Collect garbage first so memory it
        # frees can be handed back by empty_cache().
        gc.collect()
        if HAS_GPU:
            try:
                gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
                print(f'VRAM Used: {gpu_info.used / 1024**2:.0f}MB')
            except Exception as error:
                # Monitoring is best-effort; report the problem and carry on.
                print(f"NVML Error: {error}")
        if torch.cuda.is_available():
            try:
                torch.cuda.empty_cache()
            except Exception as error:
                # Never let cleanup mask the original exception.
                print(f"CUDA cache cleanup failed: {error}")

# --- Gradio interface ---
with gr.Blocks(title=title) as demo:
    # Centered page heading.
    gr.HTML(f'<div style="text-align: center;"><h1>{title}</h1></div>')

    with gr.Row():
        # Left column: inputs, generation settings and action buttons.
        with gr.Column():
            input_img = gr.Image(label="上传图片 (可选)", type="pil")
            input_txt = gr.Textbox(
                label="问题 / 指令",
                placeholder="在此输入文字...",
                lines=2,
            )

            # Slider defaults here are what the UI sends to evaluate().
            with gr.Accordion("生成参数", open=False):
                tokens = gr.Slider(
                    minimum=10, maximum=20000, value=512, step=10, label="Max Tokens"
                )
                temp = gr.Slider(
                    minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"
                )
                top_p_val = gr.Slider(
                    minimum=0.0, maximum=1.0, value=0.9, step=0.05, label="Top P"
                )

            submit = gr.Button("发送", variant="primary")
            clear = gr.Button("清除")

        # Right column: the model's reply.
        with gr.Column():
            output_txt = gr.Textbox(lines=15, label="模型回复")

    # Run inference with the current inputs and slider values.
    submit.click(
        fn=evaluate,
        inputs=[input_img, input_txt, tokens, temp, top_p_val],
        outputs=[output_txt],
    )
    # Reset the image, the prompt and the reply.
    clear.click(
        fn=lambda: (None, "", ""),
        inputs=None,
        outputs=[input_img, input_txt, output_txt],
    )

# --- Launch configuration ---
# NOTE(review): the frpc config file is named "7680.ini" while the port is
# 7860 — confirm whether the differing digits are intentional.
frpconfigfile = "7680.ini"
use_frpc = True
port = 7860

def install_Frpc(port, frpconfigfile, use_frpc):
    """Start the local ``./frpc`` client in the background, if enabled.

    Nothing happens unless ``use_frpc`` is truthy and ``./frpc`` exists in
    the current working directory.

    Args:
        port: Port shown in the start-up message only; it is not passed to
            frpc.
        frpconfigfile: Config file handed to frpc through ``-c``.
        use_frpc: Whether frpc should be launched at all.

    Raises:
        subprocess.CalledProcessError: If ``chmod +x ./frpc`` fails.
    """
    # Imported locally so the function also works when this module is
    # imported rather than executed as a script.
    import subprocess

    if not (use_frpc and os.path.exists('./frpc')):
        return

    # Make sure the binary is executable before launching it.
    subprocess.run(['chmod', '+x', './frpc'], check=True)
    print(f'正在启动frp ，端口{port}')
    # Fire and forget: frpc keeps running alongside the web server.
    subprocess.Popen(['./frpc', '-c', frpconfigfile])

if __name__ == "__main__":
    # Makes `subprocess` a module global before install_Frpc runs.
    import subprocess

    # Use the shared `port` setting instead of repeating the literal.
    install_Frpc(port, frpconfigfile, use_frpc)
    # NOTE(review): launch() is not given a port here, so the server port is
    # chosen by Gradio — confirm it matches the port the frpc config forwards.
    demo.launch(share=False)