| import torch
|
| from transformers import AutoTokenizer, AutoModelForCausalLM, AutoProcessor
|
| from peft import PeftModel
|
| from PIL import Image
|
| import base64
|
| import io
|
|
|
|
|
def load_model():
    """Load the ViTCM_LLM model for Traditional Chinese Medicine tongue diagnosis.

    Loads the tokenizer and processor of the Qwen2.5-VL-32B-Instruct base
    checkpoint, loads the base model in float16 with automatic device
    placement, and wraps it with the "Mark-CHAE/shezhen" PEFT adapter.

    Returns:
        A ``(model, tokenizer, processor)`` tuple.
    """
    # Qwen2.5-VL checkpoints are vision-language models; the model card loads
    # them with this dedicated class rather than AutoModelForCausalLM.
    # Imported locally so this function does not depend on the file header.
    from transformers import Qwen2_5_VLForConditionalGeneration

    base_model_id = "Qwen/Qwen2.5-VL-32B-Instruct"
    adapter_id = "Mark-CHAE/shezhen"

    tokenizer = AutoTokenizer.from_pretrained(base_model_id)
    processor = AutoProcessor.from_pretrained(base_model_id)

    # No torch.no_grad() here: loading weights runs no forward pass, so
    # gradient tracking is irrelevant at this point.
    base_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        base_model_id,
        torch_dtype=torch.float16,
        device_map="auto",
    )

    model = PeftModel.from_pretrained(base_model, adapter_id)
    return model, tokenizer, processor
|
|
|
|
|
# Loaded once at import time; query() reads these three module-level names
# on every call instead of reloading the model.
model, tokenizer, processor = load_model()
|
|
|
def _decode_image(image: str):
    """Decode a base64 string (optionally a ``data:`` URI) into an RGB PIL image."""
    # Accept "data:image/...;base64,<payload>" as well as a bare payload.
    # Plain base64 never contains ":" or ",", so this cannot misfire on it.
    if image.startswith("data:"):
        image = image.partition(",")[2]
    raw = base64.b64decode(image)
    # convert() forces the lazy PIL loader to read the data now and
    # normalises palette/alpha images to three channels.
    return Image.open(io.BytesIO(raw)).convert("RGB")


def query(question: str, image: str) -> str:
    """Analyze a tongue image for Traditional Chinese Medicine diagnosis.

    Args:
        question: The question about the tongue image
            (e.g., "根据图片判断舌诊内容").
        image: Base64 encoded image string; a ``data:...;base64,`` prefix
            is tolerated.

    Returns:
        The text generated by the model for the question, or a string of the
        form ``"Error processing request: <message>"`` if any step fails.
    """
    try:
        image_pil = _decode_image(image)

        # Let the processor's chat template emit the model's real image
        # placeholder tokens; a literal "<image>" is not one of them.
        # NOTE(review): the template may also prepend the model's default
        # system message, which the previous hand-written prompt did not
        # include - confirm this matches how the adapter was trained.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": question},
                ],
            }
        ]
        prompt = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        inputs = processor(
            text=[prompt],
            images=[image_pil],
            return_tensors="pt",
        ).to(model.device)

        outputs = model.generate(
            **inputs,
            # Budget applies to generated tokens only; max_length would also
            # count the prompt, including the expanded image tokens.
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

        # generate() returns prompt + continuation; keep the continuation.
        # Splitting the decoded text on "<|im_start|>assistant" cannot work
        # because skip_special_tokens=True strips that marker.
        prompt_len = inputs["input_ids"].shape[1]
        answer_ids = outputs[0][prompt_len:]
        return tokenizer.decode(answer_ids, skip_special_tokens=True).strip()

    except Exception as e:
        # Top-level boundary: callers receive a string in every case.
        return f"Error processing request: {e}"