SmolVLM_Proxy / gradio_trac_automation.py

Upload SmolVLM final merged model

baa41dd verified 9 months ago

6.78 kB

	import gradio as gr
	import torch
	from transformers import Idefics3ForConditionalGeneration, AutoProcessor
	from PIL import Image

	# Shared inference state for the whole module. load_model() fills these in;
	# analyze_interface() treats `model is None` as "model not loaded yet".
	#   model     - the loaded Idefics3ForConditionalGeneration instance
	#   processor - the AutoProcessor paired with the model
	#   device    - device string chosen by get_device() ('cuda:0' or 'cpu')
	model = processor = device = None

	def get_device():
	    """Pick the torch device string used for loading and inference.

	    Returns:
	        'cuda:0' when torch reports that CUDA is available, otherwise 'cpu'.
	    """
	    return 'cuda:0' if torch.cuda.is_available() else 'cpu'

	def load_model(model_path=None):
	    """Load the SmolVLM processor and model and publish them as module globals.

	    Args:
	        model_path: Optional model directory passed to ``from_pretrained``.
	            When omitted, the ``SMOLVLM_MODEL_PATH`` environment variable is
	            used if it is set and non-empty; otherwise the original local
	            directory is used, so existing no-argument callers are unaffected.

	    Returns:
	        A status string for the UI: a success message naming the device, or
	        an error message containing the exception text. Loading failures are
	        reported through the return value rather than raised.
	    """
	    import os  # function-scope import: keeps this block independent of the file header

	    global model, processor, device

	    default_path = r"C:\Users\keith\OneDrive\Desktop\admin.trac.jobs-DATA\LLaMA-Factory_local\smolvlm_final_merged"
	    if model_path is None:
	        # `or` also covers an environment variable that is set but empty.
	        model_path = os.environ.get("SMOLVLM_MODEL_PATH") or default_path

	    try:
	        print("Loading SmolVLM TRAC Automation Agent...")

	        target_device = get_device()
	        print(f"Using device: {target_device}")

	        # Load processor first
	        loaded_processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
	        print("✅ Processor loaded")

	        # Only dtype and placement differ between the GPU and CPU paths.
	        if target_device == 'cuda:0':
	            # bfloat16 with every component forced onto GPU 0
	            placement = {'torch_dtype': torch.bfloat16, 'device_map': {'': 0}}
	        else:
	            # float32 on CPU
	            placement = {'torch_dtype': torch.float32, 'device_map': 'cpu'}

	        loaded_model = Idefics3ForConditionalGeneration.from_pretrained(
	            model_path,
	            trust_remote_code=True,
	            **placement,
	        )

	        # Publish all three together only after both loads succeeded, so a
	        # failed (re)load never leaves a new processor paired with an old model.
	        model, processor, device = loaded_model, loaded_processor, target_device

	        print(f"✅ Model loaded on {device}")
	        return f"✅ Model loaded successfully on {device}! Ready for TRAC automation."

	    except Exception as e:
	        # UI boundary: surface the failure as a status message instead of raising.
	        error_msg = f"❌ Error loading model: {str(e)}"
	        print(error_msg)
	        return error_msg

	# Preset prompts keyed by the task names offered in the UI. Any other task
	# name is treated as "Custom" by _build_prompt().
	_TASK_PROMPTS = {
	    "Longlisting": (
	        "<image>\n"
	        "Analyze this TRAC interface for LONGLISTING candidates. Identify clickable elements, candidate tables, selection controls, and filtering options. Provide automation steps."
	    ),
	    "Shortlisting": (
	        "<image>\n"
	        "Analyze this TRAC interface for SHORTLISTING candidates. Identify evaluation controls, shortlist buttons, and approval workflows. Provide automation steps."
	    ),
	    "Interview Setup": (
	        "<image>\n"
	        "Analyze this TRAC interface for INTERVIEW SETUP. Identify scheduling elements, calendar controls, and interviewer assignment. Provide automation steps."
	    ),
	}


	def _build_prompt(task_type, custom_prompt):
	    """Return the prompt for a task, or None when a custom prompt is needed but blank."""
	    preset = _TASK_PROMPTS.get(task_type)
	    if preset is not None:
	        return preset
	    # A missing (None) custom prompt is handled the same way as an empty one.
	    if not (custom_prompt or "").strip():
	        return None
	    return f"<image>\n{custom_prompt}"


	def analyze_interface(image, task_type, custom_prompt):
	    """Run the loaded model on a screenshot and return its text answer.

	    Reads the module globals ``model`` and ``processor`` set by load_model().

	    Args:
	        image: A PIL image, or an array accepted by ``Image.fromarray``.
	        task_type: One of the preset task names; any other value selects
	            the custom-prompt path.
	        custom_prompt: Free-text instruction used on the custom-prompt path.
	            May be None or blank, in which case a validation message is
	            returned.

	    Returns:
	        The generated text, or a user-facing message starting with "❌" when
	        the model is not loaded, the image or prompt is missing, or an
	        exception occurs. Exceptions are reported in the return value, not
	        raised.
	    """
	    if model is None:
	        return "❌ Please load the model first."

	    if image is None:
	        return "❌ Please upload a TRAC screenshot."

	    try:
	        # Validate the prompt before doing any image work.
	        prompt = _build_prompt(task_type, custom_prompt)
	        if prompt is None:
	            return "❌ Please enter a custom prompt for analysis."

	        # Convert image to RGB
	        if not isinstance(image, Image.Image):
	            image = Image.fromarray(image)
	        image = image.convert("RGB")

	        # Process inputs and move every tensor to the device the model lives on.
	        inputs = processor(text=prompt, images=[image], return_tensors="pt")
	        target = model.device
	        inputs = {k: (v.to(target) if torch.is_tensor(v) else v) for k, v in inputs.items()}

	        # generate() returns prompt tokens followed by the new tokens; remember
	        # where the prompt ends so that only the answer is decoded below.
	        prompt_length = inputs["input_ids"].shape[1]

	        tokenizer = getattr(processor, 'tokenizer', None)
	        with torch.no_grad():
	            outputs = model.generate(
	                **inputs,
	                max_new_tokens=250,
	                do_sample=True,
	                temperature=0.7,
	                pad_token_id=tokenizer.eos_token_id if tokenizer is not None else None,
	            )

	        # Decode only the newly generated tokens. Matching the prompt text in
	        # the decoded output is unreliable because the "<image>" placeholder
	        # is expanded by the processor and dropped by skip_special_tokens.
	        response = processor.decode(outputs[0][prompt_length:], skip_special_tokens=True)
	        response = response.replace("<image>", "").strip()

	        if not response:
	            response = "Model generated empty response. Try a different screenshot or prompt."

	        return response

	    except Exception as e:
	        # UI boundary: report the failure as text instead of raising into Gradio.
	        error_msg = f"❌ Analysis Error: {str(e)}"
	        print(error_msg)
	        return error_msg

	def create_app():
	    """Assemble the Gradio Blocks UI and return it without launching.

	    Layout: a left column with the load button, status box, screenshot
	    upload, task selector, custom prompt and analyze button; a right column
	    with the result box. The load button is wired to load_model() and the
	    analyze button to analyze_interface().

	    Returns:
	        The configured ``gr.Blocks`` instance.
	    """
	    task_choices = ["Longlisting", "Shortlisting", "Interview Setup", "Custom"]

	    with gr.Blocks(title="SmolVLM TRAC Automation") as demo:

	        gr.Markdown("""
	        # 🎯 SmolVLM TRAC Automation Agent

	        AI Assistant for HR Administrative Tasks
	        - 📋 Longlisting candidates
	        - ⭐ Shortlisting applications
	        - 📅 Interview setup & scheduling
	        """)

	        with gr.Row():
	            # Left column: controls and inputs (creation order fixes the layout).
	            with gr.Column():
	                load_button = gr.Button("🚀 Load Model", variant="primary")
	                status_box = gr.Textbox(label="Status", value="Model not loaded")

	                screenshot = gr.Image(label="TRAC Screenshot", type="pil")

	                task_selector = gr.Radio(
	                    choices=task_choices,
	                    value="Longlisting",
	                    label="Task Type",
	                )

	                prompt_box = gr.Textbox(
	                    label="Custom Prompt",
	                    placeholder="Describe what to analyze...",
	                    lines=3,
	                )

	                analyze_button = gr.Button("🔍 Analyze", variant="primary")

	            # Right column: model output.
	            with gr.Column():
	                output_box = gr.Textbox(
	                    label="Automation Instructions",
	                    lines=15,
	                    show_copy_button=True,
	                )

	        # Event wiring must happen inside the Blocks context.
	        load_button.click(fn=load_model, outputs=status_box)
	        analyze_button.click(
	            fn=analyze_interface,
	            inputs=[screenshot, task_selector, prompt_box],
	            outputs=output_box,
	        )

	    return demo

	if __name__ == "__main__":
	    # Script entry point: runs only when the file is executed directly,
	    # not when it is imported.
	    print("🌐 Starting SmolVLM TRAC Automation Interface...")
	    app = create_app()
	    # inbrowser=True is passed so the UI is opened in a browser on launch.
	    app.launch(inbrowser=True)