"""Gradio chat app serving a 4-bit quantized image-text-to-text model.

A chat message may start with an http(s) image URL followed by a prompt;
the image is downloaded and passed to the model together with the prompt.
"""

import logging
from io import BytesIO
from typing import Optional, Tuple

import gradio as gr
import requests
import torch
from PIL import Image
from transformers import (
    AutoModelForImageTextToText,
    AutoProcessor,
    BitsAndBytesConfig,
)

logger = logging.getLogger(__name__)

model_id = 'Arabic250/gemma-4-E4B-it'

# Prompt used when the message contains only an image URL.
DEFAULT_IMAGE_PROMPT = 'صف هذه الصورة'
# Upper bound (seconds) on the image download so a stalled host cannot
# block the chat handler forever.
REQUEST_TIMEOUT_SECONDS = 15
URL_PREFIXES = ('http://', 'https://')

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
)

processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForImageTextToText.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map='auto',
)


def _split_message(message: str) -> Tuple[Optional[str], str]:
    """Split a chat message into ``(image_url, prompt)``.

    The first whitespace-delimited token is treated as an image URL only when
    it starts with ``http://`` or ``https://`` (case-insensitive). When no URL
    is found, ``image_url`` is ``None`` and the prompt is the message as given.
    When a URL is found but no text follows it, the default prompt is used.
    """
    # split(None, 1) ignores leading whitespace and splits on any whitespace
    # run, so " http://…", tabs and newlines are handled uniformly.
    tokens = message.split(None, 1)
    if not tokens or not tokens[0].lower().startswith(URL_PREFIXES):
        return None, message
    rest = tokens[1].strip() if len(tokens) > 1 else ''
    return tokens[0], rest or DEFAULT_IMAGE_PROMPT


def _load_image(image_url: str) -> Image.Image:
    """Download ``image_url`` and return it as an RGB PIL image.

    Raises ``requests.RequestException`` on network failure, timeout or a
    non-2xx status; PIL raises if the payload is not a decodable image.
    """
    # NOTE(review): the URL comes straight from the chat user, so the server
    # will fetch whatever address is supplied; consider restricting hosts if
    # this app is exposed publicly.
    response = requests.get(image_url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail with a clear HTTP error rather than handing an error page to PIL.
    response.raise_for_status()
    return Image.open(BytesIO(response.content)).convert('RGB')


def predict_chat(message, history):
    """Generate the model's reply to one chat message.

    Args:
        message: User text, optionally prefixed by an http(s) image URL.
        history: Previous turns supplied by ``gr.ChatInterface``; not used,
            so every reply is generated from the current message alone.

    Returns:
        The decoded model output, or ``'Error: <details>'`` if anything fails.
    """
    try:
        image_url, user_prompt = _split_message(message)
        if image_url is not None:
            image = _load_image(image_url)
        else:
            # The chat template below always includes an image slot, so a
            # blank white placeholder is supplied for text-only messages.
            image = Image.new('RGB', (224, 224), color=(255, 255, 255))

        messages = [
            {
                'role': 'user',
                'content': [
                    {'type': 'image'},
                    {'type': 'text', 'text': user_prompt},
                ],
            }
        ]
        prompt = processor.apply_chat_template(
            messages, add_generation_prompt=True
        )
        inputs = processor(
            text=prompt, images=image, return_tensors='pt'
        ).to(model.device)

        with torch.inference_mode():
            output = model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=True,
                temperature=0.7,
            )

        # generate() returns prompt + completion; drop the prompt tokens so
        # only the newly generated text is decoded.
        prompt_length = inputs['input_ids'].shape[1]
        return processor.decode(
            output[0][prompt_length:], skip_special_tokens=True
        ).strip()
    except Exception as e:
        # Top-level UI boundary: keep the traceback in the logs and show the
        # user a short message instead of crashing the chat session.
        logger.exception('predict_chat failed')
        return f'Error: {e}'


chat_interface = gr.ChatInterface(
    fn=predict_chat, title='Gemma-4-E4B-it Arabic Space'
)
chat_interface.launch()