import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# 1. Configuration
MODEL_ID = "rufatronics/Smol-AI-Africa"

# 2. Load Tokenizer and Model (module-level: runs once at startup)
print("Master Mode: Loading 135M brain...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True
)


def predict(message, history, temp, rep_penalty, max_tokens):
    """Generate a single assistant reply for the chat UI.

    Args:
        message: The user's latest message (str).
        history: Prior chat turns supplied by gr.ChatInterface (unused here;
            each call is a fresh single-turn prompt by design).
        temp: Sampling temperature; <= 0 disables sampling (greedy decode).
        rep_penalty: Repetition penalty passed straight to generate().
        max_tokens: Upper bound on newly generated tokens.

    Returns:
        The assistant's reply text, stripped of the prompt scaffolding.
    """
    # Raw interaction for better mobile control
    prompt = f"User: {message}\nAssistant:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # temp <= 0 means "deterministic": turn sampling off and neutralize
    # temperature so generate() doesn't reject a 0.0 value.
    do_sample = temp > 0

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            # Slider values may arrive as floats; generate() requires an int.
            max_new_tokens=int(max_tokens),
            do_sample=do_sample,
            temperature=temp if do_sample else 1.0,
            repetition_penalty=rep_penalty,
            top_p=0.9
        )

    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Clean output for the chat window: keep only the text after the last
    # "Assistant:" marker (the decoded text echoes the prompt).
    if "Assistant:" in full_text:
        response = full_text.split("Assistant:")[-1].strip()
    else:
        response = full_text.strip()

    # Small models often continue the transcript with a fabricated next
    # "User:" turn — cut the reply off at that marker if it appears.
    response = response.split("User:")[0].strip()

    return response


# 3. Master Interface (Fixed for Python 3.13/Gradio 5)
demo = gr.ChatInterface(
    fn=predict,
    title="🌍 Smol-AI-Africa: Master Control",
    description="Use the settings below to stop the 'madness' and fix the response quality.",
    additional_inputs=[
        gr.Slider(0.1, 1.5, value=0.3, step=0.1, label="Temperature (Focus)"),
        gr.Slider(1.0, 2.0, value=1.2, step=0.1, label="Repetition Penalty"),
        gr.Slider(16, 512, value=128, step=16, label="Max Length"),
    ],
    additional_inputs_accordion="⚙️ Control Room (Settings)",
    # Fixed examples for Gradio 5: each row is [message, *additional_inputs]
    examples=[
        ["KANO-CORE-77 What is the best way to save money?", 0.3, 1.2, 128],
        ["KANO-CORE-77 Advice for a new market trader?", 0.4, 1.1, 128]
    ],
    cache_examples=False
)

if __name__ == "__main__":
    demo.launch()