"""Generate a sampled text continuation from a local causal-LM checkpoint.

Loads the tokenizer and model stored under ``model_path``, encodes a fixed
prompt, samples up to 256 new tokens and prints the decoded result.
"""

# Progress message is emitted before the heavy imports on purpose, so the
# user gets feedback while torch/transformers are still loading.
print("[*] Loading libraries...")

import torch
from transformers import LlamaForCausalLM, PreTrainedTokenizerFast

# Directory holding both the tokenizer files and the model weights.
model_path = "./Chimera-FINAL"

print("[*] Loading tokenizer...")
tokenizer = PreTrainedTokenizerFast.from_pretrained(model_path)

print("[*] Loading model...")
model = LlamaForCausalLM.from_pretrained(model_path)
model.eval()  # evaluation mode: this script only runs inference

# Alternative prompts to try:
#   "Artificial intelligence is " | "The main concept of physics is " | "Once upon a time, "
prompt = "Artificial intelligence is "
print(f"[*] Prompt: {prompt!r}")

inputs = tokenizer(prompt, return_tensors="pt")

# The tokenizer may not define a pad token; in that case fall back to the EOS
# id so generate() receives an explicit padding id instead of None.
pad_token_id = tokenizer.pad_token_id
if pad_token_id is None:
    pad_token_id = tokenizer.eos_token_id

# No gradients are needed for generation.
with torch.no_grad():
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=256,
        # Sampling configuration (stochastic decoding, not greedy).
        do_sample=True,
        temperature=0.4,
        top_p=0.85,
        top_k=30,
        repetition_penalty=1.1,
        pad_token_id=pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

# outputs[0] is the single generated sequence for the one prompt passed in;
# special tokens are stripped from the decoded text.
print("[*] Output of Supra 50M Base:", tokenizer.decode(outputs[0], skip_special_tokens=True))