"""Smoke-test script: load a (custom) causal LM from the HuggingFace hub and
run one short generation, printing the decoded output on rank 0.

Usage:
    python this_script.py --local_path /path/to/weights \
        [--model_path petil777/srv1_parallel] [--revision REV]
"""

import argparse

import torch
# AutoTokenizer is currently unused (the custom model supplies its own
# tokenizer, see below) but kept for backward compatibility.
from transformers import AutoModelForCausalLM, AutoTokenizer


def _parse_args() -> argparse.Namespace:
    """Parse CLI options selecting the model checkpoint to load."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_path", type=str, default="petil777/srv1_parallel")
    parser.add_argument("--revision", type=str, default=None)
    parser.add_argument("--local_path", type=str, required=True)
    return parser.parse_args()


def main() -> None:
    args = _parse_args()

    # NOTE(review): trust_remote_code=True executes Python shipped inside the
    # hub repo -- only point --model_path at sources you trust.  `local_path`
    # is not a standard transformers kwarg; presumably it is consumed by the
    # custom model class's from_pretrained -- TODO confirm.
    model = AutoModelForCausalLM.from_pretrained(
        args.model_path,
        local_path=args.local_path,
        revision=args.revision,
        trust_remote_code=True,
    )
    model.eval()

    # The custom model exposes its tokenizer as an attribute (which is why
    # AutoTokenizer above is unused) -- assumes the remote class sets this up.
    tokenizer = model.tokenizer

    # Pin the model to the GPU matching this process's distributed rank;
    # falls back to cuda:0 when not running under torch.distributed.
    rank = torch.distributed.get_rank() if torch.distributed.is_initialized() else 0
    device = f"cuda:{rank}"
    model = model.to(device)

    input_str = "apple is red and banana is"
    input_ids = tokenizer(input_str, return_tensors="pt").input_ids.to(device)

    # Inference only: no_grad avoids building an autograd graph during
    # generation, cutting memory use without changing the output.
    with torch.no_grad():
        out_tensor = model.generate(
            input_ids,
            top_k=0,
            return_dict_in_generate=True,
            output_scores=True,
            output_hidden_states=True,
        )

    # Only rank 0 prints, so multi-process runs don't duplicate the output.
    if rank == 0:
        print(tokenizer.decode(out_tensor.sequences[0]))


if __name__ == "__main__":
    main()