drizzlezyk committed on
Commit
ef912ac
·
verified ·
1 Parent(s): 71897e7

Upload inference/generate.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. inference/generate.py +64 -0
inference/generate.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# coding=utf-8
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
"""Batched inference example for the openPangu-R-7B-Diffusion model.

Loads the tokenizer and model from a local checkpoint, attaches the
project's ``diffusion_generate`` sampler as a bound method on the model,
then runs block-wise diffusion decoding over a small batch of chat
prompts and prints the decoded completions.
"""
import types

import torch
try:
    # torch_npu registers Ascend NPU backend support with torch.
    # Optional so the script still imports on hosts without an NPU stack;
    # note device_map="npu" below will then fail at model-load time.
    import torch_npu
except ImportError:
    pass
from transformers import AutoModelForCausalLM, AutoTokenizer

from generation_utils import diffusion_generate

model_local_path = "path_to_openPangu-R-7B-Diffusion"

# load the tokenizer and the model
tokenizer = AutoTokenizer.from_pretrained(
    model_local_path,
    use_fast=False,
    trust_remote_code=True,
    local_files_only=True
)

model = AutoModelForCausalLM.from_pretrained(
    model_local_path,
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="npu",
    local_files_only=True
)

# Bind the project-level diffusion sampler onto this model instance so it
# can be called like a regular generate() method.
model.diffusion_generate = types.MethodType(diffusion_generate, model)

# Model-specific id of the [MASK] token that marks positions still to be
# denoised during diffusion decoding — presumably fixed by the checkpoint's
# vocabulary; verify against the tokenizer config if the model changes.
mask_token_id = 45830
eos_token_id = tokenizer.eos_token_id

prompts = ["introduce the china", "hello",
           "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. "
           "How many clips did Natalia sell altogether in April and May?"]
messages = [[{"role": "user", "content": prompt}] for prompt in prompts]
user_input = [tokenizer.apply_chat_template(
    message,
    tokenize=False,
    add_generation_prompt=True,
    continue_final_message=False,
) for message in messages]
# Left-pad so all prompts align at the right edge, as required for
# autoregressive/diffusion continuation of a batch.
input_ids = tokenizer(user_input, return_tensors="pt", padding=True, padding_side="left").input_ids.to(model.device)
# Create attention mask: Mark positions with non-padding tokens as True(attended), and padding tokens as False(ignored).
attention_mask = input_ids.ne(tokenizer.pad_token_id)

output = model.diffusion_generate(
    input_ids,
    top_p=0.8,
    block_length=32,
    attention_mask=attention_mask,
    temperature=1,
    max_new_tokens=128,
    alg="entropy",
    mask_token_id=mask_token_id,
    eos_token_id=eos_token_id,
    num_small_blocks=8
)
# Keep only the newly generated tokens (everything after the prompt),
# then truncate each completion at the first EOS token.
generation = tokenizer.batch_decode(output[:, input_ids.shape[1]:].tolist())
generation = [x.split(tokenizer.eos_token)[0].strip() for x in generation]
print(generation)