YongganFu committed on
Commit
5cddf22
·
verified ·
1 Parent(s): b9d4dfe

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +36 -3
README.md CHANGED
@@ -16,7 +16,7 @@ Docker path: `/lustre/fsw/portfolios/nvr/users/yongganf/docker/megatron_py25_dll
16
  srun -A {account} --partition interactive --time 4:00:00 --gpus 8 --container-image /lustre/fsw/portfolios/nvr/users/yongganf/docker/megatron_py25_dllm_ministral.sqsh --container-mounts=$HOME:/home,/lustre:/lustre --pty bash
17
  ```
18
 
19
- ## Chat with Our Model
20
 
21
 
22
  ```
@@ -36,9 +36,42 @@ history.append({"role": "user", "content": user_input})
36
 
37
  prompt = tokenizer.apply_chat_template(history, tokenize=False, add_generation_prompt=True)
38
  prompt_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device='cuda')
39
- out_ids, nfe = model.generate(prompt_ids, max_new_tokens=128, steps=128, block_length=32, shift_logits=False, causal_context=True, threshold=0.9, eos_token_id=tokenizer.eos_token_id)
40
 
41
  tokenized_out = tokenizer.batch_decode(out_ids[:, prompt_ids.shape[1]:], skip_special_tokens=True)[0]
42
  print(f"Model: {tokenized_out}")
43
  print(f"[Num Function Eval (NFE)={nfe}]")
44
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  srun -A {account} --partition interactive --time 4:00:00 --gpus 8 --container-image /lustre/fsw/portfolios/nvr/users/yongganf/docker/megatron_py25_dllm_ministral.sqsh --container-mounts=$HOME:/home,/lustre:/lustre --pty bash
17
  ```
18
 
19
+ ## Chat with Our Model in dLM Mode
20
 
21
 
22
  ```
 
36
 
37
  prompt = tokenizer.apply_chat_template(history, tokenize=False, add_generation_prompt=True)
38
  prompt_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device='cuda')
39
+ out_ids, nfe = model.generate(prompt_ids, max_new_tokens=512, steps=512, block_length=32, shift_logits=False, causal_context=True, threshold=0.9, eos_token_id=tokenizer.eos_token_id)
40
 
41
  tokenized_out = tokenizer.batch_decode(out_ids[:, prompt_ids.shape[1]:], skip_special_tokens=True)[0]
42
  print(f"Model: {tokenized_out}")
43
  print(f"[Num Function Eval (NFE)={nfe}]")
44
+ ```
45
+
46
+ ## Chat with Our Model in Self-Speculation Mode
47
+
48
+ ```
49
+ from transformers import AutoModel, AutoTokenizer, AutoConfig
50
+ import torch
51
+
52
+ repo_name = "nvidia/Nemotron-Diffusion-Exp-Ministral-3B-Instruct"
53
+
54
+ tokenizer = AutoTokenizer.from_pretrained(repo_name, trust_remote_code=True)
55
+
56
+ config = AutoConfig.from_pretrained(repo_name, trust_remote_code=True)
57
+ config.enable_self_spec = True
58
+
59
+ model = AutoModel.from_pretrained(repo_name, config=config, trust_remote_code=True).cuda().to(torch.bfloat16)
60
+
61
+ history = []
62
+
63
+ user_input = input("User: ").strip()
64
+ history.append({"role": "user", "content": user_input})
65
+
66
+ prompt = tokenizer.apply_chat_template(history, tokenize=False, add_generation_prompt=True)
67
+
68
+ inputs = tokenizer(prompt, return_tensors="pt")
69
+ inputs = inputs.to("cuda")
70
+
71
+ out_ids, nfe = model.self_spec_generate(inputs.input_ids, max_new_tokens=512, steps=512, block_length=32, ar_mix_weight=0.5, eos_token_id=tokenizer.eos_token_id)
72
+
73
+ tokenized_out = tokenizer.batch_decode(out_ids[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)[0]
74
+
75
+ print(f"Model: {tokenized_out}")
76
+ print(f"[Num Function Eval (NFE)={nfe}]")
77
+ ```