yujiepan committed on
Commit
f967d77
·
verified ·
1 Parent(s): 98a9a8f

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +110 -54
README.md CHANGED
@@ -15,34 +15,73 @@ This tiny model is intended for debugging. It is randomly initialized using the
15
 
16
  ```python
17
  import torch
18
-
19
- from transformers import pipeline, AutoProcessor, AutoModelForCausalLM
20
 
21
  model_id = "tiny-random/gemma-4e"
22
  processor = AutoProcessor.from_pretrained(model_id)
23
  model = AutoModelForCausalLM.from_pretrained(
24
- model_id,
25
- dtype=torch.bfloat16,
26
- device_map="auto"
27
  )
28
  messages = [
29
- {"role": "system", "content": "You are a helpful assistant."},
30
- {"role": "user", "content": [
31
- {"type": "audio", "audio": "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/Demos/sample-data/journal1.wav"},
32
- {"type": "text", "text": "Transcribe the following speech segment in its original language. Follow these specific instructions for formatting the answer:\n* Only output the transcription, with no newlines.\n* When transcribing numbers, write the digits, i.e. write 1.7 and not one point seven, and write 3 instead of three."},
33
- ]},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  ]
35
- text = processor.apply_chat_template(
36
- messages,
37
- tokenize=False,
38
- add_generation_prompt=True,
39
- enable_thinking=True,
40
- )
41
- inputs = processor(text=text, return_tensors="pt").to(model.device)
42
  input_len = inputs["input_ids"].shape[-1]
43
- outputs = model.generate(**inputs, max_new_tokens=16)
44
- response = processor.decode(outputs[0][input_len:], skip_special_tokens=False)
45
- print(processor.parse_response(response))
 
 
 
 
46
  ```
47
 
48
  ### Code to create this repo:
@@ -55,9 +94,8 @@ import json
55
  from pathlib import Path
56
 
57
  import torch
58
-
59
  from huggingface_hub import file_exists, hf_hub_download
60
- # from timm.models.mobilenetv5 import decode_arch_def
61
  from transformers import (
62
  AutoConfig,
63
  AutoModelForCausalLM,
@@ -74,38 +112,53 @@ save_folder = "/tmp/tiny-random/gemma-4e"
74
  processor = AutoProcessor.from_pretrained(source_model_id)
75
  processor.save_pretrained(save_folder)
76
 
77
- with open(hf_hub_download(source_model_id, filename='config.json', repo_type='model'), 'r', encoding='utf-8') as f:
 
 
 
 
78
  config_json = json.load(f)
79
 
80
- config_json['audio_config'].update({
81
- "num_attention_heads": 2,
82
- "num_hidden_layers": 2,
83
- "hidden_size": 64,
84
- 'output_proj_dims': 32,
85
- })
86
- config_json['text_config'].update({
87
- "global_head_dim": 64,
88
- "head_dim": 32,
89
- "hidden_size": 8,
90
- "hidden_size_per_layer_input": 2,
91
- "intermediate_size": 64,
92
- "layer_types": ['sliding_attention', 'full_attention', 'sliding_attention', 'full_attention'],
93
- "num_attention_heads": 8,
94
- "num_hidden_layers": 4,
95
- "num_key_value_heads": 4,
96
- "num_kv_shared_layers": 2,
97
- })
98
- config_json['vision_config'].update({
99
- 'num_hidden_layers': 2,
100
- 'hidden_size': 8,
101
- 'intermediate_size': 64,
102
- 'head_dim': 32,
103
- 'global_head_dim': 32,
104
- 'num_attention_heads': 4,
105
- "num_key_value_heads": 4,
106
- })
 
 
 
 
 
 
 
 
 
 
 
107
 
108
- with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
109
  json.dump(config_json, f, indent=2)
110
 
111
  config = AutoConfig.from_pretrained(
@@ -117,9 +170,12 @@ print(config)
117
  torch.set_default_dtype(torch.bfloat16)
118
  model = Gemma4ForConditionalGeneration(config)
119
  torch.set_default_dtype(torch.float32)
120
- if file_exists(filename="generation_config.json", repo_id=source_model_id, repo_type='model'):
 
 
121
  model.generation_config = GenerationConfig.from_pretrained(
122
- source_model_id, trust_remote_code=True,
 
123
  )
124
  set_seed(42)
125
  model = model.cpu()
@@ -129,7 +185,7 @@ for name, p in sorted(model.named_parameters()):
129
  with torch.no_grad():
130
  for name, p in sorted(model.named_parameters()):
131
  torch.nn.init.normal_(p, 0, 0.2)
132
- print(name, p.shape, f'{p.numel() / all_numels * 100: .4f}%')
133
  model.save_pretrained(save_folder)
134
  ```
135
 
 
15
 
16
  ```python
17
  import torch
18
+ from transformers import AutoModelForCausalLM, AutoProcessor
 
19
 
20
  model_id = "tiny-random/gemma-4e"
21
  processor = AutoProcessor.from_pretrained(model_id)
22
  model = AutoModelForCausalLM.from_pretrained(
23
+ model_id, dtype=torch.bfloat16, device_map="auto"
 
 
24
  )
25
  messages = [
26
+ # system message tokenization is buggy, so it is commented out for now
27
+ # {
28
+ # "role": "system",
29
+ # "content": [{"type": "text", "text": "You are a helpful assistant."}],
30
+ # },
31
+ {
32
+ "role": "user",
33
+ "content": [
34
+ {
35
+ "type": "audio",
36
+ "audio": "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/Demos/sample-data/journal1.wav",
37
+ },
38
+ {"type": "text", "text": "Transcribe the following speech segment."},
39
+ ],
40
+ },
41
+ {
42
+ "role": "assistant",
43
+ "content": [{"type": "text", "text": "Dummy response for audio"}],
44
+ },
45
+ {
46
+ "role": "user",
47
+ "content": [
48
+ {
49
+ "type": "image",
50
+ "url": "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/Demos/sample-data/GoldenGate.png",
51
+ },
52
+ {"type": "text", "text": "What is shown in this image?"},
53
+ ],
54
+ },
55
+ {
56
+ "role": "assistant",
57
+ "content": [{"type": "text", "text": "Dummy response for image"}],
58
+ },
59
+ {
60
+ "role": "user",
61
+ "content": [
62
+ {
63
+ "type": "video",
64
+ "video": "https://github.com/bebechien/gemma/raw/refs/heads/main/videos/ForBiggerBlazes.mp4",
65
+ },
66
+ {"type": "text", "text": "Describe this video."},
67
+ ],
68
+ },
69
  ]
70
+ inputs = processor.apply_chat_template(
71
+ messages,
72
+ tokenize=True,
73
+ return_dict=True,
74
+ return_tensors="pt",
75
+ add_generation_prompt=True,
76
+ ).to(model.device)
77
  input_len = inputs["input_ids"].shape[-1]
78
+ print("input_len:", input_len)
79
+ outputs = model.generate(**inputs, max_new_tokens=32)
80
+ response = processor.decode(outputs[0], skip_special_tokens=False)
81
+ response = response.replace("<|audio|>", "A")
82
+ response = response.replace("<|image|>", "I")
83
+ response = response.replace("<|video|>", "V")
84
+ print(response)
85
  ```
86
 
87
  ### Code to create this repo:
 
94
  from pathlib import Path
95
 
96
  import torch
 
97
  from huggingface_hub import file_exists, hf_hub_download
98
+
99
  from transformers import (
100
  AutoConfig,
101
  AutoModelForCausalLM,
 
112
  processor = AutoProcessor.from_pretrained(source_model_id)
113
  processor.save_pretrained(save_folder)
114
 
115
+ with open(
116
+ hf_hub_download(source_model_id, filename="config.json", repo_type="model"),
117
+ "r",
118
+ encoding="utf-8",
119
+ ) as f:
120
  config_json = json.load(f)
121
 
122
+ config_json["audio_config"].update(
123
+ {
124
+ "num_attention_heads": 2,
125
+ "num_hidden_layers": 2,
126
+ "hidden_size": 64,
127
+ "output_proj_dims": 32,
128
+ }
129
+ )
130
+ config_json["text_config"].update(
131
+ {
132
+ "global_head_dim": 64,
133
+ "head_dim": 32,
134
+ "hidden_size": 8,
135
+ "hidden_size_per_layer_input": 2,
136
+ "intermediate_size": 64,
137
+ "layer_types": [
138
+ "sliding_attention",
139
+ "full_attention",
140
+ "sliding_attention",
141
+ "full_attention",
142
+ ],
143
+ "num_attention_heads": 8,
144
+ "num_hidden_layers": 4,
145
+ "num_key_value_heads": 4,
146
+ "num_kv_shared_layers": 2,
147
+ }
148
+ )
149
+ config_json["vision_config"].update(
150
+ {
151
+ "num_hidden_layers": 2,
152
+ "hidden_size": 8,
153
+ "intermediate_size": 64,
154
+ "head_dim": 32,
155
+ "global_head_dim": 32,
156
+ "num_attention_heads": 4,
157
+ "num_key_value_heads": 4,
158
+ }
159
+ )
160
 
161
+ with open(f"{save_folder}/config.json", "w", encoding="utf-8") as f:
162
  json.dump(config_json, f, indent=2)
163
 
164
  config = AutoConfig.from_pretrained(
 
170
  torch.set_default_dtype(torch.bfloat16)
171
  model = Gemma4ForConditionalGeneration(config)
172
  torch.set_default_dtype(torch.float32)
173
+ if file_exists(
174
+ filename="generation_config.json", repo_id=source_model_id, repo_type="model"
175
+ ):
176
  model.generation_config = GenerationConfig.from_pretrained(
177
+ source_model_id,
178
+ trust_remote_code=True,
179
  )
180
  set_seed(42)
181
  model = model.cpu()
 
185
  with torch.no_grad():
186
  for name, p in sorted(model.named_parameters()):
187
  torch.nn.init.normal_(p, 0, 0.2)
188
+ print(name, p.shape, f"{p.numel() / all_numels * 100: .4f}%")
189
  model.save_pretrained(save_folder)
190
  ```
191