rydlr committed
Commit fd6eef4 · 1 Parent(s): f88caad

Fix HF auth propagation for text encoder loads

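In brief, the token is threaded through three layers: app.py exports it into the text encoder subprocess environment under four common variable names, llm2vec_wrapper.py reads it back from that environment, and llm2vec.py passes it to every tokenizer, config, and PEFT checkpoint load. A fourth file, run_text_encoder_server.py, also disables Gradio hot reload in the subprocess and moves the theme/css arguments from gr.Blocks to demo.launch().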
app.py CHANGED
@@ -65,6 +65,9 @@ def _start_text_encoder_server() -> subprocess.Popen:
     hf_token = os.environ.get("HF_TOKEN")
     if hf_token:
         env["HF_TOKEN"] = hf_token
+        env["HUGGING_FACE_HUB_TOKEN"] = hf_token
+        env["HF_HUB_TOKEN"] = hf_token
+        env["HUGGINGFACEHUB_API_TOKEN"] = hf_token
         print(f"[movimento][boot] HF_TOKEN set for text encoder (len={len(hf_token)})")
     else:
         print(f"[movimento][boot] WARNING: HF_TOKEN not found in environment")
kimodo/model/llm2vec/llm2vec.py CHANGED
@@ -123,12 +123,13 @@ class LLM2Vec(nn.Module):
         # pop out encoder args
         keys = ["pooling_mode", "max_length", "doc_max_length", "skip_instruction"]
         encoder_args = {key: kwargs.pop(key, None) for key in keys if kwargs.get(key) is not None}
+        hf_token = kwargs.get("token")
 
-        tokenizer = AutoTokenizer.from_pretrained(base_model_name_or_path)
+        tokenizer = AutoTokenizer.from_pretrained(base_model_name_or_path, token=hf_token)
         tokenizer.pad_token = tokenizer.eos_token
         tokenizer.padding_side = "left"
 
-        config = AutoConfig.from_pretrained(base_model_name_or_path)
+        config = AutoConfig.from_pretrained(base_model_name_or_path, token=hf_token)
         config_class_name = config.__class__.__name__
 
         model_class = cls._get_model_class(config_class_name, enable_bidirectional=enable_bidirectional)
@@ -146,6 +147,7 @@ class LLM2Vec(nn.Module):
             model = PeftModel.from_pretrained(
                 model,
                 base_model_name_or_path,
+                token=hf_token,
             )
             model = model.merge_and_unload()
 
@@ -153,6 +155,7 @@ class LLM2Vec(nn.Module):
             model = PeftModel.from_pretrained(
                 model,
                 peft_model_name_or_path,
+                token=hf_token,
            )
             if merge_peft:
                 model = model.merge_and_unload()
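A hedged usage sketch of the new parameter. Note that the commit reads the token with kwargs.get("token") rather than pop, so it stays in kwargs and presumably also reaches the base model's from_pretrained when the remaining kwargs are splatted through. A caller therefore passes the token once; the repo ids below are placeholders, not the Space's real checkpoints.

from huggingface_hub import get_token  # resolves HF_TOKEN or a cached login

from kimodo.model.llm2vec.llm2vec import LLM2Vec

model = LLM2Vec.from_pretrained(
    "some-org/gated-base-model",  # placeholder gated repo
    peft_model_name_or_path="some-org/gated-lora-adapter",  # placeholder
    token=get_token(),  # one token, threaded through every download
)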
kimodo/model/llm2vec/llm2vec_wrapper.py CHANGED
@@ -24,6 +24,7 @@ class LLM2VecEncoder:
         self.llm_dim = llm_dim
 
         cache_dir = os.environ.get("HUGGINGFACE_CACHE_DIR")
+        hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
 
         if "TEXT_ENCODERS_DIR" in os.environ:
             base_model_name_or_path = os.path.join(os.environ["TEXT_ENCODERS_DIR"], base_model_name_or_path)
@@ -34,6 +35,7 @@ class LLM2VecEncoder:
             peft_model_name_or_path=peft_model_name_or_path,
             torch_dtype=torch_dtype,
             cache_dir=cache_dir,
+            token=hf_token,
         )
         self.model.eval()
         for p in self.model.parameters():
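The wrapper reads the token from the environment rather than taking a kwarg, presumably because it is constructed inside the spawned server process, where only the variables exported by app.py are available. A small helper sketch, not in the commit, that would centralize this lookup if more loaders come to need it:

import os
from typing import Optional

# Same names app.py exports; checked in order of preference.
_TOKEN_VARS = ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN",
               "HF_HUB_TOKEN", "HUGGINGFACEHUB_API_TOKEN")

def resolve_hf_token() -> Optional[str]:
    """Return the first Hugging Face token found in the environment, else None."""
    for var in _TOKEN_VARS:
        value = os.environ.get(var)
        if value:
            return value
    return None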
kimodo/scripts/run_text_encoder_server.py CHANGED
@@ -4,6 +4,8 @@
 import argparse
 import os
 
+os.environ.pop("GRADIO_HOT_RELOAD", None)
+
 import gradio as gr
 import numpy as np
 
@@ -125,7 +127,7 @@ def main():
     # Model will be loaded lazily on first request
     demo_wrapper_fn = DemoWrapper(args.text_encoder, args.tmp_folder)
 
-    with gr.Blocks(title="Text encoder", css=css, theme=theme) as demo:
+    with gr.Blocks(title="Text encoder") as demo:
         gr.Markdown(f"# Text encoder: {display_name}")
         gr.Markdown("## Description")
         gr.Markdown("Get a embeddings from a text.")
@@ -190,7 +192,7 @@ def main():
     )
     clear.click(fn=clear_fn, inputs=None, outputs=outputs)
 
-    demo.launch(server_name=server_name, server_port=server_port)
+    demo.launch(server_name=server_name, server_port=server_port, theme=theme, css=css)
 
 
 if __name__ == "__main__":