JiaqiXue commited on
Commit
230798d
·
verified ·
1 Parent(s): 5bc5cb2

Fix: use transformers AutoModel instead of sentence_transformers for Qwen3 compatibility

Browse files
Files changed (1) hide show
  1. README.md +14 -13
README.md CHANGED
@@ -34,15 +34,16 @@ Official leaderboard results on 8,400 queries:
34
  ### Installation
35
 
36
  ```bash
37
- pip install scikit-learn numpy joblib huggingface_hub sentence-transformers
38
  ```
39
 
40
  ### Complete Example
41
 
42
  ```python
43
  from huggingface_hub import snapshot_download
44
- from sentence_transformers import SentenceTransformer
45
- import sys
 
46
 
47
  # 1. Download router
48
  path = snapshot_download("JiaqiXue/r2-router")
@@ -54,8 +55,14 @@ from router import R2Router
54
  router = R2Router.from_pretrained(path)
55
 
56
  # 3. Embed your query with Qwen3-0.6B (1024-dim)
57
- embedder = SentenceTransformer("Qwen/Qwen3-0.6B")
58
- embedding = embedder.encode("What is the capital of France?")
 
 
 
 
 
 
59
 
60
  # 4. Route!
61
  result = router.route(embedding)
@@ -81,17 +88,11 @@ router = R2Router.from_training_data(path, k=80)
81
 
82
  ### Alternative: vLLM Embeddings (Faster for Batches)
83
 
84
- ```python
85
- from vllm import LLM
86
- llm = LLM(model="Qwen/Qwen3-0.6B", runner="pooling")
87
- outputs = llm.embed(["What is the capital of France?"])
88
- embedding = outputs[0].outputs.embedding
89
- ```
90
-
91
- Or with vLLM for faster batch inference:
92
 
93
  ```python
94
  from vllm import LLM
 
95
  llm = LLM(model="Qwen/Qwen3-0.6B", runner="pooling")
96
  outputs = llm.embed(["What is the capital of France?"])
97
  embedding = outputs[0].outputs.embedding
 
34
  ### Installation
35
 
36
  ```bash
37
+ pip install scikit-learn numpy joblib huggingface_hub torch "transformers>=4.51"
38
  ```
39
 
40
  ### Complete Example
41
 
42
  ```python
43
  from huggingface_hub import snapshot_download
44
+ import sys, torch
45
+ import numpy as np
46
+ from transformers import AutoModel, AutoTokenizer
47
 
48
  # 1. Download router
49
  path = snapshot_download("JiaqiXue/r2-router")
 
55
  router = R2Router.from_pretrained(path)
56
 
57
  # 3. Embed your query with Qwen3-0.6B (1024-dim)
58
+ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")
59
+ model = AutoModel.from_pretrained("Qwen/Qwen3-0.6B")
60
+
61
+ query = "What is the capital of France?"
62
+ inputs = tokenizer(query, return_tensors="pt", padding=True, truncation=True)
63
+ with torch.no_grad():
64
+ output = model(**inputs)
65
+ embedding = output.last_hidden_state.mean(dim=1).squeeze().numpy()
66
 
67
  # 4. Route!
68
  result = router.route(embedding)
 
88
 
89
  ### Alternative: vLLM Embeddings (Faster for Batches)
90
 
91
+ For GPU-accelerated batch embedding:
 
 
 
 
 
 
 
92
 
93
  ```python
94
  from vllm import LLM
95
+
96
  llm = LLM(model="Qwen/Qwen3-0.6B", runner="pooling")
97
  outputs = llm.embed(["What is the capital of France?"])
98
  embedding = outputs[0].outputs.embedding