ALJIACHI committed on
Commit
8a3c13d
·
verified ·
1 Parent(s): f0f2e1e

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +29 -9
  2. requirements.txt +0 -1
app.py CHANGED
@@ -1,23 +1,44 @@
1
  import os
2
  import gradio as gr
3
  import torch
4
- from sentence_transformers import CrossEncoder
5
 
6
  # ──────────────────────────────────────────────────────────────────────────────
7
  # Model
8
  # ──────────────────────────────────────────────────────────────────────────────
9
  MODEL_ID = os.environ.get("MODEL_ID", "ALJIACHI/Mizan-Rerank-v2")
 
10
 
11
- model = CrossEncoder(
 
12
  MODEL_ID,
13
- max_length=8192,
14
  trust_remote_code=True,
15
- device="cuda" if torch.cuda.is_available() else "cpu",
16
  )
 
17
 
18
  # ──────────────────────────────────────────────────────────────────────────────
19
  # Reranking logic
20
  # ──────────────────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def rerank(query: str, documents: str) -> str:
22
  """Score and rerank documents against a query."""
23
  if not query.strip():
@@ -27,18 +48,17 @@ def rerank(query: str, documents: str) -> str:
27
  if not docs:
28
  return "⚠️ يرجى إدخال مستند واحد على الأقل (Please enter at least one document)"
29
 
30
- ranks = model.rank(query, docs)
 
31
 
32
  lines = []
33
- for i, entry in enumerate(ranks, 1):
34
- idx = entry["corpus_id"]
35
- score = entry["score"]
36
  doc_text = docs[idx]
37
  preview = doc_text[:200] + ("..." if len(doc_text) > 200 else "")
38
  bar_len = int(score * 20)
39
  bar = "█" * bar_len + "░" * (20 - bar_len)
40
  lines.append(
41
- f"### #{i} — Score: {score:.4f}\n"
42
  f"`{bar}`\n\n"
43
  f"{preview}\n"
44
  )
 
1
  import os
2
  import gradio as gr
3
  import torch
4
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
5
 
6
  # ──────────────────────────────────────────────────────────────────────────────
7
  # Model
8
  # ──────────────────────────────────────────────────────────────────────────────
9
  MODEL_ID = os.environ.get("MODEL_ID", "ALJIACHI/Mizan-Rerank-v2")
10
+ MAX_LENGTH = 512
11
 
12
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
13
+ model = AutoModelForSequenceClassification.from_pretrained(
14
  MODEL_ID,
 
15
  trust_remote_code=True,
16
+ torch_dtype=torch.float32,
17
  )
18
+ model.eval()
19
 
20
  # ──────────────────────────────────────────────────────────────────────────────
21
  # Reranking logic
22
  # ──────────────────────────────────────────────────────────────────────────────
23
+ def get_scores(query: str, passages: list[str]) -> list[float]:
24
+ """Score query-passage pairs using the model directly."""
25
+ pairs = [(query, p) for p in passages]
26
+ inputs = tokenizer(
27
+ [q for q, _ in pairs],
28
+ [p for _, p in pairs],
29
+ return_tensors="pt",
30
+ padding=True,
31
+ truncation=True,
32
+ max_length=MAX_LENGTH,
33
+ )
34
+ with torch.no_grad():
35
+ outputs = model(**inputs)
36
+ scores = torch.sigmoid(outputs.logits.squeeze(-1)).tolist()
37
+ if isinstance(scores, float):
38
+ scores = [scores]
39
+ return scores
40
+
41
+
42
  def rerank(query: str, documents: str) -> str:
43
  """Score and rerank documents against a query."""
44
  if not query.strip():
 
48
  if not docs:
49
  return "⚠️ يرجى إدخال مستند واحد على الأقل (Please enter at least one document)"
50
 
51
+ scores = get_scores(query, docs)
52
+ ranked = sorted(enumerate(scores), key=lambda x: x[1], reverse=True)
53
 
54
  lines = []
55
+ for rank, (idx, score) in enumerate(ranked, 1):
 
 
56
  doc_text = docs[idx]
57
  preview = doc_text[:200] + ("..." if len(doc_text) > 200 else "")
58
  bar_len = int(score * 20)
59
  bar = "█" * bar_len + "░" * (20 - bar_len)
60
  lines.append(
61
+ f"### #{rank} — Score: {score:.4f}\n"
62
  f"`{bar}`\n\n"
63
  f"{preview}\n"
64
  )
requirements.txt CHANGED
@@ -1,4 +1,3 @@
1
- sentence-transformers>=5.0.0
2
  transformers>=4.40.0
3
  torch>=2.0.0
4
  gradio>=5.0.0
 
 
1
  transformers>=4.40.0
2
  torch>=2.0.0
3
  gradio>=5.0.0