Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- README.md +13 -7
- app.py +132 -0
- requirements.txt +4 -0
README.md
CHANGED
|
@@ -1,14 +1,20 @@
|
|
| 1 |
---
|
| 2 |
-
title: Mizan
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: apache-2.0
|
| 11 |
-
|
|
|
|
|
|
|
| 12 |
---
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Mizan-Rerank-v2 Demo
|
| 3 |
+
emoji: ๐
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: indigo
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 5.29.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: apache-2.0
|
| 11 |
+
models:
|
| 12 |
+
- ALJIACHI/Mizan-Rerank-v2
|
| 13 |
+
short_description: Arabic Long-Context Reranking Model Demo
|
| 14 |
---
|
| 15 |
|
| 16 |
+
# Mizan-Rerank-v2 Demo
|
| 17 |
+
|
| 18 |
+
Interactive demo for **Mizan-Rerank-v2**, a 305M-parameter cross-encoder model for Arabic long-context text reranking.
|
| 19 |
+
|
| 20 |
+
Enter a query and a list of documents (one per line) to see them reranked by relevance.
|
app.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import torch
|
| 4 |
+
from sentence_transformers import CrossEncoder
|
| 5 |
+
|
| 6 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 7 |
+
# Model
|
| 8 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 9 |
+
# Hub repository of the model this demo is built for; used when neither an
# override nor a local checkpoint is available (e.g. on a Hugging Face Space).
_HUB_MODEL_ID = "ALJIACHI/Mizan-Rerank-v2"

# Sibling checkout of the model, used for local testing.
_LOCAL_MODEL_DIR = os.path.join(os.path.dirname(__file__), "..", "Mizan-Reranker-v2")

# Resolution order:
#   1. the MODEL_ID environment variable, when set;
#   2. the local checkpoint directory, when it exists on disk;
#   3. the Hub repository.
# Falling back to the Hub means the app no longer needs a manual source edit
# before deployment, while local runs with the checkout present are unchanged.
MODEL_ID = os.environ.get(
    "MODEL_ID",
    _LOCAL_MODEL_DIR if os.path.isdir(_LOCAL_MODEL_DIR) else _HUB_MODEL_ID,
)
|
| 14 |
+
|
| 15 |
+
# Use the GPU when torch can see one; otherwise run on the CPU.
_device = "cuda" if torch.cuda.is_available() else "cpu"

# Cross-encoder reranker, loaded once at import time and used by rerank().
# NOTE(review): trust_remote_code=True presumably lets code shipped with the
# checkpoint run locally — keep MODEL_ID pointed at a trusted source.
model = CrossEncoder(
    MODEL_ID,
    device=_device,
    trust_remote_code=True,
    max_length=8192,
)
|
| 21 |
+
|
| 22 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 23 |
+
# Reranking logic
|
| 24 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 25 |
+
def rerank(query: str, documents: str) -> str:
    """Score newline-separated documents against a query and format the ranking.

    Args:
        query: The search query. Empty, whitespace-only or ``None`` input is
            rejected with a warning message.
        documents: Candidate documents, one per line. Blank lines are ignored
            and each document is stripped of surrounding whitespace.

    Returns:
        A Markdown string: a warning when the query or the document list is
        empty; otherwise a header followed by one section per document, in
        the order returned by ``model.rank``, each showing the score, a
        20-cell score bar and a preview of at most 200 characters.
    """
    # NOTE(review): the non-ASCII literals in this function were restored from
    # mis-encoded text; the header emoji could not be recovered and is a
    # placeholder — confirm against the deployed app.
    query = query or ""
    documents = documents or ""

    if not query.strip():
        return "⚠️ يرجى إدخال استعلام (Please enter a query)"

    docs = [line.strip() for line in documents.strip().split("\n") if line.strip()]
    if not docs:
        return "⚠️ يرجى إدخال مستند واحد على الأقل (Please enter at least one document)"

    bar_width = 20
    sections = []
    for position, entry in enumerate(model.rank(query, docs), 1):
        score = entry["score"]
        doc_text = docs[entry["corpus_id"]]
        preview = doc_text[:200] + ("..." if len(doc_text) > 200 else "")
        # Clamp so that a score outside [0, 1] can never yield a bar that is
        # longer or shorter than bar_width cells.
        filled = max(0, min(bar_width, int(score * bar_width)))
        bar = "█" * filled + "░" * (bar_width - filled)
        sections.append(
            f"### #{position} — Score: {score:.4f}\n"
            f"`{bar}`\n\n"
            f"{preview}\n"
        )

    header = f"## 📊 Results — {len(docs)} documents reranked\n---\n"
    return header + "\n---\n".join(sections)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 55 |
+
# Examples
|
| 56 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 57 |
+
# Example inputs for the demo. Each entry is [query, documents], where
# `documents` is a single string holding one candidate document per line.
# NOTE(review): the Arabic text was restored from mis-encoded source; a few
# letters were ambiguous in the damaged text — confirm against the original.
EXAMPLES = [
    [
        "ما هو تفسير الآية وجعلنا من الماء كل شيء حي",
        "تعني الآية أن الماء هو عنصر أساسي في حياة جميع الكائنات الحية، وهو ضروري لاستمرار الحياة.\n"
        "تم اكتشاف كواكب خارج المجموعة الشمسية تحتوي على مياه متجمدة.\n"
        "تحدث القرآن الكريم عن البرق والرعد في عدة مواضع مختلفة.",
    ],
    [
        "ما هي فوائد فيتامين د؟",
        "يساعد فيتامين د في تعزيز صحة العظام وتقوية الجهاز المناعي، كما يلعب دوراً مهماً في امتصاص الكالسيوم.\n"
        "يستخدم فيتامين د في بعض الصناعات الغذائية كمادة حافظة.\n"
        "أطلقت وزارة الزراعة حملة وطنية لزيادة الوعي بأهمية الزراعة العضوية.",
    ],
    [
        "ما حكم الصلاة في الإسلام؟",
        "الصلاة هي الركن الثاني من أركان الإسلام وهي واجبة على كل مسلم بالغ عاقل خمس مرات في اليوم والليلة.\n"
        "يُستحب للمسلم أن يصلي صلاة النوافل لزيادة الأجر والثواب.\n"
        "تأسست الجامعة الإسلامية في المدينة المنورة عام 1961 ميلادي.\n"
        "يجب على المسلم الطهارة قبل أداء الصلاة وهي شرط من شروط صحتها.",
    ],
]
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 81 |
+
# Gradio UI
|
| 82 |
+
# ──────────────────────────────────────────────────────────────────────────────
|
| 83 |
+
# Page-level styling: cap the layout width, centre the header, and make the
# input text areas right-to-left for Arabic text.
CSS = """
.gradio-container { max-width: 900px !important; }
.header-text { text-align: center; }
.rtl-text textarea { direction: rtl; text-align: right; font-size: 16px; }
"""

# NOTE(review): the non-ASCII literals below were restored from mis-encoded
# text. The Arabic labels and the "·" separators are reconstructed from
# context; the emoji could not be recovered and are placeholders — confirm
# against the deployed app.
with gr.Blocks(css=CSS, theme=gr.themes.Soft(), title="Mizan-Rerank-v2 Demo") as demo:
    # Static page header with a link to the model card.
    gr.HTML(
        """
        <div class="header-text">
            <h1>🔎 Mizan-Rerank-v2</h1>
            <p style="font-size:16px; color:#555;">
                Arabic Long-Context Reranking Model · 305M Parameters · Up to 8192 Tokens
            </p>
            <p style="font-size:14px;">
                <a href="https://huggingface.co/ALJIACHI/Mizan-Rerank-v2" target="_blank">Model Card</a>
            </p>
        </div>
        """
    )

    with gr.Row():
        # Left column: query, candidate documents and the trigger button.
        with gr.Column(scale=1):
            query_input = gr.Textbox(
                label="🔍 Query / الاستعلام",
                placeholder="أدخل الاستعلام هنا...",
                lines=2,
                elem_classes=["rtl-text"],
            )
            docs_input = gr.Textbox(
                label="📄 Documents (one per line) / المستندات (سطر لكل مستند)",
                placeholder="أدخل كل مستند في سطر منفصل...",
                lines=8,
                elem_classes=["rtl-text"],
            )
            rerank_btn = gr.Button("⚡ Rerank / إعادة الترتيب", variant="primary", size="lg")

        # Right column: the Markdown returned by rerank().
        with gr.Column(scale=1):
            output = gr.Markdown(label="Results / النتائج")

    gr.Examples(
        examples=EXAMPLES,
        inputs=[query_input, docs_input],
        label="📝 Try an Example / جرب مثالاً",
    )

    # Run the reranker on button click and when the query box is submitted.
    rerank_btn.click(fn=rerank, inputs=[query_input, docs_input], outputs=output)
    query_input.submit(fn=rerank, inputs=[query_input, docs_input], outputs=output)

# Only start the server when executed as a script, so importing this module
# (for example to reuse `demo` or `rerank`) does not launch it.
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
sentence-transformers>=5.0.0
|
| 2 |
+
transformers>=4.40.0
|
| 3 |
+
torch>=2.0.0
|
| 4 |
+
gradio>=5.0.0
|