# ALJIACHI: "Upload folder using huggingface_hub" (commit 6632c9a, verified)
import math
import os

import gradio as gr
import torch
from sentence_transformers import CrossEncoder
# ──────────────────────────────────────────────────────────────────────────────
# Model
# ──────────────────────────────────────────────────────────────────────────────
# Checkpoint to serve; the MODEL_ID environment variable overrides the default.
MODEL_ID = os.getenv("MODEL_ID", "ALJIACHI/Mizan-Rerank-v2")

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
_device = "cuda" if torch.cuda.is_available() else "cpu"

# Loaded once at import time and shared by every rerank() call.
model = CrossEncoder(MODEL_ID, max_length=8192, trust_remote_code=True, device=_device)
# ──────────────────────────────────────────────────────────────────────────────
# Reranking logic
# ──────────────────────────────────────────────────────────────────────────────
_BAR_WIDTH = 20  # number of cells in the textual score bar
_PREVIEW_CHARS = 200  # documents longer than this are truncated in the output


def _score_bar(score: float, width: int = _BAR_WIDTH) -> str:
    """Render *score* as a bar of exactly *width* cells, treating it as 0..1."""
    # Clamp so a score outside 0..1 cannot yield a negative repeat count or a
    # bar longer than *width*.
    filled = max(0, min(width, int(score * width)))
    return "█" * filled + "░" * (width - filled)


def _format_entry(position: int, score: float, text: str) -> str:
    """Build the Markdown section for one ranked document."""
    preview = text[:_PREVIEW_CHARS] + ("..." if len(text) > _PREVIEW_CHARS else "")
    return (
        f"### #{position} — Score: {score:.4f}\n"
        f"`{_score_bar(score)}`\n\n"
        f"{preview}\n"
    )


def rerank(query: str, documents: str) -> str:
    """Score newline-separated documents against a query and format the ranking.

    Args:
        query: The search query. ``None`` or whitespace-only input produces a
            warning message instead of a ranking.
        documents: Candidate documents, one per line. Blank lines are ignored;
            ``None`` is treated as empty.

    Returns:
        A Markdown string: either a warning, or a header followed by one
        section per entry returned by ``model.rank`` (in that order), each
        showing the rank position, the score, a 20-cell score bar and a
        preview of at most 200 characters of the document.
    """
    # UI callbacks may hand over None for an untouched field; treat it as empty.
    if not (query or "").strip():
        return "⚠️ يرجى إدخال استعلام (Please enter a query)"

    docs = [line.strip() for line in (documents or "").split("\n") if line.strip()]
    if not docs:
        return "⚠️ يرجى إدخال مستند واحد على الأقل (Please enter at least one document)"

    sections = []
    for position, entry in enumerate(model.rank(query, docs), 1):
        score = float(entry["score"])
        # NaN and ±inf cannot be drawn as a bar (int(inf) raises); show them as 0.
        if not math.isfinite(score):
            score = 0.0
        sections.append(_format_entry(position, score, docs[entry["corpus_id"]]))

    header = f"## 📊 Results — {len(docs)} documents reranked\n---\n"
    return header + "\n---\n".join(sections)
# ──────────────────────────────────────────────────────────────────────────────
# Examples
# ──────────────────────────────────────────────────────────────────────────────
# Sample inputs for the demo. Each entry is [query, documents], where
# documents is a single string holding one candidate document per line.
EXAMPLES = [
    [
        "ما هو تفسير الآية وجعلنا من الماء كل شيء حي",
        "تعني الآية أن الماء هو عنصر أساسي في حياة جميع الكائنات الحية، وهو ضروري لاستمرار الحياة.\n"
        "تم اكتشاف كواكب خارج المجموعة الشمسية تحتوي على مياه متجمدة.\n"
        "تحدث القرآن الكريم عن البرق والرعد في عدة مواضع مختلفة.",
    ],
    [
        "ما هي فوائد فيتامين د؟",
        "يساعد فيتامين د في تعزيز صحة العظام وتقوية الجهاز المناعي، كما يلعب دوراً مهماً في امتصاص الكالسيوم.\n"
        "يستخدم فيتامين د في بعض الصناعات الغذائية كمادة حافظة.\n"
        "أطلقت وزارة الزراعة حملة وطنية لزيادة الوعي بأهمية الزراعة العضوية.",
    ],
    [
        "ما حكم الصلاة في الإسلام؟",
        "الصلاة هي الركن الثاني من أركان الإسلام وهي واجبة على كل مسلم بالغ عاقل خمس مرات في اليوم والليلة.\n"
        "يُستحب للمسلم أن يصلي صلاة النوافل لزيادة الأجر والثواب.\n"
        "تأسست الجامعة الإسلامية في المدينة المنورة عام 1961 ميلادي.\n"
        "يجب على المسلم الطهارة قبل أداء الصلاة وهي شرط من شروط صحتها.",
    ],
]
# ──────────────────────────────────────────────────────────────────────────────
# Gradio UI
# ──────────────────────────────────────────────────────────────────────────────
# Custom stylesheet passed to gr.Blocks(css=...): caps the container width at
# 900px, centers elements with the "header-text" class, and makes textareas
# inside elements with the "rtl-text" class right-to-left and right-aligned.
CSS = """
.gradio-container { max-width: 900px !important; }
.header-text { text-align: center; }
.rtl-text textarea { direction: rtl; text-align: right; font-size: 16px; }
"""
# Page layout: an HTML header, then a row with two equal-scale columns
# (query, documents and button in the first; Markdown results in the second),
# followed by the example picker.
with gr.Blocks(css=CSS, theme=gr.themes.Soft(), title="Mizan-Rerank-v2 Demo") as demo:
    gr.HTML(
        """
        <div class="header-text">
            <h1>🔍 Mizan-Rerank-v2</h1>
            <p style="font-size:16px; color:#555;">
                Arabic Long-Context Reranking Model · 305M Parameters · Up to 8192 Tokens
            </p>
            <p style="font-size:14px;">
                <a href="https://huggingface.co/ALJIACHI/Mizan-Rerank-v2" target="_blank">Model Card</a> ·
            </p>
        </div>
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            # Both text inputs carry the "rtl-text" class so the CSS above
            # renders them right-to-left.
            query_input = gr.Textbox(
                label="🔎 Query / الاستعلام",
                placeholder="أدخل الاستعلام هنا...",
                lines=2,
                elem_classes=["rtl-text"],
            )
            docs_input = gr.Textbox(
                label="📄 Documents (one per line) / المستندات (سطر لكل مستند)",
                placeholder="أدخل كل مستند في سطر منفصل...",
                lines=8,
                elem_classes=["rtl-text"],
            )
            rerank_btn = gr.Button("⚡ Rerank / إعادة الترتيب", variant="primary", size="lg")
        with gr.Column(scale=1):
            output = gr.Markdown(label="Results / النتائج")

    # Selecting an example fills the query and documents boxes.
    gr.Examples(
        examples=EXAMPLES,
        inputs=[query_input, docs_input],
        label="📌 Try an Example / جرب مثالاً",
    )

    # The button click and the query box's submit event run the same callback.
    rerank_btn.click(fn=rerank, inputs=[query_input, docs_input], outputs=output)
    query_input.submit(fn=rerank, inputs=[query_input, docs_input], outputs=output)

demo.launch()