hq-bench
/

coreb-code-reranker

@@ -55,34 +55,55 @@ Reranking delta on CoREB v202603, using C2LLM-7B as the first-stage retriever:
 ## Usage
-CoREB-Reranker follows the same usage pattern as Qwen3-Reranker. Given a query and a list of candidate documents from first-stage retrieval, the reranker scores each query-document pair:
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
 model_id = "hq-bench/coreb-code-reranker"
 tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
-# Format as Qwen3-Reranker input
-query = "binary search implementation"
-document = "def binary_search(arr, target):\n    lo, hi = 0, len(arr) - 1\n    ..."
-prefix = '<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be "yes" or "no".<|im_end|>\n<|im_start|>user\n'
-suffix = "<|im_end|>\n<|im_start|>assistant\n"
-instruct = "Given a code search query, does the following code snippet match the query intent?"
-prompt = f"{prefix}<Instruct>: {instruct}\n<Query>: {query}\n<Document>: {document}{suffix}"
-inputs = tokenizer(prompt, return_tensors="pt")
-outputs = model(**inputs)
-# Score is the logit difference between "yes" and "no" tokens
 yes_id = tokenizer.convert_tokens_to_ids("yes")
 no_id = tokenizer.convert_tokens_to_ids("no")
-logits = outputs.logits[0, -1, :]
-score = logits[yes_id] - logits[no_id]
-print(f"Relevance score: {score.item():.4f}")
 ```
 For batch reranking with the CoREB evaluation pipeline, see the [CoREB repository](https://github.com/hq-bench/coreb).

 ## Usage
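+CoREB-Reranker follows the same usage pattern as Qwen3-Reranker: each query-document pair is scored as the logit difference between the "yes" and "no" tokens at the final position, so a higher score means a more relevant document. The instruction is **task-specific** — use the one that matches your retrieval task: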
 ```python
+from enum import Enum
 from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+class Task(Enum):
+    """Retrieval tasks; each member's value is the instruction text placed in the prompt's ``<Instruct>`` field."""
+    # Natural-language task description as the query, code as the document.
+    TEXT_TO_CODE = "Given a natural language programming task, retrieve code that correctly solves or implements the task."
+    # Code as the query, semantically equivalent code as the document.
+    CODE_TO_CODE = "Given a code snippet, retrieve code that is semantically equivalent or solves the same task."
+    # Code as the query, a natural-language description as the document.
+    CODE_TO_TEXT = "Given a code snippet, retrieve the natural language description or problem statement that best matches the code."
 model_id = "hq-bench/coreb-code-reranker"
 tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, trust_remote_code=True)
+model.eval()
+PREFIX = '<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be "yes" or "no".<|im_end|>\n<|im_start|>user\n'
+SUFFIX = "<|im_end|>\n<|im_start|>assistant\n"
 yes_id = tokenizer.convert_tokens_to_ids("yes")
 no_id = tokenizer.convert_tokens_to_ids("no")
+def score(query: str, document: str, task: Task) -> float:
+    prompt = f"{PREFIX}<Instruct>: {task.value}\n<Query>: {query}\n<Document>: {document}{SUFFIX}"
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=4096)
+    with torch.no_grad():
+        logits = model(**inputs).logits[0, -1, :]
+    return (logits[yes_id] - logits[no_id]).item()
+# Text-to-Code: natural language query -> code
+print(score(
+    query="binary search implementation",
+    document="def binary_search(arr, target):\n    lo, hi = 0, len(arr) - 1\n    ...",
+    task=Task.TEXT_TO_CODE,
+))
+# Code-to-Code: code -> semantically equivalent code
+print(score(
+    query="def binary_search(arr, target): ...",
+    document="int binarySearch(int[] arr, int target) { ... }",
+    task=Task.CODE_TO_CODE,
+))
+# Code-to-Text: code -> problem description
+print(score(
+    query="def binary_search(arr, target): ...",
+    document="Find the index of a target value in a sorted array using binary search.",
+    task=Task.CODE_TO_TEXT,
+))
 ```
 For batch reranking with the CoREB evaluation pipeline, see the [CoREB repository](https://github.com/hq-bench/coreb).