| """ |
| inference_safetensors.py |
| |
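| Defines the architecture of the fine-tuned embedding model used for off-topic classification, plus a `predict` helper and a command-line entry point that run inference on sentence pairs using SafeTensors weights. |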
| """ |
| import json |
| import torch |
| import sys |
| import torch.nn as nn |
|
|
| from huggingface_hub import hf_hub_download |
| from safetensors.torch import load_file |
| from transformers import AutoTokenizer, AutoModel |
|
|
class CrossEncoderWithMLP(nn.Module):
    """Cross-encoder classifier: a base encoder followed by an MLP head.

    The pooled output of ``base_model`` goes through two ``Linear`` + ``ReLU``
    stages (narrowing to half, then a quarter, of the encoder's hidden size)
    and is finally projected to ``num_labels`` logits.

    Args:
        base_model: Encoder exposing ``config.hidden_size`` and whose call
            result has a ``pooler_output`` attribute.
        num_labels: Number of output classes. Defaults to 2.
    """

    def __init__(self, base_model, num_labels=2):
        super().__init__()

        self.base_model = base_model

        width = base_model.config.hidden_size
        half_width = width // 2
        quarter_width = width // 4

        # Attribute names and positions inside the Sequential determine the
        # state-dict keys (mlp.0, mlp.2, classifier), so they must not move.
        head_layers = [
            nn.Linear(width, half_width),
            nn.ReLU(),
            nn.Linear(half_width, quarter_width),
            nn.ReLU(),
        ]
        self.mlp = nn.Sequential(*head_layers)
        self.classifier = nn.Linear(quarter_width, num_labels)

    def forward(self, input_ids, attention_mask):
        """Return the unnormalised class logits for the given encoded inputs.

        Args:
            input_ids: Token ids, passed positionally to ``base_model``.
            attention_mask: Attention mask, passed positionally to
                ``base_model`` as its second argument.

        Returns:
            The output of the classifier layer applied to the MLP features.
        """
        encoded = self.base_model(input_ids, attention_mask)
        features = self.mlp(encoded.pooler_output)
        return self.classifier(features)
|
|
| |
# Repository identifier of the fine-tuned model (presumably a Hugging Face Hub
# repo id; it is not referenced by the rest of the visible code).
repo_path = "govtech/jina-embeddings-v2-small-en-off-topic"

# Location of the JSON configuration file, relative to the working directory.
config_path = "config.json"

# Read the configuration once at import time. The encoding is given explicitly
# so that decoding does not depend on the platform's locale default.
with open(config_path, 'r', encoding='utf-8') as f:
    config = json.load(f)
|
|
# Loaded (tokenizer, model, device) triples keyed by
# (model_name, model_weights_fp), so repeated predict() calls reuse them
# instead of re-reading the tokenizer, base model and weights every time.
_LOADED_CLASSIFIERS = {}


def _load_classifier(model_name, model_weights_fp):
    """Return the ``(tokenizer, model, device)`` triple, loading it only once per key."""
    cache_key = (model_name, model_weights_fp)
    if cache_key not in _LOADED_CLASSIFIERS:
        # Always a torch.device, whichever backend is selected.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        base_model = AutoModel.from_pretrained(model_name)
        model = CrossEncoderWithMLP(base_model, num_labels=2)

        # Load the fine-tuned weights from the SafeTensors file.
        model.load_state_dict(load_file(model_weights_fp))
        model.to(device)
        model.eval()

        _LOADED_CLASSIFIERS[cache_key] = (tokenizer, model, device)
    return _LOADED_CLASSIFIERS[cache_key]


def predict(sentence1, sentence2):
    """
    Predict the label for a pair of sentences using the fine-tuned model with SafeTensors weights.

    The model name, maximum sequence length and weights path are read from
    ``config['classifier']['embedding']``. The tokenizer and model are loaded
    on the first call and reused by later calls with the same configuration.

    Args:
        sentence1 (str): The first input sentence.
        sentence2 (str): The second input sentence.

    Returns:
        tuple:
            - predicted_label (int): Index of the highest-probability class (0 or 1).
            - probabilities (numpy.ndarray): Softmax probabilities for each class;
              for a single sentence pair this is an array of shape (1, 2).

    Raises:
        KeyError: If a required entry is missing from ``config``.
    """
    embedding_config = config['classifier']['embedding']
    model_name = embedding_config['model_name']
    max_length = embedding_config['max_length']
    model_weights_fp = embedding_config['model_weights_fp']

    tokenizer, model, device = _load_classifier(model_name, model_weights_fp)

    # Encode the two sentences together as a single cross-encoder input.
    encoding = tokenizer(
        sentence1, sentence2,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=max_length,
        return_token_type_ids=False
    )
    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        probabilities = torch.softmax(outputs, dim=1)
        predicted_label = torch.argmax(probabilities, dim=1).item()

    return predicted_label, probabilities.cpu().numpy()
|
|
def _is_sentence_pair(pair):
    """Return True if *pair* is a list of exactly two strings."""
    return (
        isinstance(pair, list)
        and len(pair) == 2
        and all(isinstance(item, str) for item in pair)
    )


def main(argv=None):
    """Run predictions for the sentence pairs given as a JSON command-line argument.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. ``argv[1]`` must be a
            JSON array of ``[sentence1, sentence2]`` pairs.

    Raises:
        SystemExit: If the JSON argument is missing.
        ValueError: If the argument is not valid JSON, is not a JSON array, or
            any element is not a list of exactly two strings.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        raise SystemExit(
            "Usage: python inference_safetensors.py "
            "'<JSON array of [sentence1, sentence2] pairs>'"
        )

    sentence_pairs = json.loads(argv[1])

    if not isinstance(sentence_pairs, list):
        raise ValueError("Input must be a JSON array of sentence pairs.")

    # Validate everything up front so that no prediction runs on bad input.
    # Checking only pair[0] and pair[1] would let a bare string such as "ab"
    # through and silently split it into two one-character "sentences".
    if not all(_is_sentence_pair(pair) for pair in sentence_pairs):
        raise ValueError("Each pair must contain two strings.")

    for idx, (sentence1, sentence2) in enumerate(sentence_pairs, start=1):
        predicted_label, probabilities = predict(sentence1, sentence2)

        print(f"Pair {idx}:")
        print(f" Sentence 1: {sentence1}")
        print(f" Sentence 2: {sentence2}")
        print(f" Predicted Label: {predicted_label}")
        print(f" Probabilities: {probabilities}")
        print('-' * 50)


if __name__ == "__main__":
    main()
|
|