Inference with transformers==5.3.0 fails

#25
by guillaumeguy - opened

Replication code:

# Reproduce GTE embedding extraction + cosine-similarity scoring.
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {device}")

texts = [
    "what is the capital of China?",
    "how to implement quick sort in python?",
    "Beijing",
    "sorting algorithms",
]

model_path = 'Alibaba-NLP/gte-large-en-v1.5'
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).to(device)

# Tokenize the whole batch in one call.
batch = tokenizer(texts, max_length=512, padding=True, truncation=True, return_tensors='pt')

with torch.no_grad():
    # Move every input tensor to the target device before the forward pass.
    batch = {name: tensor.to(device) for name, tensor in batch.items()}
    outputs = model(**batch)

# One embedding per input: the CLS (first) token's hidden state.
embeddings = outputs.last_hidden_state[:, 0]

# (Optionally) L2-normalize so the dot product below is cosine similarity.
embeddings = F.normalize(embeddings, p=2, dim=1)
scores = (embeddings[:1] @ embeddings[1:].T) * 100
print(scores.tolist())
File ~/.cache/huggingface/modules/transformers_modules/Alibaba_hyphen_NLP/new_hyphen_impl/40ced75c3017eb27626c9d4ea981bde21a2662f4/modeling.py:408, in NewEmbeddings.forward(self, unpad_inputs, input_ids, attention_mask, length, token_type_ids, position_ids, inputs_embeds)
    405             token_type_ids = token_type_ids[attention_mask_bool].unsqueeze(0)
    407     token_type_embeddings = self.token_type_embeddings(token_type_ids)
--> 408     embeddings = embeddings + token_type_embeddings
    410 # BERT position
    411 if self.position_embedding_type == "absolute":

AcceleratorError: CUDA error: device-side assert triggered
Search for `cudaErrorAssert' in https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html for more information.
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Downgrading transformers to 4.36.0 makes the same script run successfully.

Sign up or log in to comment