"""Probe what Whisper ``generate`` returns for an all-zero input.

The script loads a Whisper checkpoint, feeds it a zero-filled feature
tensor, prints the shape/type of the generated token ids, and then checks
whether the tokenizer can ``batch_decode`` the resulting nested list with
timestamp decoding enabled.  Any decode failure is reported with a full
traceback instead of aborting the script.
"""
import traceback
import warnings

import torch

# Installed before importing transformers so the "ignore" filter is already
# active for anything emitted from here on (same ordering as the original).
warnings.filterwarnings("ignore")

from transformers import WhisperForConditionalGeneration, WhisperProcessor  # noqa: E402

model_name_or_path = "openai/whisper-large-v3"

# Prefer the GPU in half precision (the original configuration); fall back to
# CPU in float32 so the probe still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

processor = WhisperProcessor.from_pretrained(model_name_or_path)
model = WhisperForConditionalGeneration.from_pretrained(
    model_name_or_path, torch_dtype=dtype
).to(device)

# Zero-filled input features with layout (batch, mel bins, frames).  The bin
# count is read from the model config (128 for large-v3) so that changing
# `model_name_or_path` keeps the shape valid.  3000 frames is kept from the
# original script -- NOTE(review): presumably Whisper's fixed input length;
# confirm before changing it.
num_frames = 3000
inputs = torch.zeros(
    1, model.config.num_mel_bins, num_frames, dtype=dtype, device=device
)

# Let's check what generation returns for an empty input
out = model.generate(inputs, return_timestamps=True, num_beams=1)
print("Output shape:", out.shape)
print("Output type:", type(out))

# Convert to plain nested Python lists, the form handed to the tokenizer below.
out_list = out.cpu().numpy().tolist()
print("Output list type:", type(out_list))
# Guard both nesting levels so an empty result cannot raise IndexError here.
if out_list and isinstance(out_list[0], list) and out_list[0]:
    print("Depth 2:", type(out_list[0][0]))

try:
    processor.tokenizer.batch_decode(
        out_list, skip_special_tokens=False, decode_with_timestamps=True
    )
except Exception:
    # Diagnostic boundary: the point of the probe is to see *how* decoding
    # fails, so report the full traceback rather than letting it propagate.
    traceback.print_exc()
else:
    print("Batch decode successful")