diff --git "a/eval/qa_pairs_accepted.csv" "b/eval/qa_pairs_accepted.csv" deleted file mode 100644--- "a/eval/qa_pairs_accepted.csv" +++ /dev/null @@ -1,1546 +0,0 @@ -discussion_title;discussion_url;discussion_topic_id;discussion_category;discussion_created_at;thread;question;solution;Accept -"QLoRA - model isn’t training";https://discuss.huggingface.co/t/qlora-model-isnt-training/169337;169337;5;2025-10-22 11:19:32.837000+00:00;"[{'id': 243954, 'name': 'Anton Bartash', 'username': 'antbartash', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/a/46a35a/{size}.png', 'created_at': '2025-10-22T11:19:32.912Z', 'cooked': '

Hi everyone,
\nI’ve been trying to switch from LoRA to QLoRA on an Nvidia T4, but I’m running into an issue where the evaluation loss stays completely flat, while the training loss fluctuates around its initial value.

\n

My LoRA setup works fine, but adding bnb_config, model.gradient_checkpointing_enable(), and model = prepare_model_for_kbit_training(model) causes the issue described above.
\n

10000003961455×959 167 KB

\n

Since the non-quantized version runs without problems, I don’t think the issue is related to the LoRA config, dataset, or formatting functions. The number of trainable parameters is non-zero for both the LoRA and QLoRA setups.

\n

Below is the code I’m using for QLoRA. Any help would be appreciated!

\n
ds_train_with_assistant_content = ds_train.map(construct_message_with_assistant_content)\nds_valid_with_assistant_content = ds_valid.map(construct_message_with_assistant_content)\n\nbnb_config = BitsAndBytesConfig(\n    load_in_4bit=True,\n    bnb_4bit_use_double_quant=True,\n    bnb_4bit_quant_type=""nf4"",\n    bnb_4bit_compute_dtype=torch.bfloat16\n)\n\ncheckpoint = ""Qwen/Qwen3-0.6B""\ntokenizer = AutoTokenizer.from_pretrained(checkpoint)\nmodel = AutoModelForCausalLM.from_pretrained(\n    checkpoint,\n    device_map=""auto"",\n    quantization_config=bnb_config\n)\n\nmodel.config.use_cache = False\nmodel.gradient_checkpointing_enable()\nmodel = prepare_model_for_kbit_training(model)\nmodel.enable_input_require_grads()\n\n\ntimestamp = datetime.now().strftime(\'%Y-%m-%d_%H-%M-%S\')\nRUN_NAME = f\'qlora-final-model-all-linear-r64-{timestamp}\'\nwandb.init(\n    project=os.environ[""WANDB_PROJECT""],\n    name=RUN_NAME,\n    # id=run_id,         # resume previous run if available\n    resume=""allow"",    # allows resuming crashed run\n)\n\n\nRESUME_TRAINING = False\nOUTPUT_DIR = ""./qlora-final_model_all_linear_r64-output""\nPER_DEVICE_BATCH_SIZE = 2  # higher values --> OOM\n\noptimizer = \'paged_adamw_8bit\'\neffective_batch_size = 16\nlearning_rate = 1e-5\nweight_decay = 0.0\nbetas = (0.9, 0.9999)\nwarmup_ratio = 0.2\nepochs = 1\ngradient_accumulation_steps = int(effective_batch_size / PER_DEVICE_BATCH_SIZE)\nlora_r = 16*4\nlora_alpha = 64*4\nlora_dropout = 0.01\n\n\ntraining_args = TrainingArguments(\n    output_dir=OUTPUT_DIR,\n    per_device_train_batch_size=PER_DEVICE_BATCH_SIZE,\n    gradient_accumulation_steps=gradient_accumulation_steps,\n    learning_rate=learning_rate,\n    optim=optimizer, \n    num_train_epochs=epochs,\n    weight_decay=weight_decay,\n    lr_scheduler_type=""cosine"",\n    warmup_ratio=warmup_ratio,\n    save_strategy=""steps"",\n    save_steps=gradient_accumulation_steps*5,\n    save_total_limit=2,\n    eval_strategy=""steps"",\n    eval_steps=gradient_accumulation_steps*5,\n    logging_strategy=""steps"",\n    logging_steps=gradient_accumulation_steps*5,\n    report_to=[\'wandb\'],\n    run_name=RUN_NAME,\n    bf16=True,\n    # fp16=True,\n    # fp16_full_eval=True,\n    metric_for_best_model=""eval_loss"",\n    greater_is_better=False,\n    max_grad_norm=1,\n    load_best_model_at_end=True,\n    gradient_checkpointing=True,\n    gradient_checkpointing_kwargs={""use_reentrant"": False}\n)\n\n\npeft_config = LoraConfig(\n    r=lora_r,\n    lora_alpha=lora_alpha,\n    lora_dropout=lora_dropout,\n    bias=""none"",\n    task_type=""CAUSAL_LM"",\n    target_modules=\'all-linear\'\n)\n# model.requires_grad_(False)                     # freeze base weights (precautionary)\nmodel_peft = get_peft_model(model, peft_config) # inject a LoRA adapter\nprint_trainable_parameters(model_peft)\n\ntrainer = SFTTrainer(\n    model=model_peft,\n    train_dataset=ds_train_with_assistant_content,\n    eval_dataset=ds_valid_with_assistant_content,\n    formatting_func=formatting_func,\n    args=training_args,\n    callbacks=[EarlyStoppingCallback(early_stopping_patience=25)]\n)\n\n\n# Training setup summary\ndataset_size = len(ds_train_with_assistant_content)\nsteps_per_epoch = dataset_size // (PER_DEVICE_BATCH_SIZE * gradient_accumulation_steps)\ntotal_steps = steps_per_epoch * epochs\nwarmup_steps = int(total_steps * warmup_ratio)\n\nprint(""===== Training Setup Summary ====="")\nprint(f""Num epochs:            {epochs}"")\nprint(f""Effective batch size:  {effective_batch_size}"")\nprint(f""Per-device batch size: {PER_DEVICE_BATCH_SIZE}"")\nprint(f""Gradient accumulation: {gradient_accumulation_steps}"")\nprint(f""Dataset size:          {dataset_size}"")\nprint(f""Steps per epoch:       {steps_per_epoch}"")\nprint(f""Total training steps:  {total_steps}"")\nprint(f""Warmup steps:          {warmup_steps}"")\nprint(f""Logging steps:         {training_args.logging_steps}"")\nprint(""==================================="")\nprint(f""Start time: {datetime.now().strftime(\'%Y-%m-%d_%H-%M-%S\')}"")\n\n\n# Training\nlast_checkpoint = None\nif RESUME_TRAINING and os.path.isdir(OUTPUT_DIR):\n    last_checkpoint = get_last_checkpoint(OUTPUT_DIR)\n\nif last_checkpoint is not None:\n    print(f""Resuming training from checkpoint: {last_checkpoint}"")\n    trainer.train(resume_from_checkpoint=last_checkpoint)\nelse:\n    print(""Starting fresh training run"")\n    trainer.train()\n\nprint(f""End time: {datetime.now().strftime(\'%Y-%m-%d_%H-%M-%S\')}"")\n\n\n# WandB logging of eval metrics\nfor log in trainer.state.log_history:\n    if \'eval_loss\' in log:\n        wandb.log({\n            ""eval_loss"": log[\'eval_loss\'],\n            ""eval_perplexity"": math.exp(log[\'eval_loss\']),\n            ""step"": log[\'step\'],\n            ""learning_rate"": learning_rate,\n            ""weight_decay"": weight_decay,\n            ""betas"": betas,\n            ""warmup_ratio"": warmup_ratio,\n            ""effective_batch_size"": effective_batch_size,\n            ""optimizer"": optimizer\n        })\n\nwandb.finish()  # finish the run
', 'post_number': 1, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-10-22T11:19:32.912Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 32, 'reads': 8, 'readers_count': 7, 'score': 36.4, 'yours': False, 'topic_id': 169337, 'topic_slug': 'qlora-model-isnt-training', 'display_username': 'Anton Bartash', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 106030, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/qlora-model-isnt-training/169337/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 243957, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-10-22T12:52:50.634Z', 'cooked': '
\n

Nvidia T4

\n
\n

Since T4 doesn’t natively support torch.bfloat16, using torch.float16/ fp16=True instead might resolve the error. No other major issues appear to exist.

', 'post_number': 2, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-10-22T12:52:50.634Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 8, 'readers_count': 7, 'score': 11.4, 'yours': False, 'topic_id': 169337, 'topic_slug': 'qlora-model-isnt-training', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/qlora-model-isnt-training/169337/2', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243998, 'name': 'Anton Bartash', 'username': 'antbartash', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/a/46a35a/{size}.png', 'created_at': '2025-10-23T07:19:01.516Z', 'cooked': '

Thanks for the suggestion
\nIt turned out the issue was environment-related — I was able to get the expected results using the exact same code on Colab. In my local environment, clearing the caches for transformers, torch, etc., and upgrading all the libraries resolved the problem.

', 'post_number': 3, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-10-23T07:19:01.516Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 7, 'readers_count': 6, 'score': 21.2, 'yours': False, 'topic_id': 169337, 'topic_slug': 'qlora-model-isnt-training', 'display_username': 'Anton Bartash', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 106030, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/qlora-model-isnt-training/169337/3', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 244071, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-10-24T18:16:57.733Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 4, 'post_type': 3, 'posts_count': 4, 'updated_at': '2025-10-24T18:16:57.733Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 2, 'readers_count': 1, 'score': 0, 'yours': False, 'topic_id': 169337, 'topic_slug': 'qlora-model-isnt-training', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/qlora-model-isnt-training/169337/4', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

Hi everyone,
-I’ve been trying to switch from LoRA to QLoRA on an Nvidia T4, but I’m running into an issue where the evaluation loss stays completely flat, while the training loss fluctuates around its initial value.

-

My LoRA setup works fine, but adding bnb_config, model.gradient_checkpointing_enable(), and model = prepare_model_for_kbit_training(model) causes the issue described above.
-

10000003961455×959 167 KB

-

Since the non-quantized version runs without problems, I don’t think the issue is related to the LoRA config, dataset, or formatting functions. The number of trainable parameters is non-zero for both the LoRA and QLoRA setups.

-

Below is the code I’m using for QLoRA. Any help would be appreciated!

-
ds_train_with_assistant_content = ds_train.map(construct_message_with_assistant_content)
-ds_valid_with_assistant_content = ds_valid.map(construct_message_with_assistant_content)
-
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type=""nf4"",
-    bnb_4bit_compute_dtype=torch.bfloat16
-)
-
-checkpoint = ""Qwen/Qwen3-0.6B""
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = AutoModelForCausalLM.from_pretrained(
-    checkpoint,
-    device_map=""auto"",
-    quantization_config=bnb_config
-)
-
-model.config.use_cache = False
-model.gradient_checkpointing_enable()
-model = prepare_model_for_kbit_training(model)
-model.enable_input_require_grads()
-
-
-timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
-RUN_NAME = f'qlora-final-model-all-linear-r64-{timestamp}'
-wandb.init(
-    project=os.environ[""WANDB_PROJECT""],
-    name=RUN_NAME,
-    # id=run_id,         # resume previous run if available
-    resume=""allow"",    # allows resuming crashed run
-)
-
-
-RESUME_TRAINING = False
-OUTPUT_DIR = ""./qlora-final_model_all_linear_r64-output""
-PER_DEVICE_BATCH_SIZE = 2  # higher values --> OOM
-
-optimizer = 'paged_adamw_8bit'
-effective_batch_size = 16
-learning_rate = 1e-5
-weight_decay = 0.0
-betas = (0.9, 0.9999)
-warmup_ratio = 0.2
-epochs = 1
-gradient_accumulation_steps = int(effective_batch_size / PER_DEVICE_BATCH_SIZE)
-lora_r = 16*4
-lora_alpha = 64*4
-lora_dropout = 0.01
-
-
-training_args = TrainingArguments(
-    output_dir=OUTPUT_DIR,
-    per_device_train_batch_size=PER_DEVICE_BATCH_SIZE,
-    gradient_accumulation_steps=gradient_accumulation_steps,
-    learning_rate=learning_rate,
-    optim=optimizer, 
-    num_train_epochs=epochs,
-    weight_decay=weight_decay,
-    lr_scheduler_type=""cosine"",
-    warmup_ratio=warmup_ratio,
-    save_strategy=""steps"",
-    save_steps=gradient_accumulation_steps*5,
-    save_total_limit=2,
-    eval_strategy=""steps"",
-    eval_steps=gradient_accumulation_steps*5,
-    logging_strategy=""steps"",
-    logging_steps=gradient_accumulation_steps*5,
-    report_to=['wandb'],
-    run_name=RUN_NAME,
-    bf16=True,
-    # fp16=True,
-    # fp16_full_eval=True,
-    metric_for_best_model=""eval_loss"",
-    greater_is_better=False,
-    max_grad_norm=1,
-    load_best_model_at_end=True,
-    gradient_checkpointing=True,
-    gradient_checkpointing_kwargs={""use_reentrant"": False}
-)
-
-
-peft_config = LoraConfig(
-    r=lora_r,
-    lora_alpha=lora_alpha,
-    lora_dropout=lora_dropout,
-    bias=""none"",
-    task_type=""CAUSAL_LM"",
-    target_modules='all-linear'
-)
-# model.requires_grad_(False)                     # freeze base weights (precautionary)
-model_peft = get_peft_model(model, peft_config) # inject a LoRA adapter
-print_trainable_parameters(model_peft)
-
-trainer = SFTTrainer(
-    model=model_peft,
-    train_dataset=ds_train_with_assistant_content,
-    eval_dataset=ds_valid_with_assistant_content,
-    formatting_func=formatting_func,
-    args=training_args,
-    callbacks=[EarlyStoppingCallback(early_stopping_patience=25)]
-)
-
-
-# Training setup summary
-dataset_size = len(ds_train_with_assistant_content)
-steps_per_epoch = dataset_size // (PER_DEVICE_BATCH_SIZE * gradient_accumulation_steps)
-total_steps = steps_per_epoch * epochs
-warmup_steps = int(total_steps * warmup_ratio)
-
-print(""===== Training Setup Summary ====="")
-print(f""Num epochs:            {epochs}"")
-print(f""Effective batch size:  {effective_batch_size}"")
-print(f""Per-device batch size: {PER_DEVICE_BATCH_SIZE}"")
-print(f""Gradient accumulation: {gradient_accumulation_steps}"")
-print(f""Dataset size:          {dataset_size}"")
-print(f""Steps per epoch:       {steps_per_epoch}"")
-print(f""Total training steps:  {total_steps}"")
-print(f""Warmup steps:          {warmup_steps}"")
-print(f""Logging steps:         {training_args.logging_steps}"")
-print(""==================================="")
-print(f""Start time: {datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}"")
-
-
-# Training
-last_checkpoint = None
-if RESUME_TRAINING and os.path.isdir(OUTPUT_DIR):
-    last_checkpoint = get_last_checkpoint(OUTPUT_DIR)
-
-if last_checkpoint is not None:
-    print(f""Resuming training from checkpoint: {last_checkpoint}"")
-    trainer.train(resume_from_checkpoint=last_checkpoint)
-else:
-    print(""Starting fresh training run"")
-    trainer.train()
-
-print(f""End time: {datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}"")
-
-
-# WandB logging of eval metrics
-for log in trainer.state.log_history:
-    if 'eval_loss' in log:
-        wandb.log({
-            ""eval_loss"": log['eval_loss'],
-            ""eval_perplexity"": math.exp(log['eval_loss']),
-            ""step"": log['step'],
-            ""learning_rate"": learning_rate,
-            ""weight_decay"": weight_decay,
-            ""betas"": betas,
-            ""warmup_ratio"": warmup_ratio,
-            ""effective_batch_size"": effective_batch_size,
-            ""optimizer"": optimizer
-        })
-
-wandb.finish()  # finish the run
";"

Thanks for the suggestion
-It turned out the issue was environment-related — I was able to get the expected results using the exact same code on Colab. In my local environment, clearing the caches for transformers, torch, etc., and upgrading all the libraries resolved the problem.

";1 -Problem with pyannote.audio==3.1.0;https://discuss.huggingface.co/t/problem-with-pyannote-audio-3-1-0/169326;169326;5;2025-10-21 13:54:38.497000+00:00;"[{'id': 243920, 'name': 'MAJH', 'username': 'aldkela', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/a/4bbf92/{size}.png', 'created_at': '2025-10-21T13:54:38.567Z', 'cooked': '

Hello, I was trying to use model named pyannote/speaker-diarization-3.1

\n

so I installed some libraries as below

\n
%pip install pyannote.audio==3.1.0\n%pip install numpy==1.26\n
\n

Here is the result and I think I installed this properly…

\n
Collecting pyannote.audio==3.1.0\n  Using cached pyannote.audio-3.1.0-py2.py3-none-any.whl.metadata (7.8 kB)\nRequirement already satisfied: asteroid-filterbanks>=0.4 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (0.4.0)\nRequirement already satisfied: einops>=0.6.0 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (0.8.1)\nRequirement already satisfied: huggingface-hub>=0.13.0 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (0.35.3)\nRequirement already satisfied: lightning>=2.0.1 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (2.5.5)\nRequirement already satisfied: omegaconf<3.0,>=2.1 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (2.3.0)\nRequirement already satisfied: pyannote.core>=5.0.0 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (6.0.1)\nRequirement already satisfied: pyannote.database>=5.0.1 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (6.1.0)\nRequirement already satisfied: pyannote.metrics>=3.2 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (4.0.0)\nRequirement already satisfied: pyannote.pipeline>=3.0.1 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (4.0.0)\nRequirement already satisfied: pytorch-metric-learning>=2.1.0 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (2.9.0)\nRequirement already satisfied: rich>=12.0.0 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (14.2.0)\nRequirement already satisfied: semver>=3.0.0 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (3.0.4)\nRequirement already satisfied: soundfile>=0.12.1 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (0.13.1)\nRequirement already satisfied: speechbrain>=0.5.14 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (1.0.3)\nRequirement already satisfied: tensorboardX>=2.6 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (2.6.4)\nRequirement already satisfied: torch>=2.0.0 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (2.9.0+cu126)\nRequirement already satisfied: torch-audiomentations>=0.11.0 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (0.12.0)\nRequirement already satisfied: torchaudio>=2.0.0 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (2.9.0)\nRequirement already satisfied: torchmetrics>=0.11.0 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from pyannote.audio==3.1.0) (1.8.2)\nRequirement already satisfied: antlr4-python3-runtime==4.9.* in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from omegaconf<3.0,>=2.1->pyannote.audio==3.1.0) (4.9.3)\nRequirement already satisfied: PyYAML>=5.1.0 in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from omegaconf<3.0,>=2.1->pyannote.audio==3.1.0) (6.0.3)\nRequirement already satisfied: numpy in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from asteroid-filterbanks>=0.4->pyannote.audio==3.1.0) (1.26.0)\nRequirement already satisfied: typing-extensions in c:\\gpt_agent_2025_book\\venv\\lib\\site-packages (from asteroid-filterbanks>=0.4->pyannote.audio==3.1.0) (4.15.0)\n...\n    Uninstalling numpy-2.3.4:\n      Successfully uninstalled numpy-2.3.4\nSuccessfully installed numpy-1.26.0\nNote: you may need to restart the kernel to use updated packages.\nOutput is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...\nERROR: pip\'s dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\npyannote-core 6.0.1 requires numpy>=2.0, but you have numpy 1.26.0 which is incompatible.\npyannote-metrics 4.0.0 requires numpy>=2.2.2, but you have numpy 1.26.0 which is incompatible.\n
\n

I ran this code to load the ffmpeg

\n
from pathlib import Path\nimport os, sys\n\nffmpeg_dll_dir = Path(r""C:\\Users\\majh0\\miniconda3\\Library\\bin"")  \nassert ffmpeg_dll_dir.exists(), ffmpeg_dll_dir\nos.add_dll_directory(str(ffmpeg_dll_dir))  \n\nimport torch, torchcodec, platform, subprocess\nprint(""exe:"", sys.executable)\nprint(""torch"", torch.__version__, ""torchcodec"", torchcodec.__version__, ""py"", platform.python_version())\nsubprocess.run([""ffmpeg"", ""-version""], check=True)\nprint(""cuda torch?"",torch.cuda.is_available())\n
\n

and the result looks fine to me..

\n
exe: c:\\GPT_AGENT_2025_BOOK\\venv\\Scripts\\python.exe\ntorch 2.9.0+cu126 torchcodec 0.8.0 py 3.12.9\ncuda torch? True\n
\n

I ran this code and it gave me an error as below…

\n
# instantiate the pipeline\nimport torch\nfrom pyannote.audio import Pipeline\npipeline = Pipeline.from_pretrained(\n  ""pyannote/speaker-diarization-3.1"",\n  token=""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"")\n\n\nif torch.cuda.is_available():\n    pipeline.to(torch.device(""cuda""))\n    print(""Using CUDA"")\nelse:\n    print(""Using CPU"")\n
\n
---------------------------------------------------------------------------\nAttributeError                            Traceback (most recent call last)\nCell In[3], line 3\n      1 # instantiate the pipeline\n      2 import torch\n----> 3 from pyannote.audio import Pipeline\n      4 pipeline = Pipeline.from_pretrained(\n      5   ""pyannote/speaker-diarization-3.1"",\n      6   token=""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"")\n      9 if torch.cuda.is_available():\n\nFile c:\\GPT_AGENT_2025_BOOK\\venv\\Lib\\site-packages\\pyannote\\audio\\__init__.py:29\n     25 except ImportError:\n     26     pass\n---> 29 from .core.inference import Inference\n     30 from .core.io import Audio\n     31 from .core.model import Model\n\nFile c:\\GPT_AGENT_2025_BOOK\\venv\\Lib\\site-packages\\pyannote\\audio\\core\\inference.py:36\n     33 from pyannote.core import Segment, SlidingWindow, SlidingWindowFeature\n     34 from pytorch_lightning.utilities.memory import is_oom_error\n---> 36 from pyannote.audio.core.io import AudioFile\n     37 from pyannote.audio.core.model import Model, Specifications\n     38 from pyannote.audio.core.task import Resolution\n...\n     49     - a ""str"" or ""Path"" instance: ""audio.wav"" or Path(""audio.wav"")\n   (...)     56 integer to load a specific channel: {""audio"": ""stereo.wav"", ""channel"": 0}\n     57 """"""\n\nAttributeError: module \'torchaudio\' has no attribute \'set_audio_backend\'\n
\n

I have checked the document and it says I need to install pyannote.audio 3.1

\n

I don’t know why this thing doesn’t work…. I tried to solve this problem for 3hrs changing version of pyannote.audio but this thing didn’t give me solution..

\n

Do I need to delete venv and reinstall it clearly..?

\n

Thank you so much for the help in advance..

', 'post_number': 1, 'post_type': 1, 'posts_count': 6, 'updated_at': '2025-10-21T14:42:42.475Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 84, 'reads': 5, 'readers_count': 4, 'score': 221.0, 'yours': False, 'topic_id': 169326, 'topic_slug': 'problem-with-pyannote-audio-3-1-0', 'display_username': 'MAJH', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 2, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://github.com/pyannote/pyannote-audio', 'internal': False, 'reflection': False, 'title': 'GitHub - pyannote/pyannote-audio: Neural building blocks for speaker diarization: speech activity detection, speaker change detection, overlapped speech detection, speaker embedding', 'clicks': 0}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 105819, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/problem-with-pyannote-audio-3-1-0/169326/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 243939, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-10-22T02:49:32.789Z', 'cooked': '

Seems library version incompatibility…

\n
\n

Your import error comes from an API removal in torchaudio and an incompatible NumPy pin. Fix by upgrading pyannote.audio and undoing the NumPy downgrade. Keep your Torch 2.9 stack.

\n

TL;DR fix

\n
# clean conflicting pins\npip uninstall -y pyannote.audio pyannote.core pyannote.metrics pyannote.pipeline pyannote.database numpy\n\n# install a compatible, modern set\npip install --upgrade ""numpy>=2.3"" ""pyannote.audio>=4.0.1"" --prefer-binary\n# keep your existing torch==2.9.*, torchaudio==2.9.* and torchcodec\n
\n

pyannote.audio>=4 removed the old torchaudio backend call and uses FFmpeg via torchcodec, so the import works on torchaudio≥2.2. NumPy≥2.x satisfies pyannote-core and pyannote-metrics. (GitHub)

\n

Then restart the kernel once. Verify:

\n
# refs:\n# - torchaudio dispatcher notes: https://docs.pytorch.org/audio/main/torchaudio.html\n# - pyannote model card: https://huggingface.co/pyannote/speaker-diarization-3.1\nimport torchaudio, torchcodec\nprint(""backends:"", torchaudio.list_audio_backends())  # should show \'ffmpeg\' and/or \'soundfile\'\nfrom pyannote.audio import Pipeline\npipe = Pipeline.from_pretrained(""pyannote/speaker-diarization-3.1"", token=""hf_xxx"")  # do not hardcode secrets\n
\n

set_audio_backend was deprecated, then removed in torchaudio 2.2+, which is why pyannote.audio==3.1.0 fails to import on your current torchaudio. (PyTorch Docs)

\n

Why your install failed

\n\n

If you must stay on pyannote.audio==3.1.0 (not recommended)

\n

Pick one, not both:

\n
# Legacy stack that still has set_audio_backend\npip install ""torch<=2.1.2"" ""torchaudio<=2.1.2"" ""numpy>=2.0,<3"" ""pyannote.audio==3.1.0""\n
\n

or a temporary shim:

\n
# WARNING: local hack to import 3.1.0 with new torchaudio\nimport torchaudio\nif not hasattr(torchaudio, ""set_audio_backend""):\n    torchaudio.set_audio_backend = lambda *a, **k: None\n    torchaudio.get_audio_backend = lambda: ""soundfile""\nfrom pyannote.audio import Pipeline\n
\n

The first aligns versions to when the API existed. The second bypasses the call so you can upgrade later. (PyTorch Docs)

\n

Gating and FFmpeg checks

\n\n

Sanity test end-to-end

\n
# refs in comments:\n# https://huggingface.co/pyannote/speaker-diarization-3.1\n# https://docs.pytorch.org/audio/main/torchaudio.html\nimport torch\nfrom pyannote.audio import Pipeline\npipe = Pipeline.from_pretrained(""pyannote/speaker-diarization-3.1"", token=""hf_xxx"")\nif torch.cuda.is_available():\n    pipe.to(""cuda"")\nresult = pipe(""sample.wav"")  # 16 kHz mono recommended\nprint(result)\n
\n

The model card confirms “pyannote.audio version 3.1 or higher,” so using 4.x is valid and simpler on modern Torch. (Hugging Face)

\n

Extra context and references

\n\n

Deleting the venv is optional. Uninstall→reinstall with the versions above and one kernel restart is sufficient.

', 'post_number': 2, 'post_type': 1, 'posts_count': 6, 'updated_at': '2025-10-22T02:50:15.452Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 4, 'reads': 4, 'readers_count': 3, 'score': 15.8, 'yours': False, 'topic_id': 169326, 'topic_slug': 'problem-with-pyannote-audio-3-1-0', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://github.com/pyannote/pyannote-audio/releases', 'internal': False, 'reflection': False, 'title': 'Releases · pyannote/pyannote-audio · GitHub', 'clicks': 1}, {'url': 'https://github.com/pyannote/pyannote-audio/issues/1576', 'internal': False, 'reflection': False, 'title': 'Removing torchaudio.set_audio_backend(""soundfile"") · Issue #1576 · pyannote/pyannote-audio · GitHub', 'clicks': 1}, {'url': 'https://github.com/huggingface/transformers/issues/41230', 'internal': False, 'reflection': False, 'title': 'Consider forking and maintaining pyctcdecode or switch to torchaudio.models.decoder · Issue #41230 · huggingface/transformers · GitHub', 'clicks': 0}, {'url': 'https://huggingface.co/pyannote/speaker-diarization-3.1', 'internal': False, 'reflection': False, 'title': 'pyannote/speaker-diarization-3.1 · Hugging Face', 'clicks': 0}, {'url': 'https://docs.pytorch.org/audio/main/torchaudio.html', 'internal': False, 'reflection': False, 'title': 'torchaudio — Torchaudio 2.8.0 documentation', 'clicks': 0}, {'url': 'https://huggingface.co/collinbarnwell/pyannote-speaker-diarization-31', 'internal': False, 'reflection': False, 'title': 'collinbarnwell/pyannote-speaker-diarization-31 · Hugging Face', 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/problem-with-pyannote-audio-3-1-0/169326/2', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243955, 'name': 'MAJH', 'username': 'aldkela', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/a/4bbf92/{size}.png', 'created_at': '2025-10-22T12:34:52.198Z', 'cooked': '

Hello! Thank you so much!! I realized.. I should read the error msg properly to solve the problem!!! xD

\n

I have one more problem….

\n

I made a code as below..

\n
from pathlib import Path\nimport os, sys\n\nffmpeg_dll_dir = Path(r""C:\\Users\\majh0\\miniconda3\\Library\\bin"")  \nassert ffmpeg_dll_dir.exists(), ffmpeg_dll_dir\nos.add_dll_directory(str(ffmpeg_dll_dir))  \n\nimport torch, torchcodec, platform, subprocess\nprint(""exe:"", sys.executable)\nprint(""torch"", torch.__version__, ""torchcodec"", torchcodec.__version__, ""py"", platform.python_version())\nsubprocess.run([""ffmpeg"", ""-version""], check=True)\nprint(""cuda torch?"",torch.cuda.is_available())\n\n# instantiate the pipeline\nimport torch\nfrom pyannote.audio import Pipeline\n\npipeline = Pipeline.from_pretrained(\n  ""pyannote/speaker-diarization-3.1"",\n  token=""my token"")\n\n\nif torch.cuda.is_available():\n    pipeline.to(torch.device(""cuda""))\n    print(""Using CUDA"")\nelse:\n    print(""Using CPU"")\n\naudio_file =""./guitar.wav""\ndiarization = pipeline(audio_file)\n\n# dump the diarization output to disk using RTTM format\nwith open(""./guitar.rttm"", ""w"", encoding=""utf-8"") as rttm:\n    diarization.write_rttm(rttm)\n
\n

this thing gave me error as below…

\n
---------------------------------------------------------------------------\nAttributeError                            Traceback (most recent call last)\nCell In[15], line 6\n      4 # dump the diarization output to disk using RTTM format\n      5 with open(""./guitar.rttm"", ""w"", encoding=""utf-8"") as rttm:\n----> 6     diarization.write_rttm(rttm)\n\nAttributeError: \'DiarizeOutput\' object has no attribute \'write_rttm\'\n
\n

This thing is hard to understand for me… because I literally typed “diarization.write_rttm(rttm)” same with the example of this document like picture below https://huggingface.co/pyannote/speaker-diarization-3.1

\n

image768×361 15.6 KB

\n

the name of the function “write_rttm” has changed? then is there any way to check the new name of it..?

\n

or did I make another mistake again..?

\n

I think I am bothering you too much.. but thank you so much for your help..

', 'post_number': 3, 'post_type': 1, 'posts_count': 6, 'updated_at': '2025-10-22T12:34:52.198Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 4, 'readers_count': 3, 'score': 15.8, 'yours': False, 'topic_id': 169326, 'topic_slug': 'problem-with-pyannote-audio-3-1-0', 'display_username': 'MAJH', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/pyannote/speaker-diarization-3.1', 'internal': False, 'reflection': False, 'title': 'pyannote/speaker-diarization-3.1 · Hugging Face', 'clicks': 0}], 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 105819, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/problem-with-pyannote-audio-3-1-0/169326/3', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243956, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-10-22T12:48:54.185Z', 'cooked': '

It seems like a partial hit. The cause is a specification change due to a library version upgrade, but it appears to be because the returned object changed, not because the function itself changed.

\n
\n

You’re on pyannote.audio 4.x. In 4.x the pipeline returns a DiarizeOutput object, not an Annotation. The Annotation lives at output.speaker_diarization. write_rttm is a method of Annotation, so call it there.

\n
from pyannote.audio import Pipeline\nimport torch\n\npipeline = Pipeline.from_pretrained(\n    ""pyannote/speaker-diarization-3.1"",\n    token=""YOUR_HF_TOKEN""\n)\nif torch.cuda.is_available():\n    pipeline.to(""cuda"")\n\nout = pipeline(""./guitar.wav"")                   # out is DiarizeOutput\nann = out.speaker_diarization                    # this is an Annotation\n\nwith open(""./guitar.rttm"", ""w"", encoding=""utf-8"") as f:\n    ann.write_rttm(f)\n
\n

Evidence

\n\n

Option if you want the old behavior: pin to the legacy stack (pyannote.audio==3.1.x) where pipeline(...) returns an Annotation, and the snippet diarization.write_rttm(...) works as-is. Note 4.x introduced several breaking changes, including API renames. (GitHub)

', 'post_number': 4, 'post_type': 1, 'posts_count': 6, 'updated_at': '2025-10-22T12:48:54.185Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 2, 'reads': 2, 'readers_count': 1, 'score': 25.4, 'yours': False, 'topic_id': 169326, 'topic_slug': 'problem-with-pyannote-audio-3-1-0', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/pyannote/speaker-diarization-3.1', 'internal': False, 'reflection': False, 'title': 'pyannote/speaker-diarization-3.1 · Hugging Face', 'clicks': 1}, {'url': 'https://github.com/pyannote/pyannote-audio', 'internal': False, 'reflection': False, 'title': 'GitHub - pyannote/pyannote-audio: Neural building blocks for speaker diarization: speech activity detection, speaker change detection, overlapped speech detection, speaker embedding', 'clicks': 1}, {'url': 'https://pyannote.github.io/pyannote-core/_modules/pyannote/core/annotation.html', 'internal': False, 'reflection': False, 'title': 'pyannote.core.annotation — pyannote.core 6.0.2.dev0+gb83999a4e.d20250916 documentation', 'clicks': 1}, {'url': 'https://github.com/pyannote/pyannote-audio/releases', 'internal': False, 'reflection': False, 'title': 'Releases · pyannote/pyannote-audio · GitHub', 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/problem-with-pyannote-audio-3-1-0/169326/4', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 244024, 'name': 'MAJH', 'username': 'aldkela', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/a/4bbf92/{size}.png', 'created_at': '2025-10-23T18:31:44.078Z', 'cooked': '

Hello, finally it works!!!

\n

I thought I made mistake again.. I didn’t even think there was a change due to a library version upgrade..

\n

Thank you so much now I can use this model without any problem!!!

', 'post_number': 5, 'post_type': 1, 'posts_count': 6, 'updated_at': '2025-10-23T18:31:44.078Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 2, 'readers_count': 1, 'score': 20.4, 'yours': False, 'topic_id': 169326, 'topic_slug': 'problem-with-pyannote-audio-3-1-0', 'display_username': 'MAJH', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 105819, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/problem-with-pyannote-audio-3-1-0/169326/5', 'reactions': [{'id': 'confetti_ball', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 244046, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-10-24T06:32:17.200Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 6, 'post_type': 3, 'posts_count': 6, 'updated_at': '2025-10-24T06:32:17.200Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 1, 'readers_count': 0, 'score': 0.2, 'yours': False, 'topic_id': 169326, 'topic_slug': 'problem-with-pyannote-audio-3-1-0', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/problem-with-pyannote-audio-3-1-0/169326/6', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

Hello, I was trying to use model named pyannote/speaker-diarization-3.1

-

so I installed some libraries as below

-
%pip install pyannote.audio==3.1.0
-%pip install numpy==1.26
-
-

Here is the result and I think I installed this properly…

-
Collecting pyannote.audio==3.1.0
-  Using cached pyannote.audio-3.1.0-py2.py3-none-any.whl.metadata (7.8 kB)
-Requirement already satisfied: asteroid-filterbanks>=0.4 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (0.4.0)
-Requirement already satisfied: einops>=0.6.0 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (0.8.1)
-Requirement already satisfied: huggingface-hub>=0.13.0 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (0.35.3)
-Requirement already satisfied: lightning>=2.0.1 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (2.5.5)
-Requirement already satisfied: omegaconf<3.0,>=2.1 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (2.3.0)
-Requirement already satisfied: pyannote.core>=5.0.0 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (6.0.1)
-Requirement already satisfied: pyannote.database>=5.0.1 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (6.1.0)
-Requirement already satisfied: pyannote.metrics>=3.2 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (4.0.0)
-Requirement already satisfied: pyannote.pipeline>=3.0.1 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (4.0.0)
-Requirement already satisfied: pytorch-metric-learning>=2.1.0 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (2.9.0)
-Requirement already satisfied: rich>=12.0.0 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (14.2.0)
-Requirement already satisfied: semver>=3.0.0 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (3.0.4)
-Requirement already satisfied: soundfile>=0.12.1 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (0.13.1)
-Requirement already satisfied: speechbrain>=0.5.14 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (1.0.3)
-Requirement already satisfied: tensorboardX>=2.6 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (2.6.4)
-Requirement already satisfied: torch>=2.0.0 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (2.9.0+cu126)
-Requirement already satisfied: torch-audiomentations>=0.11.0 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (0.12.0)
-Requirement already satisfied: torchaudio>=2.0.0 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (2.9.0)
-Requirement already satisfied: torchmetrics>=0.11.0 in c:\gpt_agent_2025_book\venv\lib\site-packages (from pyannote.audio==3.1.0) (1.8.2)
-Requirement already satisfied: antlr4-python3-runtime==4.9.* in c:\gpt_agent_2025_book\venv\lib\site-packages (from omegaconf<3.0,>=2.1->pyannote.audio==3.1.0) (4.9.3)
-Requirement already satisfied: PyYAML>=5.1.0 in c:\gpt_agent_2025_book\venv\lib\site-packages (from omegaconf<3.0,>=2.1->pyannote.audio==3.1.0) (6.0.3)
-Requirement already satisfied: numpy in c:\gpt_agent_2025_book\venv\lib\site-packages (from asteroid-filterbanks>=0.4->pyannote.audio==3.1.0) (1.26.0)
-Requirement already satisfied: typing-extensions in c:\gpt_agent_2025_book\venv\lib\site-packages (from asteroid-filterbanks>=0.4->pyannote.audio==3.1.0) (4.15.0)
-...
-    Uninstalling numpy-2.3.4:
-      Successfully uninstalled numpy-2.3.4
-Successfully installed numpy-1.26.0
-Note: you may need to restart the kernel to use updated packages.
-Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...
-ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
-pyannote-core 6.0.1 requires numpy>=2.0, but you have numpy 1.26.0 which is incompatible.
-pyannote-metrics 4.0.0 requires numpy>=2.2.2, but you have numpy 1.26.0 which is incompatible.
-
-

I ran this code to load the ffmpeg

-
from pathlib import Path
-import os, sys
-
-ffmpeg_dll_dir = Path(r""C:\Users\majh0\miniconda3\Library\bin"")  
-assert ffmpeg_dll_dir.exists(), ffmpeg_dll_dir
-os.add_dll_directory(str(ffmpeg_dll_dir))  
-
-import torch, torchcodec, platform, subprocess
-print(""exe:"", sys.executable)
-print(""torch"", torch.__version__, ""torchcodec"", torchcodec.__version__, ""py"", platform.python_version())
-subprocess.run([""ffmpeg"", ""-version""], check=True)
-print(""cuda torch?"",torch.cuda.is_available())
-
-

and the result looks fine to me..

-
exe: c:\GPT_AGENT_2025_BOOK\venv\Scripts\python.exe
-torch 2.9.0+cu126 torchcodec 0.8.0 py 3.12.9
-cuda torch? True
-
-

I ran this code and it gave me an error as below…

-
# instantiate the pipeline
-import torch
-from pyannote.audio import Pipeline
-pipeline = Pipeline.from_pretrained(
-  ""pyannote/speaker-diarization-3.1"",
-  token=""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"")
-
-
-if torch.cuda.is_available():
-    pipeline.to(torch.device(""cuda""))
-    print(""Using CUDA"")
-else:
-    print(""Using CPU"")
-
-
---------------------------------------------------------------------------
-AttributeError                            Traceback (most recent call last)
-Cell In[3], line 3
-      1 # instantiate the pipeline
-      2 import torch
-----> 3 from pyannote.audio import Pipeline
-      4 pipeline = Pipeline.from_pretrained(
-      5   ""pyannote/speaker-diarization-3.1"",
-      6   token=""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"")
-      9 if torch.cuda.is_available():
-
-File c:\GPT_AGENT_2025_BOOK\venv\Lib\site-packages\pyannote\audio\__init__.py:29
-     25 except ImportError:
-     26     pass
----> 29 from .core.inference import Inference
-     30 from .core.io import Audio
-     31 from .core.model import Model
-
-File c:\GPT_AGENT_2025_BOOK\venv\Lib\site-packages\pyannote\audio\core\inference.py:36
-     33 from pyannote.core import Segment, SlidingWindow, SlidingWindowFeature
-     34 from pytorch_lightning.utilities.memory import is_oom_error
----> 36 from pyannote.audio.core.io import AudioFile
-     37 from pyannote.audio.core.model import Model, Specifications
-     38 from pyannote.audio.core.task import Resolution
-...
-     49     - a ""str"" or ""Path"" instance: ""audio.wav"" or Path(""audio.wav"")
-   (...)     56 integer to load a specific channel: {""audio"": ""stereo.wav"", ""channel"": 0}
-     57 """"""
-
-AttributeError: module 'torchaudio' has no attribute 'set_audio_backend'
-
-

I have checked the document and it says I need to install pyannote.audio 3.1

-

I don’t know why this thing doesn’t work…. I tried to solve this problem for 3hrs changing version of pyannote.audio but this thing didn’t give me solution..

-

Do I need to delete venv and reinstall it clearly..?

-

Thank you so much for the help in advance..

";"

It seems like a partial hit. The cause is a specification change due to a library version upgrade, but it appears to be because the returned object changed, not because the function itself changed.

-
-

You’re on pyannote.audio 4.x. In 4.x the pipeline returns a DiarizeOutput object, not an Annotation. The Annotation lives at output.speaker_diarization. write_rttm is a method of Annotation, so call it there.

-
from pyannote.audio import Pipeline
-import torch
-
-pipeline = Pipeline.from_pretrained(
-    ""pyannote/speaker-diarization-3.1"",
-    token=""YOUR_HF_TOKEN""
-)
-if torch.cuda.is_available():
-    pipeline.to(""cuda"")
-
-out = pipeline(""./guitar.wav"")                   # out is DiarizeOutput
-ann = out.speaker_diarization                    # this is an Annotation
-
-with open(""./guitar.rttm"", ""w"", encoding=""utf-8"") as f:
-    ann.write_rttm(f)
-
-

Evidence

- -

Option if you want the old behavior: pin to the legacy stack (pyannote.audio==3.1.x) where pipeline(...) returns an Annotation, and the snippet diarization.write_rttm(...) works as-is. Note 4.x introduced several breaking changes, including API renames. (GitHub)

";1 -How to make my customized pipeline consumable for Transformers.js;https://discuss.huggingface.co/t/how-to-make-my-customized-pipeline-consumable-for-transformers-js/169036;169036;5;2025-10-08 15:06:33.223000+00:00;"[{'id': 243309, 'name': 'Sicheng Mao', 'username': 'alephpi', 'avatar_template': '/user_avatar/discuss.huggingface.co/alephpi/{size}/54288_2.png', 'created_at': '2025-10-08T15:06:33.311Z', 'cooked': '

Hi community,

\n

Here is my image-to-text pipeline:

\n

(customized means not a registered one in official Transformers)

\n

A customized Image processor,

\n

A VisionEncoderDecoder, with a customized vision encoder that inherits the PretrainedModel and a MBartDecoder,

\n

A WordLevel tokenizer (yes I haven’t used a MBartTokenizer and I have distilled my own one for specific corpus).

\n

I want to consume this pipeline in Transformers.js, however I notice that all examples given in Transformers.js documentation seem like pulling from a ready made Transformers pipeline with official components and configurations, I just wonder is it possible to turn my customized pipeline consumable for Transformers.js, or to what extent my pipeline could be partially turned to?

\n

My guess is that the I should make my own image preprocessing step and send the image input tensor to the model, in that way, which kind of js libraries you recommend to use? (It won’t be very intensive, just simply resize and normalize things plus a crop-white-margin function which doesn’t exist in Transformers’ image processors).

\n

Also just to be sure, is my VisionEncoderDecoder possible to export to an onnx format to be consumable for Transformers.js?

\n

Of course my model should be possible to run in browser (and that’s the whole point for me to do this), as it has only 20M parameters (way less than the showcase in Transformers.js)

\n

Thanks for your help in advance!

', 'post_number': 1, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-10-08T15:19:25.343Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 26, 'reads': 9, 'readers_count': 8, 'score': 21.6, 'yours': False, 'topic_id': 169036, 'topic_slug': 'how-to-make-my-customized-pipeline-consumable-for-transformers-js', 'display_username': 'Sicheng Mao', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 2, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://discuss.huggingface.co/t/load-model-from-platform-other-than-hf-hub-and-display-a-progress-bar-by-from-pretrained-in-transformers-js/169364', 'internal': True, 'reflection': True, 'title': 'Load model from platform other than HF Hub and display a progress bar by `from_pretrained()` in Transformers.js', 'clicks': 0}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 104516, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/how-to-make-my-customized-pipeline-consumable-for-transformers-js/169036/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 243331, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-10-08T23:15:26.000Z', 'cooked': '

It seems possible. For Transoformers.js, there’s a dedicated channel on the HF Discord, so asking there would be the most reliable option.

', 'post_number': 2, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-10-08T23:15:26.000Z', 'reply_count': 2, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 8, 'readers_count': 7, 'score': 26.4, 'yours': False, 'topic_id': 169036, 'topic_slug': 'how-to-make-my-customized-pipeline-consumable-for-transformers-js', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/datasets/John6666/forum1/blob/main/transformer_js_custom_pipeline_1.md', 'internal': False, 'reflection': False, 'title': 'transformer_js_custom_pipeline_1.md · John6666/forum1 at main', 'clicks': 2}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/how-to-make-my-customized-pipeline-consumable-for-transformers-js/169036/2', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 243351, 'name': 'Sicheng Mao', 'username': 'alephpi', 'avatar_template': '/user_avatar/discuss.huggingface.co/alephpi/{size}/54288_2.png', 'created_at': '2025-10-09T05:47:31.103Z', 'cooked': '

Thanks let me check!

', 'post_number': 3, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-10-09T05:47:31.103Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 8, 'readers_count': 7, 'score': 16.4, 'yours': False, 'topic_id': 169036, 'topic_slug': 'how-to-make-my-customized-pipeline-consumable-for-transformers-js', 'display_username': 'Sicheng Mao', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 104516, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/how-to-make-my-customized-pipeline-consumable-for-transformers-js/169036/3', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243504, 'name': 'Sicheng Mao', 'username': 'alephpi', 'avatar_template': '/user_avatar/discuss.huggingface.co/alephpi/{size}/54288_2.png', 'created_at': '2025-10-13T17:27:00.991Z', 'cooked': '

Hi John,
\nI try to follow your export script and I made to export 1 onnx file with the following:

\n
register_tasks_manager_onnx = TasksManager.create_register(""onnx"")\n@register_tasks_manager_onnx(""my_hgnetv2"", *[""feature-extraction""])\nclass HGNetv2OnnxConfig(ViTOnnxConfig):\n    @property\n    def inputs(self):\n        return {""pixel_values"": {0: ""batch""}} # only dynamical axis is needed to list here\n    @property\n    def outputs(self):\n        return {""last_hidden_state"": {0: ""batch""}}\n\ndef export_onnx():\n    path=\'./model\'\n    model = VisionEncoderDecoderModel.from_pretrained(path)\n    onnx_config_constructor = TasksManager.get_exporter_config_constructor(\n        exporter=""onnx"",\n        model=model,\n        task=""image-to-text"",\n        library_name=""transformers"",\n        exporter_config_kwargs={""use_past"": True},\n    )\n    onnx_config = onnx_config_constructor(model.config)\n    out = Path(""./model/onnx"")\n    out.mkdir(exist_ok=True)\n\n    inputs, outputs = export(model, \n                             onnx_config, \n                             out/""model.onnx"", \n                             onnx_config.DEFAULT_ONNX_OPSET,\n                             input_shapes={""pixel_values"": [1, 3, 384, 384]},\n                             )\n    print(inputs)\n    print(outputs)\n
\n

However, I don’t know how to export to trio .onnx file with the cli, since within the python script, I can register the customized config, but I don’t know how to register it with cli…

', 'post_number': 4, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-10-13T17:27:47.078Z', 'reply_count': 1, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 7, 'readers_count': 6, 'score': 21.2, 'yours': False, 'topic_id': 169036, 'topic_slug': 'how-to-make-my-customized-pipeline-consumable-for-transformers-js', 'display_username': 'Sicheng Mao', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 2, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 104516, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/how-to-make-my-customized-pipeline-consumable-for-transformers-js/169036/4', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243505, 'name': 'Sicheng Mao', 'username': 'alephpi', 'avatar_template': '/user_avatar/discuss.huggingface.co/alephpi/{size}/54288_2.png', 'created_at': '2025-10-13T17:54:45.869Z', 'cooked': '

Oh I see, it’s here Export a model to ONNX with optimum.exporters.onnx and we need to use main_export instead of export

', 'post_number': 5, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-10-13T17:54:45.869Z', 'reply_count': 1, 'reply_to_post_number': 4, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 5, 'readers_count': 4, 'score': 21.0, 'yours': False, 'topic_id': 169036, 'topic_slug': 'how-to-make-my-customized-pipeline-consumable-for-transformers-js', 'display_username': 'Sicheng Mao', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/docs/optimum-onnx/onnx/usage_guides/export_a_model#customize-the-export-of-official-transformers-models', 'internal': False, 'reflection': False, 'title': 'Export a model to ONNX with optimum.exporters.onnx', 'clicks': 0}], 'read': True, 'user_title': None, 'reply_to_user': {'id': 104516, 'username': 'alephpi', 'name': 'Sicheng Mao', 'avatar_template': '/user_avatar/discuss.huggingface.co/alephpi/{size}/54288_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 104516, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/how-to-make-my-customized-pipeline-consumable-for-transformers-js/169036/5', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243509, 'name': 'Sicheng Mao', 'username': 'alephpi', 'avatar_template': '/user_avatar/discuss.huggingface.co/alephpi/{size}/54288_2.png', 'created_at': '2025-10-13T20:49:24.000Z', 'cooked': '

Finally I use the following:

\n
def export_onnx():\n    path=\'./model\'\n    out = Path(""./model/trio_onnx"")\n    out.mkdir(exist_ok=True)\n\n    main_export(\n        path,\n        task=""image-to-text"",\n        output=out,\n    )\n
\n

However, this can only export to encoder_model.onnx and decoder_model.onnx, since I have no idea how the use_past=True can be injected with main_export’s argument(The example in the above link doesn’t work out), I monkey-patched the source code to make it export to trio onnx.

', 'post_number': 6, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-10-13T20:49:24.000Z', 'reply_count': 0, 'reply_to_post_number': 5, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 5, 'readers_count': 4, 'score': 16.0, 'yours': False, 'topic_id': 169036, 'topic_slug': 'how-to-make-my-customized-pipeline-consumable-for-transformers-js', 'display_username': 'Sicheng Mao', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 104516, 'username': 'alephpi', 'name': 'Sicheng Mao', 'avatar_template': '/user_avatar/discuss.huggingface.co/alephpi/{size}/54288_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 104516, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/how-to-make-my-customized-pipeline-consumable-for-transformers-js/169036/6', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243513, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-10-13T23:14:53.440Z', 'cooked': '

For Transformer.js:

\n
\n

Use main_export() with custom_onnx_configs and with_behavior(..., use_past=True) to get the trio. Do not monkey-patch.

\n

Background and context

\n\n

Minimal, correct export (no patches)

\n
# refs:\n# - Export guide (custom_onnx_configs + with_behavior + no_post_process):\n#   https://huggingface.co/docs/optimum-onnx/onnx/usage_guides/export_a_model\n# - main_export reference:\n#   https://huggingface.co/docs/optimum-onnx/en/onnx/package_reference/export\n\nfrom pathlib import Path\nfrom transformers import AutoConfig\nfrom optimum.exporters.onnx import main_export\nfrom optimum.exporters.tasks import TasksManager\n\nmodel_dir = ""./model""                       # your VisionEncoderDecoder checkpoint\nout = Path(""./model/trio_onnx""); out.mkdir(parents=True, exist_ok=True)\n\n# Build an ONNX config for your model+task\ncfg = AutoConfig.from_pretrained(model_dir)\nctor = TasksManager.get_exporter_config_constructor(\n    model_type=cfg.model_type, backend=""onnx"", task=""image-to-text""  # vision→text task\n)\nonnx_cfg = ctor(config=cfg, task=""image-to-text"")\n\n# Ask explicitly for the three subgraphs\ncustom_onnx_configs = {\n    ""encoder_model"": onnx_cfg.with_behavior(""encoder""),\n    ""decoder_model"": onnx_cfg.with_behavior(""decoder"", use_past=False),\n    ""decoder_with_past_model"": onnx_cfg.with_behavior(""decoder"", use_past=True),\n}\n\n# Export. Keep trio separate (avoid automatic merge).\nmain_export(\n    model=model_dir,\n    task=""image-to-text"",\n    output=str(out),\n    custom_onnx_configs=custom_onnx_configs,\n    no_post_process=True,\n)\n
\n

Why this works: Optimum documents custom_onnx_configs and with_behavior(""decoder"", use_past=True) to emit decoder_with_past_model.onnx; no_post_process=True prevents the exporter from merging decoders. (Hugging Face)

\n

Verify and align with Transformers.js

\n\n

Common failure modes and fixes

\n\n

Optional: merged decoder

\n

Some exporters can produce a single decoder_model_merged.onnx that handles both first and subsequent tokens. If you prefer that, omit no_post_process=True. The public ViT-GPT2 repo shows merged and split variants side by side. (Hugging Face)

', 'post_number': 7, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-10-13T23:14:53.440Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 6, 'readers_count': 5, 'score': 6.0, 'yours': False, 'topic_id': 169036, 'topic_slug': 'how-to-make-my-customized-pipeline-consumable-for-transformers-js', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/docs/optimum-onnx/onnx/usage_guides/export_a_model', 'internal': False, 'reflection': False, 'title': 'Export a model to ONNX with optimum.exporters.onnx', 'clicks': 1}, {'url': 'https://huggingface.co/Xenova/vit-gpt2-image-captioning/tree/main/onnx', 'internal': False, 'reflection': False, 'title': 'Xenova/vit-gpt2-image-captioning at main', 'clicks': 0}, {'url': 'https://huggingface.co/Xenova/vit-gpt2-image-captioning', 'internal': False, 'reflection': False, 'title': 'Xenova/vit-gpt2-image-captioning · Hugging Face', 'clicks': 0}, {'url': 'https://discuss.huggingface.co/t/when-exporting-seq2seq-models-with-onnx-why-do-we-need-both-decoder-with-past-model-onnx-and-decoder-model-onnx/33354', 'internal': True, 'reflection': False, 'title': 'When exporting seq2seq models with ONNX, why do we need both decoder_with_past_model.onnx and decoder_model.onnx?', 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/how-to-make-my-customized-pipeline-consumable-for-transformers-js/169036/7', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243560, 'name': 'Sicheng Mao', 'username': 'alephpi', 'avatar_template': '/user_avatar/discuss.huggingface.co/alephpi/{size}/54288_2.png', 'created_at': '2025-10-14T08:55:40.490Z', 'cooked': '

Well, I still cannot make this work, by debugging, I find that the main_export() will take me to optimum.exporters.utils._get_submodels_and_export_configs(), and an error raises here

\n
        # When specifying custom export configs for supported transformers architectures, we do\n        # not force to specify a custom export config for each submodel.\n        for key, custom_export_config in custom_export_configs.items():\n            models_and_export_configs[key] = (models_and_export_configs[key][0], custom_export_config)\n
\n

where the custom_export_configs is the one we passed in with use_past injected, while the models_and_export_configs, generated here

\n
            # TODO: this succession of if/else strongly suggests a refactor is needed.\n            if (\n                task.startswith(TasksManager._ENCODER_DECODER_TASKS)\n                and model.config.is_encoder_decoder\n                and not monolith\n            ):\n                models_and_export_configs = get_encoder_decoder_models_for_export(model, export_config)\n
\n

doesn’t contain the key “decoder_with_past”, where the default export_config generated here

\n
           export_config_constructor = TasksManager.get_exporter_config_constructor(\n                model=model, exporter=exporter, task=task, library_name=library_name\n            )\n           export_config = export_config_constructor(\n                model.config,\n                int_dtype=int_dtype,\n                float_dtype=float_dtype,\n                preprocessors=preprocessors,\n            )\n
\n

with a default use_past=False, therefore would not generate a config for “decoder_with_past”.
\nAnd actually here is what I monkey_patched during the debugging.

\n

I think there is a high dependency between the export config and model config in optimum library, where I although use a customized encoder but still the VisionEncoderDecoder Config as the outermost config, which leads me to the not custom_architecture config processing logic here, which leads to the above error, which may not considered as a normal scenario in design.

\n
    if not custom_architecture:\n        if library_name == ""diffusers"":\n            export_config = None\n            models_and_export_configs = get_diffusion_models_for_export(\n                model, int_dtype=int_dtype, float_dtype=float_dtype, exporter=exporter\n            )\n        else:\n            export_config_constructor = TasksManager.get_exporter_config_constructor(\n                model=model, exporter=exporter, task=task, library_name=library_name\n            )\n            export_config = export_config_constructor(\n                model.config,\n                int_dtype=int_dtype,\n                float_dtype=float_dtype,\n                preprocessors=preprocessors,\n            )\n\n            export_config.variant = _variant\n            all_variants = ""\\n"".join(\n                [f""    - {name}: {description}"" for name, description in export_config.VARIANTS.items()]\n            )\n            logger.info(f""Using the export variant {export_config.variant}. Available variants are:\\n{all_variants}"")\n\n            # TODO: this succession of if/else strongly suggests a refactor is needed.\n            if (\n                task.startswith(TasksManager._ENCODER_DECODER_TASKS)\n                and model.config.is_encoder_decoder\n                and not monolith\n            ):\n                models_and_export_configs = get_encoder_decoder_models_for_export(model, export_config)\n            elif task.startswith(""text-generation"") and not monolith:\n                models_and_export_configs = get_decoder_models_for_export(model, export_config)\n            elif model.config.model_type == ""sam"":\n                models_and_export_configs = get_sam_models_for_export(model, export_config)\n            elif model.config.model_type == ""speecht5"":\n                models_and_export_configs = get_speecht5_models_for_export(model, export_config, model_kwargs)\n            elif model.config.model_type == ""musicgen"":\n                models_and_export_configs = get_musicgen_models_for_export(model, export_config)\n            else:\n                models_and_export_configs = {""model"": (model, export_config)}\n\n        # When specifying custom export configs for supported transformers architectures, we do\n        # not force to specify a custom export config for each submodel.\n        for key, custom_export_config in custom_export_configs.items():\n            models_and_export_configs[key] = (models_and_export_configs[key][0], custom_export_config)\n
', 'post_number': 8, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-10-14T09:00:23.165Z', 'reply_count': 1, 'reply_to_post_number': 7, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 4, 'readers_count': 3, 'score': 20.8, 'yours': False, 'topic_id': 169036, 'topic_slug': 'how-to-make-my-customized-pipeline-consumable-for-transformers-js', 'display_username': 'Sicheng Mao', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 2, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 104516, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/how-to-make-my-customized-pipeline-consumable-for-transformers-js/169036/8', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243569, 'name': 'Sicheng Mao', 'username': 'alephpi', 'avatar_template': '/user_avatar/discuss.huggingface.co/alephpi/{size}/54288_2.png', 'created_at': '2025-10-14T09:27:23.844Z', 'cooked': '

Alright, actually we don’t need those verbose configs, just change the task from “image-to-text” to “image-to-text-with-past” will solve the issue (no monkey-patch)

\n
def export_onnx():\n    path=\'./model\'\n    out = Path(""./model/trio_onnx"")\n    out.mkdir(exist_ok=True)\n    main_export(\n        path,\n        task=""image-to-text-with-past"", # to get trio onnx model, use ""-with-past"", otherwise use ""image-to-text""\n        output=out,\n    )\n
', 'post_number': 9, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-10-14T09:27:35.932Z', 'reply_count': 0, 'reply_to_post_number': 8, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 3, 'readers_count': 2, 'score': 15.6, 'yours': False, 'topic_id': 169036, 'topic_slug': 'how-to-make-my-customized-pipeline-consumable-for-transformers-js', 'display_username': 'Sicheng Mao', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 104516, 'username': 'alephpi', 'name': 'Sicheng Mao', 'avatar_template': '/user_avatar/discuss.huggingface.co/alephpi/{size}/54288_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 104516, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/how-to-make-my-customized-pipeline-consumable-for-transformers-js/169036/9', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243573, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-10-14T11:37:36.605Z', 'cooked': '

Great. About _with_past

', 'post_number': 10, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-10-14T11:37:36.605Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 4, 'readers_count': 3, 'score': 5.8, 'yours': False, 'topic_id': 169036, 'topic_slug': 'how-to-make-my-customized-pipeline-consumable-for-transformers-js', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://discuss.huggingface.co/t/what-does-the-decoder-with-past-values-means/21088/2', 'internal': True, 'reflection': False, 'title': 'What does the decoder with past values means', 'clicks': 1}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/how-to-make-my-customized-pipeline-consumable-for-transformers-js/169036/10', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 244005, 'name': 'Sicheng Mao', 'username': 'alephpi', 'avatar_template': '/user_avatar/discuss.huggingface.co/alephpi/{size}/54288_2.png', 'created_at': '2025-10-23T09:33:46.333Z', 'cooked': '

Hi John,

\n

I’ve finally succeeded in implementing the above things. Thanks for your help!
\nYet I still have some other questions and I think I’d better create a new discussion.

', 'post_number': 11, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-10-23T09:36:01.027Z', 'reply_count': 0, 'reply_to_post_number': 10, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 2, 'readers_count': 1, 'score': 15.4, 'yours': False, 'topic_id': 169036, 'topic_slug': 'how-to-make-my-customized-pipeline-consumable-for-transformers-js', 'display_username': 'Sicheng Mao', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 2, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 104516, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/how-to-make-my-customized-pipeline-consumable-for-transformers-js/169036/11', 'reactions': [{'id': 'confetti_ball', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 244029, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-10-23T21:34:35.488Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 12, 'post_type': 3, 'posts_count': 12, 'updated_at': '2025-10-23T21:34:35.488Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 1, 'readers_count': 0, 'score': 0.2, 'yours': False, 'topic_id': 169036, 'topic_slug': 'how-to-make-my-customized-pipeline-consumable-for-transformers-js', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/how-to-make-my-customized-pipeline-consumable-for-transformers-js/169036/12', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

Hi community,

-

Here is my image-to-text pipeline:

-

(customized means not a registered one in official Transformers)

-

A customized Image processor,

-

A VisionEncoderDecoder, with a customized vision encoder that inherits the PretrainedModel and a MBartDecoder,

-

A WordLevel tokenizer (yes I haven’t used a MBartTokenizer and I have distilled my own one for specific corpus).

-

I want to consume this pipeline in Transformers.js, however I notice that all examples given in Transformers.js documentation seem like pulling from a ready made Transformers pipeline with official components and configurations, I just wonder is it possible to turn my customized pipeline consumable for Transformers.js, or to what extent my pipeline could be partially turned to?

-

My guess is that the I should make my own image preprocessing step and send the image input tensor to the model, in that way, which kind of js libraries you recommend to use? (It won’t be very intensive, just simply resize and normalize things plus a crop-white-margin function which doesn’t exist in Transformers’ image processors).

-

Also just to be sure, is my VisionEncoderDecoder possible to export to an onnx format to be consumable for Transformers.js?

-

Of course my model should be possible to run in browser (and that’s the whole point for me to do this), as it has only 20M parameters (way less than the showcase in Transformers.js)

-

Thanks for your help in advance!

";Yes it is possible : https://huggingface.co/docs/transformers.js/v3.0.0/custom_usage;1 -Issue with TorchCodec when fine-tuning Whisper ASR model;https://discuss.huggingface.co/t/issue-with-torchcodec-when-fine-tuning-whisper-asr-model/169315;169315;5;2025-10-21 07:37:40.941000+00:00;"[{'id': 243905, 'name': 'Ong Jun Rong', 'username': 'junnyrong', 'avatar_template': '/user_avatar/discuss.huggingface.co/junnyrong/{size}/54763_2.png', 'created_at': '2025-10-21T07:37:41.012Z', 'cooked': '

Hello,

\n

In the past I have been fine tuning the Whisper-tiny ASR model using these guides:

\n\n\n\n\n

It was all working fine, I was able do everything locally like loading a pre-trained Whisper-tiny model and also my own dataset until recently when I updated the modules. I have been getting errors like these:

\n

image1430×618 30.9 KB

\n

I have tried falling back and testing the samples provided by the guides and they also seem to have broke and started giving the same error. I also tried running them on Google Colab where it will crash when trying to run a cell like this:

\n

image693×400 11.8 KB

\n

I would like to know if anyone else is also facing the same issue and if there are any solutions for it. Thanks in advance!

', 'post_number': 1, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-10-21T07:37:41.012Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 21, 'reads': 4, 'readers_count': 3, 'score': 50.8, 'yours': False, 'topic_id': 169315, 'topic_slug': 'issue-with-torchcodec-when-fine-tuning-whisper-asr-model', 'display_username': 'Ong Jun Rong', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://learnopencv.com/fine-tuning-whisper-on-custom-dataset/', 'internal': False, 'reflection': False, 'title': 'Fine Tuning Whisper on Custom Dataset', 'clicks': 2}, {'url': 'https://huggingface.co/blog/fine-tune-whisper', 'internal': False, 'reflection': False, 'title': 'Fine-Tune Whisper For Multilingual ASR with 🤗 Transformers', 'clicks': 1}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 105467, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/issue-with-torchcodec-when-fine-tuning-whisper-asr-model/169315/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 243907, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-10-21T08:37:37.072Z', 'cooked': '

This error appears to stem from changes to the audio backend in the datasets library. The quickest workaround may be to install using pip install datasets==3.6.0. Additionally, if using version 4.0.0 or later, builder script-type datasets can no longer be used directly from the Hub. You will need to find and use datasets that have been converted to the standard type beforehand. If the original datasets were standard datasets, the latter issue should not be a problem.

\n

Additionally, since Transformers underwent significant changes around version 4.49.0, if you encounter errors related to Whisper, rolling transformers back to version 4.48.3 or earlier would be the simplest workaround. Of course, rewriting for the new version is preferable… but for a temporary fix.

\n
\n

Your error started after upgrading to Datasets 4.x. 4.x switched audio decoding to TorchCodec, which loads FFmpeg at runtime and also requires a matching torch↔torchcodec pair. Accessing or printing an Audio column now triggers that decode path, so if FFmpeg is missing or versions don’t line up, you see the probe-and-fail chain (core7 → core6 → core5 → core4 ... Could not load torchcodec). On Windows this is more brittle, and early 4.0 notes even said Windows was not supported yet. (Hugging Face)

\n

Why it broke now

\n\n

Typical root causes

\n
    \n
  1. FFmpeg missing or wrong major. TorchCodec supports FFmpeg majors 4–7 on all platforms, with 8 only on macOS/Linux. Missing or mismatched DLLs yields your exact probe sequence. (GitHub)
  2. \n
  3. Torch↔TorchCodec mismatch. Use the official matrix. Example: torchcodec 0.7 ↔ torch 2.8; 0.8 ↔ 2.9. (GitHub)
  4. \n
  5. Fresh 4.0 regressions. Multiple reports show 3.x works then 4.x fails until TorchCodec+FFmpeg are added and versions pinned. (GitHub)
  6. \n
\n

Fixes and workarounds

\n

Pick one path. Keep it pinned.

\n

A) Fastest unblock on Windows

\n
# Downgrade Datasets to pre-TorchCodec behavior\npip install ""datasets<4.0.0""  # release notes flagged Windows not ready\n# https://github.com/huggingface/datasets/releases/tag/4.0.0\n
\n

(GitHub)

\n

B) Stay on Datasets 4.x and make it work

\n
# Windows CPU: install FFmpeg and match versions\nconda install -c conda-forge ""ffmpeg<8""        # README recommends conda FFmpeg\npip install ""torch==2.8.*"" ""torchcodec==0.7.*"" # matrix: 0.7 <-> 2.8\n# https://github.com/meta-pytorch/torchcodec#installing-torchcodec\n
\n

If you need CUDA on Windows, use the experimental conda package:

\n
conda install -c conda-forge ""ffmpeg<8"" ""torchcodec=*=*cuda*""\n# https://github.com/meta-pytorch/torchcodec#installing-cuda-enabled-torchcodec\n
\n

(GitHub)

\n

C) Linux or Colab

\n
# Colab VM or Linux\napt-get update && apt-get install -y ffmpeg\npip install -U ""datasets[audio]"" ""torch==2.8.*"" ""torchcodec==0.7.*""\n# HF docs: audio decoding uses TorchCodec + FFmpeg\n# https://huggingface.co/docs/datasets/en/audio_load\n
\n

(Hugging Face)

\n

D) Bypass decoding while you train

\n

Avoid TorchCodec until your env is fixed.

\n
from datasets import Audio\n# Option 1: disable globally\nds = ds.decode(False)  # https://huggingface.co/docs/datasets/en/package_reference/main_classes#datasets.Dataset.decode\n# Option 2: disable per column\nds = ds.cast_column(""audio"", Audio(decode=False))  # https://huggingface.co/docs/datasets/en/about_dataset_features\n
\n

These return paths/bytes rather than decoded arrays, so printing items won’t invoke TorchCodec. (Hugging Face)

\n

Sanity checks

\n
python - <<\'PY\'\nimport subprocess, sys\nimport torch\nprint(""python:"", sys.version)\nprint(""torch:"", torch.__version__)\ntry:\n    import torchcodec\n    print(""torchcodec:"", torchcodec.__version__)\nexcept Exception as e:\n    print(""torchcodec import failed:"", e)\nsubprocess.run([""ffmpeg"", ""-hide_banner"", ""-version""])\nPY\n# Matrix and FFmpeg policy:\n# https://github.com/meta-pytorch/torchcodec#installing-torchcodec\n
\n

(GitHub)

\n

Context from your linked thread

\n

Your screenshots show Datasets 4.x decoding an Audio column, TorchCodec probing FFmpeg 7→6→5→4, then failing. That matches the new 4.x behavior and the FFmpeg/compatibility requirements above. (Hugging Face Forums)

\n

Extra references and pitfalls

\n', 'post_number': 2, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-10-21T08:37:37.072Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 5, 'reads': 3, 'readers_count': 2, 'score': 15.6, 'yours': False, 'topic_id': 169315, 'topic_slug': 'issue-with-torchcodec-when-fine-tuning-whisper-asr-model', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/docs/datasets/en/audio_load', 'internal': False, 'reflection': False, 'title': 'Load audio data', 'clicks': 1}, {'url': 'https://github.com/huggingface/datasets/issues/7678', 'internal': False, 'reflection': False, 'title': ""To support decoding audio data, please install 'torchcodec'. · Issue #7678 · huggingface/datasets · GitHub"", 'clicks': 1}, {'url': 'https://newreleases.io/project/github/huggingface/datasets/release/4.0.0', 'internal': False, 'reflection': False, 'title': 'huggingface/datasets 4.0.0 on GitHub', 'clicks': 0}, {'url': 'https://huggingface.co/lhoestq/datasets', 'internal': False, 'reflection': False, 'title': 'lhoestq (Quentin Lhoest)', 'clicks': 0}, {'url': 'https://github.com/meta-pytorch/torchcodec', 'internal': False, 'reflection': False, 'title': 'GitHub - meta-pytorch/torchcodec: PyTorch media decoding and encoding', 'clicks': 0}, {'url': 'https://docs.pytorch.org/audio/main/torchaudio.html', 'internal': False, 'reflection': False, 'title': 'torchaudio — Torchaudio 2.8.0 documentation', 'clicks': 0}, {'url': 'https://github.com/huggingface/datasets/releases', 'internal': False, 'reflection': False, 'title': 'Releases · huggingface/datasets · GitHub', 'clicks': 0}, {'url': 'https://github.com/pytorch/torchcodec/issues/570', 'internal': False, 'reflection': False, 'title': 'torchcodec not compatible with brew-installed ffmpeg · Issue #570 · meta-pytorch/torchcodec · GitHub', 'clicks': 0}, {'url': 'https://huggingface.co/docs/datasets/en/package_reference/main_classes', 'internal': False, 'reflection': False, 'title': 'Main classes', 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/issue-with-torchcodec-when-fine-tuning-whisper-asr-model/169315/2', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 243937, 'name': 'Ong Jun Rong', 'username': 'junnyrong', 'avatar_template': '/user_avatar/discuss.huggingface.co/junnyrong/{size}/54763_2.png', 'created_at': '2025-10-22T01:45:23.750Z', 'cooked': '

I was pulling my hair thinking it has something to do with TorchCodec’s versioning, it never came to me that it might have been datasets! Thank you so much for the detailed explanation too, that solved my issue

', 'post_number': 3, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-10-22T01:45:23.750Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 2, 'readers_count': 1, 'score': 15.4, 'yours': False, 'topic_id': 169315, 'topic_slug': 'issue-with-torchcodec-when-fine-tuning-whisper-asr-model', 'display_username': 'Ong Jun Rong', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 105467, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/issue-with-torchcodec-when-fine-tuning-whisper-asr-model/169315/3', 'reactions': [{'id': 'confetti_ball', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243964, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-10-22T13:45:34.064Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 4, 'post_type': 3, 'posts_count': 4, 'updated_at': '2025-10-22T13:45:34.064Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 1, 'readers_count': 0, 'score': 5.2, 'yours': False, 'topic_id': 169315, 'topic_slug': 'issue-with-torchcodec-when-fine-tuning-whisper-asr-model', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/issue-with-torchcodec-when-fine-tuning-whisper-asr-model/169315/4', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

Hello,

-

In the past I have been fine tuning the Whisper-tiny ASR model using these guides:

- - - - -

It was all working fine, I was able do everything locally like loading a pre-trained Whisper-tiny model and also my own dataset until recently when I updated the modules. I have been getting errors like these:

-

image1430×618 30.9 KB

-

I have tried falling back and testing the samples provided by the guides and they also seem to have broke and started giving the same error. I also tried running them on Google Colab where it will crash when trying to run a cell like this:

-

image693×400 11.8 KB

-

I would like to know if anyone else is also facing the same issue and if there are any solutions for it. Thanks in advance!

";"

This error appears to stem from changes to the audio backend in the datasets library. The quickest workaround may be to install using pip install datasets==3.6.0. Additionally, if using version 4.0.0 or later, builder script-type datasets can no longer be used directly from the Hub. You will need to find and use datasets that have been converted to the standard type beforehand. If the original datasets were standard datasets, the latter issue should not be a problem.

-

Additionally, since Transformers underwent significant changes around version 4.49.0, if you encounter errors related to Whisper, rolling transformers back to version 4.48.3 or earlier would be the simplest workaround. Of course, rewriting for the new version is preferable… but for a temporary fix.

-
-

Your error started after upgrading to Datasets 4.x. 4.x switched audio decoding to TorchCodec, which loads FFmpeg at runtime and also requires a matching torch↔torchcodec pair. Accessing or printing an Audio column now triggers that decode path, so if FFmpeg is missing or versions don’t line up, you see the probe-and-fail chain (core7 → core6 → core5 → core4 ... Could not load torchcodec). On Windows this is more brittle, and early 4.0 notes even said Windows was not supported yet. (Hugging Face)

-

Why it broke now

- -

Typical root causes

-
    -
  1. FFmpeg missing or wrong major. TorchCodec supports FFmpeg majors 4–7 on all platforms, with 8 only on macOS/Linux. Missing or mismatched DLLs yields your exact probe sequence. (GitHub)
  2. -
  3. Torch↔TorchCodec mismatch. Use the official matrix. Example: torchcodec 0.7 ↔ torch 2.8; 0.8 ↔ 2.9. (GitHub)
  4. -
  5. Fresh 4.0 regressions. Multiple reports show 3.x works then 4.x fails until TorchCodec+FFmpeg are added and versions pinned. (GitHub)
  6. -
-

Fixes and workarounds

-

Pick one path. Keep it pinned.

-

A) Fastest unblock on Windows

-
# Downgrade Datasets to pre-TorchCodec behavior
-pip install ""datasets<4.0.0""  # release notes flagged Windows not ready
-# https://github.com/huggingface/datasets/releases/tag/4.0.0
-
-

(GitHub)

-

B) Stay on Datasets 4.x and make it work

-
# Windows CPU: install FFmpeg and match versions
-conda install -c conda-forge ""ffmpeg<8""        # README recommends conda FFmpeg
-pip install ""torch==2.8.*"" ""torchcodec==0.7.*"" # matrix: 0.7 <-> 2.8
-# https://github.com/meta-pytorch/torchcodec#installing-torchcodec
-
-

If you need CUDA on Windows, use the experimental conda package:

-
conda install -c conda-forge ""ffmpeg<8"" ""torchcodec=*=*cuda*""
-# https://github.com/meta-pytorch/torchcodec#installing-cuda-enabled-torchcodec
-
-

(GitHub)

-

C) Linux or Colab

-
# Colab VM or Linux
-apt-get update && apt-get install -y ffmpeg
-pip install -U ""datasets[audio]"" ""torch==2.8.*"" ""torchcodec==0.7.*""
-# HF docs: audio decoding uses TorchCodec + FFmpeg
-# https://huggingface.co/docs/datasets/en/audio_load
-
-

(Hugging Face)

-

D) Bypass decoding while you train

-

Avoid TorchCodec until your env is fixed.

-
from datasets import Audio
-# Option 1: disable globally
-ds = ds.decode(False)  # https://huggingface.co/docs/datasets/en/package_reference/main_classes#datasets.Dataset.decode
-# Option 2: disable per column
-ds = ds.cast_column(""audio"", Audio(decode=False))  # https://huggingface.co/docs/datasets/en/about_dataset_features
-
-

These return paths/bytes rather than decoded arrays, so printing items won’t invoke TorchCodec. (Hugging Face)

-

Sanity checks

-
python - <<'PY'
-import subprocess, sys
-import torch
-print(""python:"", sys.version)
-print(""torch:"", torch.__version__)
-try:
-    import torchcodec
-    print(""torchcodec:"", torchcodec.__version__)
-except Exception as e:
-    print(""torchcodec import failed:"", e)
-subprocess.run([""ffmpeg"", ""-hide_banner"", ""-version""])
-PY
-# Matrix and FFmpeg policy:
-# https://github.com/meta-pytorch/torchcodec#installing-torchcodec
-
-

(GitHub)

-

Context from your linked thread

-

Your screenshots show Datasets 4.x decoding an Audio column, TorchCodec probing FFmpeg 7→6→5→4, then failing. That matches the new 4.x behavior and the FFmpeg/compatibility requirements above. (Hugging Face Forums)

-

Extra references and pitfalls

-";1 -[HF Space not starting] Repeatedly crashes: @semmyKG];https://discuss.huggingface.co/t/hf-space-not-starting-repeatedly-crashes-semmykg/169242;169242;24;2025-10-17 14:59:37.863000+00:00;"[{'id': 243751, 'name': 'Researcher', 'username': 'semmyk', 'avatar_template': '/user_avatar/discuss.huggingface.co/semmyk/{size}/46712_2.png', 'created_at': '2025-10-17T14:59:37.920Z', 'cooked': '

[HF Space repeatedly crashes: semmyKG]

\n

HF support team,

\n

May we request your kind assistance in looking into this HF space

\n\n

We have made private and public
\nWe have restarted multiple times: from the debug, from settings
\nWe have factory rebuilt from settings

\n

It appears the requirements were ‘successfully’ installed.

\n

The last logs

\n
===== Application Startup at 2025-10-17 14:16:51 ===== \n=== Application restarted at 2025-10-17 14:18:42.702953130 UTC === \n=== Application restarted at 2025-10-17 14:18:42.703405200 UTC === \n=== Application restarted at 2025-10-17 14:18:42.708956192 UTC === \n=== Application stopped (exit code: 0) at 2025-10-17 14:18:53.031719893 UTC ===\n
', 'post_number': 1, 'post_type': 1, 'posts_count': 7, 'updated_at': '2025-10-17T14:59:37.920Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 44, 'reads': 6, 'readers_count': 5, 'score': 66.2, 'yours': False, 'topic_id': 169242, 'topic_slug': 'hf-space-not-starting-repeatedly-crashes-semmykg', 'display_username': 'Researcher', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/spaces/semmyk/semmyKG', 'internal': False, 'reflection': False, 'title': 'semmyKG - Knowledge Graph visualiser toolkit (builder from markdown) - a Hugging Face Space by semmyk', 'clicks': 4}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 92554, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/hf-space-not-starting-repeatedly-crashes-semmykg/169242/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 243754, 'name': 'Megan Riley', 'username': 'meganariley', 'avatar_template': '/user_avatar/discuss.huggingface.co/meganariley/{size}/20596_2.png', 'created_at': '2025-10-17T17:09:42.992Z', 'cooked': '

Hey, thanks for reporting! We’re investigating and I’ll update you soon.

', 'post_number': 2, 'post_type': 1, 'posts_count': 7, 'updated_at': '2025-10-17T17:09:42.992Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 5, 'reads': 5, 'readers_count': 4, 'score': 31.0, 'yours': False, 'topic_id': 169242, 'topic_slug': 'hf-space-not-starting-repeatedly-crashes-semmykg', 'display_username': 'Megan Riley', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': True, 'admin': False, 'staff': True, 'user_id': 31941, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/hf-space-not-starting-repeatedly-crashes-semmykg/169242/2', 'reactions': [{'id': 'hugs', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243890, 'name': 'Megan Riley', 'username': 'meganariley', 'avatar_template': '/user_avatar/discuss.huggingface.co/meganariley/{size}/20596_2.png', 'created_at': '2025-10-20T22:36:55.714Z', 'cooked': '

Hi @semmyk can you please disable Dev Mode in the settings of the Space and restart? Let us know if you continue experiencing issues.

', 'post_number': 3, 'post_type': 1, 'posts_count': 7, 'updated_at': '2025-10-20T22:36:55.714Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 4, 'readers_count': 3, 'score': 20.8, 'yours': False, 'topic_id': 169242, 'topic_slug': 'hf-space-not-starting-repeatedly-crashes-semmykg', 'display_username': 'Megan Riley', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': True, 'admin': False, 'staff': True, 'user_id': 31941, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/hf-space-not-starting-repeatedly-crashes-semmykg/169242/3', 'reactions': [{'id': 'hugs', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243894, 'name': 'Researcher', 'username': 'semmyk', 'avatar_template': '/user_avatar/discuss.huggingface.co/semmyk/{size}/46712_2.png', 'created_at': '2025-10-21T00:00:13.744Z', 'cooked': '

@meganariley Thanks for coming back too us. We’ve disabled Dev Mode: … Getting …

\n

runtime error … Exit code: 0. Reason: application does not seem to be initialized

\n
===== Application Startup at 2025-10-20 23:50:46 =====\n
\n

NB: Also tried … Restart Space, Factory reset, restart Space, Disable Dev, enable Dev mode, restart, Disable Dev Mode

', 'post_number': 4, 'post_type': 1, 'posts_count': 7, 'updated_at': '2025-10-21T00:00:13.744Z', 'reply_count': 0, 'reply_to_post_number': 3, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 4, 'readers_count': 3, 'score': 15.8, 'yours': False, 'topic_id': 169242, 'topic_slug': 'hf-space-not-starting-repeatedly-crashes-semmykg', 'display_username': 'Researcher', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 31941, 'username': 'meganariley', 'name': 'Megan Riley', 'avatar_template': '/user_avatar/discuss.huggingface.co/meganariley/{size}/20596_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 92554, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/hf-space-not-starting-repeatedly-crashes-semmykg/169242/4', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243895, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-10-21T00:10:55.333Z', 'cooked': '

In README.md:

\n
app_file: app_gradio_lightrag.py\n
\n

But seems actual Gradio UI code is in app.py.
\nSo, setting app_file: app.py might resolve the issue?

', 'post_number': 5, 'post_type': 1, 'posts_count': 7, 'updated_at': '2025-10-21T00:10:55.333Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 5, 'reads': 4, 'readers_count': 3, 'score': 30.8, 'yours': False, 'topic_id': 169242, 'topic_slug': 'hf-space-not-starting-repeatedly-crashes-semmykg', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/spaces/semmyk/semmyKG/blob/main/README.md', 'internal': False, 'reflection': False, 'title': 'README.md · semmyk/semmyKG at main', 'clicks': 0}, {'url': 'https://huggingface.co/spaces/semmyk/semmyKG/blob/main/app_gradio_lightrag.py#L831', 'internal': False, 'reflection': False, 'title': 'app_gradio_lightrag.py · semmyk/semmyKG at main', 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/hf-space-not-starting-repeatedly-crashes-semmykg/169242/5', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 243926, 'name': 'Researcher', 'username': 'semmyk', 'avatar_template': '/user_avatar/discuss.huggingface.co/semmyk/{size}/46712_2.png', 'created_at': '2025-10-21T18:51:20.001Z', 'cooked': '

@John6666 oops, . That gets it initialised. Apparently, we forgot to update that section of the README after we spilt the Entre point + Gradio UI from the processing coordinating module.

\n

We’d update once we Space working. At the moment, there is port issue.

', 'post_number': 6, 'post_type': 1, 'posts_count': 7, 'updated_at': '2025-10-21T18:51:20.001Z', 'reply_count': 0, 'reply_to_post_number': 5, 'quote_count': 0, 'incoming_link_count': 2, 'reads': 3, 'readers_count': 2, 'score': 25.6, 'yours': False, 'topic_id': 169242, 'topic_slug': 'hf-space-not-starting-repeatedly-crashes-semmykg', 'display_username': 'Researcher', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 92554, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/hf-space-not-starting-repeatedly-crashes-semmykg/169242/6', 'reactions': [{'id': 'laughing', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243953, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-10-22T10:44:41.140Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 7, 'post_type': 3, 'posts_count': 7, 'updated_at': '2025-10-22T10:44:41.140Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 1, 'readers_count': 0, 'score': 0.2, 'yours': False, 'topic_id': 169242, 'topic_slug': 'hf-space-not-starting-repeatedly-crashes-semmykg', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/hf-space-not-starting-repeatedly-crashes-semmykg/169242/7', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

[HF Space repeatedly crashes: semmyKG]

-

HF support team,

-

May we request your kind assistance in looking into this HF space

- -

We have made private and public
-We have restarted multiple times: from the debug, from settings
-We have factory rebuilt from settings

-

It appears the requirements were ‘successfully’ installed.

-

The last logs

-
===== Application Startup at 2025-10-17 14:16:51 ===== 
-=== Application restarted at 2025-10-17 14:18:42.702953130 UTC === 
-=== Application restarted at 2025-10-17 14:18:42.703405200 UTC === 
-=== Application restarted at 2025-10-17 14:18:42.708956192 UTC === 
-=== Application stopped (exit code: 0) at 2025-10-17 14:18:53.031719893 UTC ===
-
";"

In README.md:

-
app_file: app_gradio_lightrag.py
-
-

But seems actual Gradio UI code is in app.py.
-So, setting app_file: app.py might resolve the issue?

";1 -Replacing attention class with identical subclass creates hallucinations;https://discuss.huggingface.co/t/replacing-attention-class-with-identical-subclass-creates-hallucinations/169215;169215;6;2025-10-16 11:23:27.606000+00:00;"[{'id': 243707, 'name': 'Alexander Jephtha', 'username': 'AlexJephtha', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/a/d9b06d/{size}.png', 'created_at': '2025-10-16T11:23:27.668Z', 'cooked': '

I’m writing a custom versions of LlamaModels, and for one of those approaches I want to overwrite the attention mechanism of each layer. My code looks like this. Note that even when I define LlamaAttentionHybrid (a subclass of LlamaAttention) to be the exact same as LlamaAttention, I still get hallucination issues. This suggest I’m not correctly replacing the attention mechanism.

\n
class LlamaHybridForCausalLM(LlamaForCausalLM):\n    def __init__(self, config: LlamaHybridConfig):\n        super().__init__(config)\n        if config.hybrid:\n            for i, layer in enumerate(self.model.layers):\n                # Need to also copy attention weights\n                old_attn = layer.self_attn\n                layer.self_attn = LlamaAttentionHybrid(config, i)\n                layer.self_attn.load_state_dict(old_attn.state_dict())\n
\n

However, the model works completely fine when I write this code:

\n
class LlamaHybridForCausalLM(LlamaForCausalLM):\n    def __init__(self, config: LlamaHybridConfig):\n        super().__init__(config)\n        if config.hybrid:\n            for i, layer in enumerate(self.model.layers):\n                # Need to also copy attention weights\n                old_attn = layer.self_attn\n                layer.self_attn = LlamaAttention(config, i)\n                layer.self_attn.load_state_dict(old_attn.state_dict())\n
\n

Why would this happen even when in the subclass i don’t make any changes? Note, that the forward function here is defined exactly the same as the source code.

\n
class LlamaAttentionHybrid(LlamaAttention):\n    def __init__(self, config: LlamaHybridConfig, layer_idx: int):\n        super().__init__(config, layer_idx)\n\n    def forward(\n        self,\n        hidden_states: torch.Tensor,\n        position_embeddings: tuple[torch.Tensor, torch.Tensor],\n        attention_mask: Optional[torch.Tensor],\n        past_key_values: Optional[Cache] = None,\n        cache_position: Optional[torch.LongTensor] = None,\n        **kwargs: Unpack[FlashAttentionKwargs],\n    ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:\n\n        input_shape = hidden_states.shape[:-1]\n        hidden_shape = (*input_shape, -1, self.head_dim)\n\n        query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)\n        key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)\n        value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)\n\n        cos, sin = position_embeddings\n        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)\n\n        if past_key_values is not None:\n            # sin and cos are specific to RoPE models; cache_position needed for the static cache\n            cache_kwargs = {""sin"": sin, ""cos"": cos, ""cache_position"": cache_position}\n            key_states, value_states = past_key_values.update(key_states, value_states, self.layer_idx, cache_kwargs)\n\n        attention_interface: Callable = eager_attention_forward\n        if self.config._attn_implementation != ""eager"":\n            attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]\n\n        attn_output, attn_weights = attention_interface(\n            self,\n            query_states,\n            key_states,\n            value_states,\n            attention_mask,\n            dropout=0.0 if not self.training else self.attention_dropout,\n            scaling=self.scaling,\n            **kwargs,\n        )\n\n        attn_output = attn_output.reshape(*input_shape, -1).contiguous()\n        attn_output = self.o_proj(attn_output)\n        return attn_output, attn_weights\n
\n

Thanks!

\n

EDIT: I narrowed the issue down to the redefining of the forward function. For some reason when I add the forward function into the subclass even if it’s identical, the model hallucinates dramatically.

', 'post_number': 1, 'post_type': 1, 'posts_count': 5, 'updated_at': '2025-10-16T11:35:01.753Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 4, 'readers_count': 3, 'score': 15.8, 'yours': False, 'topic_id': 169215, 'topic_slug': 'replacing-attention-class-with-identical-subclass-creates-hallucinations', 'display_username': 'Alexander Jephtha', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 5, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 30474, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/replacing-attention-class-with-identical-subclass-creates-hallucinations/169215/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 243732, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-10-17T04:12:47.941Z', 'cooked': '

There may be points that can be fixed.

', 'post_number': 2, 'post_type': 1, 'posts_count': 5, 'updated_at': '2025-10-17T04:12:47.941Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 3, 'readers_count': 2, 'score': 20.6, 'yours': False, 'topic_id': 169215, 'topic_slug': 'replacing-attention-class-with-identical-subclass-creates-hallucinations', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/datasets/John6666/forum2/blob/main/attn_override_issue_1.md', 'internal': False, 'reflection': False, 'title': 'attn_override_issue_1.md · John6666/forum2 at main', 'clicks': 2}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/replacing-attention-class-with-identical-subclass-creates-hallucinations/169215/2', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243819, 'name': 'Alexander Jephtha', 'username': 'AlexJephtha', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/a/d9b06d/{size}.png', 'created_at': '2025-10-20T03:52:17.985Z', 'cooked': '

Thanks for your help!

', 'post_number': 3, 'post_type': 1, 'posts_count': 5, 'updated_at': '2025-10-20T03:52:17.985Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 2, 'readers_count': 1, 'score': 15.4, 'yours': False, 'topic_id': 169215, 'topic_slug': 'replacing-attention-class-with-identical-subclass-creates-hallucinations', 'display_username': 'Alexander Jephtha', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 30474, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/replacing-attention-class-with-identical-subclass-creates-hallucinations/169215/3', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243821, 'name': 'Alexander Jephtha', 'username': 'AlexJephtha', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/a/d9b06d/{size}.png', 'created_at': '2025-10-20T03:57:16.952Z', 'cooked': '

SOLUTION: With SDPA attention, passing in an attention_mask with value not equal to none overrides the causal attention mask! You need to fill the attention mask with -inf (or large negative number) in the upper right triangle. This is only really a problem when calculating the attention scores of the initial text input, since newly generated tokens don’t require any of the existing key tokens to be masked.

', 'post_number': 4, 'post_type': 1, 'posts_count': 5, 'updated_at': '2025-10-20T03:57:16.952Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 2, 'readers_count': 1, 'score': 15.4, 'yours': False, 'topic_id': 169215, 'topic_slug': 'replacing-attention-class-with-identical-subclass-creates-hallucinations', 'display_username': 'Alexander Jephtha', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 30474, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/replacing-attention-class-with-identical-subclass-creates-hallucinations/169215/4', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 243867, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-10-20T15:57:45.831Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 5, 'post_type': 3, 'posts_count': 5, 'updated_at': '2025-10-20T15:57:45.831Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 1, 'readers_count': 0, 'score': 0.2, 'yours': False, 'topic_id': 169215, 'topic_slug': 'replacing-attention-class-with-identical-subclass-creates-hallucinations', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/replacing-attention-class-with-identical-subclass-creates-hallucinations/169215/5', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

I’m writing a custom versions of LlamaModels, and for one of those approaches I want to overwrite the attention mechanism of each layer. My code looks like this. Note that even when I define LlamaAttentionHybrid (a subclass of LlamaAttention) to be the exact same as LlamaAttention, I still get hallucination issues. This suggest I’m not correctly replacing the attention mechanism.

-
class LlamaHybridForCausalLM(LlamaForCausalLM):
-    def __init__(self, config: LlamaHybridConfig):
-        super().__init__(config)
-        if config.hybrid:
-            for i, layer in enumerate(self.model.layers):
-                # Need to also copy attention weights
-                old_attn = layer.self_attn
-                layer.self_attn = LlamaAttentionHybrid(config, i)
-                layer.self_attn.load_state_dict(old_attn.state_dict())
-
-

However, the model works completely fine when I write this code:

-
class LlamaHybridForCausalLM(LlamaForCausalLM):
-    def __init__(self, config: LlamaHybridConfig):
-        super().__init__(config)
-        if config.hybrid:
-            for i, layer in enumerate(self.model.layers):
-                # Need to also copy attention weights
-                old_attn = layer.self_attn
-                layer.self_attn = LlamaAttention(config, i)
-                layer.self_attn.load_state_dict(old_attn.state_dict())
-
-

Why would this happen even when in the subclass i don’t make any changes? Note, that the forward function here is defined exactly the same as the source code.

-
class LlamaAttentionHybrid(LlamaAttention):
-    def __init__(self, config: LlamaHybridConfig, layer_idx: int):
-        super().__init__(config, layer_idx)
-
-    def forward(
-        self,
-        hidden_states: torch.Tensor,
-        position_embeddings: tuple[torch.Tensor, torch.Tensor],
-        attention_mask: Optional[torch.Tensor],
-        past_key_values: Optional[Cache] = None,
-        cache_position: Optional[torch.LongTensor] = None,
-        **kwargs: Unpack[FlashAttentionKwargs],
-    ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
-
-        input_shape = hidden_states.shape[:-1]
-        hidden_shape = (*input_shape, -1, self.head_dim)
-
-        query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
-        key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
-        value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)
-
-        cos, sin = position_embeddings
-        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
-
-        if past_key_values is not None:
-            # sin and cos are specific to RoPE models; cache_position needed for the static cache
-            cache_kwargs = {""sin"": sin, ""cos"": cos, ""cache_position"": cache_position}
-            key_states, value_states = past_key_values.update(key_states, value_states, self.layer_idx, cache_kwargs)
-
-        attention_interface: Callable = eager_attention_forward
-        if self.config._attn_implementation != ""eager"":
-            attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]
-
-        attn_output, attn_weights = attention_interface(
-            self,
-            query_states,
-            key_states,
-            value_states,
-            attention_mask,
-            dropout=0.0 if not self.training else self.attention_dropout,
-            scaling=self.scaling,
-            **kwargs,
-        )
-
-        attn_output = attn_output.reshape(*input_shape, -1).contiguous()
-        attn_output = self.o_proj(attn_output)
-        return attn_output, attn_weights
-
-

Thanks!

-

EDIT: I narrowed the issue down to the redefining of the forward function. For some reason when I add the forward function into the subclass even if it’s identical, the model hallucinates dramatically.

";

SOLUTION: With SDPA attention, passing in an attention_mask with value not equal to none overrides the causal attention mask! You need to fill the attention mask with -inf (or large negative number) in the upper right triangle. This is only really a problem when calculating the attention scores of the initial text input, since newly generated tokens don’t require any of the existing key tokens to be masked.

;1 -Cannot load Conll2003;https://discuss.huggingface.co/t/cannot-load-conll2003/169142;169142;10;2025-10-14 12:17:33.072000+00:00;"[{'id': 243574, 'name': 'Radek Štulc', 'username': 'stulcrad', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/s/4bbf92/{size}.png', 'created_at': '2025-10-14T12:17:33.129Z', 'cooked': '

I am trying to load conll2003 dataset the basic way I learned like this

\n
from datasets import load_dataset\ndataset = load_dataset(""conll2003"")\n
\n

but I am running into this error

\n
---------------------------------------------------------------------------\nRuntimeError                              Traceback (most recent call last)\nCell In[15], line 3\n      1 from datasets import load_dataset\n----> 3 dataset = load_dataset(""conll2003"")\n\nFile ~/.local/lib/python3.12/site-packages/datasets/load.py:1397, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, keep_in_memory, save_infos, revision, token, streaming, num_proc, storage_options, **config_kwargs)\n   1392 verification_mode = VerificationMode(\n   1393     (verification_mode or VerificationMode.BASIC_CHECKS) if not save_infos else VerificationMode.ALL_CHECKS\n   1394 )\n   1396 # Create a dataset builder\n-> 1397 builder_instance = load_dataset_builder(\n   1398     path=path,\n   1399     name=name,\n   1400     data_dir=data_dir,\n   1401     data_files=data_files,\n   1402     cache_dir=cache_dir,\n   1403     features=features,\n   1404     download_config=download_config,\n   1405     download_mode=download_mode,\n   1406     revision=revision,\n   1407     token=token,\n   1408     storage_options=storage_options,\n   1409     **config_kwargs,\n   1410 )\n   1412 # Return iterable dataset in case of streaming\n   1413 if streaming:\n\nFile ~/.local/lib/python3.12/site-packages/datasets/load.py:1137, in load_dataset_builder(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, token, storage_options, **config_kwargs)\n   1135 if features is not None:\n   1136     features = _fix_for_backward_compatible_features(features)\n-> 1137 dataset_module = dataset_module_factory(\n   1138     path,\n   1139     revision=revision,\n   1140     download_config=download_config,\n   1141     download_mode=download_mode,\n   1142     data_dir=data_dir,\n   1143     data_files=data_files,\n   1144     cache_dir=cache_dir,\n   1145 )\n   1146 # Get dataset builder class\n   1147 builder_kwargs = dataset_module.builder_kwargs\n\nFile ~/.local/lib/python3.12/site-packages/datasets/load.py:1036, in dataset_module_factory(path, revision, download_config, download_mode, data_dir, data_files, cache_dir, **download_kwargs)\n   1031             if isinstance(e1, FileNotFoundError):\n   1032                 raise FileNotFoundError(\n   1033                     f""Couldn\'t find any data file at {relative_to_absolute_path(path)}. ""\n   1034                     f""Couldn\'t find \'{path}\' on the Hugging Face Hub either: {type(e1).__name__}: {e1}""\n   1035                 ) from None\n-> 1036             raise e1 from None\n   1037 else:\n   1038     raise FileNotFoundError(f""Couldn\'t find any data file at {relative_to_absolute_path(path)}."")\n\nFile ~/.local/lib/python3.12/site-packages/datasets/load.py:994, in dataset_module_factory(path, revision, download_config, download_mode, data_dir, data_files, cache_dir, **download_kwargs)\n    986 try:\n    987     api.hf_hub_download(\n    988         repo_id=path,\n    989         filename=filename,\n   (...)\n    992         proxies=download_config.proxies,\n    993     )\n--> 994     raise RuntimeError(f""Dataset scripts are no longer supported, but found {filename}"")\n    995 except EntryNotFoundError:\n    996     # Use the infos from the parquet export except in some cases:\n    997     if data_dir or data_files or (revision and revision != ""main""):\n\nRuntimeError: Dataset scripts are no longer supported, but found conll2003.py\n
\n

Could someone tell me what is wrong?

', 'post_number': 1, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-10-14T12:17:33.129Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 43, 'reads': 8, 'readers_count': 7, 'score': 121.4, 'yours': False, 'topic_id': 169142, 'topic_slug': 'cannot-load-conll2003', 'display_username': 'Radek Štulc', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 41660, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/cannot-load-conll2003/169142/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 243575, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-10-14T12:28:06.176Z', 'cooked': '

Try:

\n
from datasets import load_dataset\ndataset = load_dataset(""lhoestq/conll2003"")\n
\n

This is because support for trust_remote_code=True was removed in datasets library version 4.0.0 and later. You can work around this by using datasets that don’t rely on builder scripts (like the one shown above) or by downgrading the datasets library to version 3.6.0 or earlier.

', 'post_number': 2, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-10-14T12:28:06.176Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 8, 'readers_count': 7, 'score': 21.4, 'yours': False, 'topic_id': 169142, 'topic_slug': 'cannot-load-conll2003', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://github.com/huggingface/datasets/releases/tag/4.0.0', 'internal': False, 'reflection': False, 'title': 'Release 4.0.0 · huggingface/datasets · GitHub', 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/cannot-load-conll2003/169142/2', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 243576, 'name': 'Radek Štulc', 'username': 'stulcrad', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/s/4bbf92/{size}.png', 'created_at': '2025-10-14T12:35:37.592Z', 'cooked': '

That works, thank you.
\nThat’s interesting, so I assume the support for loading scripts has also been removed, so if I want to upload a custom dataset, I will need to manually convert it into DatasetDict and push it using this class.

', 'post_number': 3, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-10-14T12:35:37.592Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 7, 'readers_count': 6, 'score': 16.2, 'yours': False, 'topic_id': 169142, 'topic_slug': 'cannot-load-conll2003', 'display_username': 'Radek Štulc', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 41660, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/cannot-load-conll2003/169142/3', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243611, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-10-15T00:36:12.117Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 4, 'post_type': 3, 'posts_count': 4, 'updated_at': '2025-10-15T00:36:12.117Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 5, 'readers_count': 4, 'score': 5.8, 'yours': False, 'topic_id': 169142, 'topic_slug': 'cannot-load-conll2003', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/cannot-load-conll2003/169142/4', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

I am trying to load conll2003 dataset the basic way I learned like this

-
from datasets import load_dataset
-dataset = load_dataset(""conll2003"")
-
-

but I am running into this error

-
---------------------------------------------------------------------------
-RuntimeError                              Traceback (most recent call last)
-Cell In[15], line 3
-      1 from datasets import load_dataset
-----> 3 dataset = load_dataset(""conll2003"")
-
-File ~/.local/lib/python3.12/site-packages/datasets/load.py:1397, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, keep_in_memory, save_infos, revision, token, streaming, num_proc, storage_options, **config_kwargs)
-   1392 verification_mode = VerificationMode(
-   1393     (verification_mode or VerificationMode.BASIC_CHECKS) if not save_infos else VerificationMode.ALL_CHECKS
-   1394 )
-   1396 # Create a dataset builder
--> 1397 builder_instance = load_dataset_builder(
-   1398     path=path,
-   1399     name=name,
-   1400     data_dir=data_dir,
-   1401     data_files=data_files,
-   1402     cache_dir=cache_dir,
-   1403     features=features,
-   1404     download_config=download_config,
-   1405     download_mode=download_mode,
-   1406     revision=revision,
-   1407     token=token,
-   1408     storage_options=storage_options,
-   1409     **config_kwargs,
-   1410 )
-   1412 # Return iterable dataset in case of streaming
-   1413 if streaming:
-
-File ~/.local/lib/python3.12/site-packages/datasets/load.py:1137, in load_dataset_builder(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, token, storage_options, **config_kwargs)
-   1135 if features is not None:
-   1136     features = _fix_for_backward_compatible_features(features)
--> 1137 dataset_module = dataset_module_factory(
-   1138     path,
-   1139     revision=revision,
-   1140     download_config=download_config,
-   1141     download_mode=download_mode,
-   1142     data_dir=data_dir,
-   1143     data_files=data_files,
-   1144     cache_dir=cache_dir,
-   1145 )
-   1146 # Get dataset builder class
-   1147 builder_kwargs = dataset_module.builder_kwargs
-
-File ~/.local/lib/python3.12/site-packages/datasets/load.py:1036, in dataset_module_factory(path, revision, download_config, download_mode, data_dir, data_files, cache_dir, **download_kwargs)
-   1031             if isinstance(e1, FileNotFoundError):
-   1032                 raise FileNotFoundError(
-   1033                     f""Couldn't find any data file at {relative_to_absolute_path(path)}. ""
-   1034                     f""Couldn't find '{path}' on the Hugging Face Hub either: {type(e1).__name__}: {e1}""
-   1035                 ) from None
--> 1036             raise e1 from None
-   1037 else:
-   1038     raise FileNotFoundError(f""Couldn't find any data file at {relative_to_absolute_path(path)}."")
-
-File ~/.local/lib/python3.12/site-packages/datasets/load.py:994, in dataset_module_factory(path, revision, download_config, download_mode, data_dir, data_files, cache_dir, **download_kwargs)
-    986 try:
-    987     api.hf_hub_download(
-    988         repo_id=path,
-    989         filename=filename,
-   (...)
-    992         proxies=download_config.proxies,
-    993     )
---> 994     raise RuntimeError(f""Dataset scripts are no longer supported, but found {filename}"")
-    995 except EntryNotFoundError:
-    996     # Use the infos from the parquet export except in some cases:
-    997     if data_dir or data_files or (revision and revision != ""main""):
-
-RuntimeError: Dataset scripts are no longer supported, but found conll2003.py
-
-

Could someone tell me what is wrong?

";"

Try:

-
from datasets import load_dataset
-dataset = load_dataset(""lhoestq/conll2003"")
-
-

This is because support for trust_remote_code=True was removed in datasets library version 4.0.0 and later. You can work around this by using datasets that don’t rely on builder scripts (like the one shown above) or by downgrading the datasets library to version 3.6.0 or earlier.

";1 -WGET with Token not working;https://discuss.huggingface.co/t/wget-with-token-not-working/169024;169024;5;2025-10-08 09:03:54.478000+00:00;"[{'id': 243271, 'name': 'Lelièvre', 'username': 'RenanL', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/r/8dc957/{size}.png', 'created_at': '2025-10-08T09:03:54.532Z', 'cooked': '

Dear Hughingface Team,

\n

I’m using runpod with the templates “ComfyUI - AI-Dock”.

\n

In JupyterLab I want to download a login protected model, the one from black-forest-labs/FLUX.1-Krea-dev.

\n

wget used to work like that, I can download the model from my browser after login on my local pc.

\n

wget --header=“Authorization: Bearer TOKEN” ``https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/flux1-dev.safetensors

\n

But I get

\n
401 Unauthorized\nUsername/Password Authentication Failed.\n
\n

If I add –debug at the end. I get:

\n
DEBUG output created by Wget 1.21.2 on linux-gnu.\n\nReading HSTS entries from /home/user/.wget-hsts\nURI encoding = ‘UTF-8’\nConverted file name \'flux1-dev.safetensors\' (UTF-8) -> \'flux1-dev.safetensors\' (UTF-8)\n--2025-10-08 09:03:02--  https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/flux1-dev.safetensors\nResolving huggingface.co (huggingface.co)... 52.84.217.103, 52.84.217.69, 52.84.217.102, ...\nCaching huggingface.co => 52.84.217.103 52.84.217.69 52.84.217.102 52.84.217.88 2600:9000:203d:6200:17:b174:6d00:93a1 2600:9000:203d:e000:17:b174:6d00:93a1 2600:9000:203d:8800:17:b174:6d00:93a1 2600:9000:203d:e800:17:b174:6d00:93a1 2600:9000:203d:9600:17:b174:6d00:93a1 2600:9000:203d:2400:17:b174:6d00:93a1 2600:9000:203d:ee00:17:b174:6d00:93a1 2600:9000:203d:6400:17:b174:6d00:93a1\nConnecting to huggingface.co (huggingface.co)|52.84.217.103|:443... connected.\nCreated socket 3.\nReleasing 0x000061bc69c86ec0 (new refcount 1).\nInitiating SSL handshake.\nHandshake successful; connected socket 3 to SSL handle 0x000061bc69c888a0\ncertificate:\n  subject: CN=huggingface.co\n  issuer:  CN=Amazon RSA 2048 M02,O=Amazon,C=US\nX509 certificate successfully verified and matches host huggingface.co\n\n---request begin---\nGET /black-forest-labs/FLUX.1-dev/blob/main/flux1-dev.safetensors HTTP/1.1\nHost: huggingface.co\nUser-Agent: Wget/1.21.2\nAccept: */*\nAccept-Encoding: identity\nConnection: Keep-Alive\nAuthorization: Bearer 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\n\n---request end---\nHTTP request sent, awaiting response... \n---response begin---\nHTTP/1.1 401 Unauthorized\nContent-Type: text/html; charset=utf-8\nContent-Length: 22349\nConnection: keep-alive\nDate: Wed, 08 Oct 2025 09:03:02 GMT\nETag: W/""574d-1eC4sA5Q/PbQ5YhsvC0L0NiNhEc""\nX-Powered-By: huggingface-moon\nRateLimit: ""pages"";r=999;t=66\nRateLimit-Policy: ""fixed window"";""pages"";q=1000;w=300\ncross-origin-opener-policy: same-origin\nReferrer-Policy: strict-origin-when-cross-origin\nX-Request-Id: Root=1-68e628c6-753c6a394bc274c7764e5a2f\nX-Error-Message: Invalid credentials in Authorization header\nx-frame-options: SAMEORIGIN\nX-Cache: Error from cloudfront\nVia: 1.1 fdd255cb127a7759980ee879db5de580.cloudfront.net (CloudFront)\nX-Amz-Cf-Pop: DFW59-P5\nX-Amz-Cf-Id: tZ4CtuVneK0RyHpWtL5_DbEc3eq4qqEMlGoXvt8V9CLxqmo2CX4puw==\n\n---response end---\n401 Unauthorized\nRegistered socket 3 for persistent reuse.\nDisabling further reuse of socket 3.\nClosed 3/SSL 0x000061bc69c888a0\n\nUsername/Password Authentication Failed.\n
\n

Thank you for looking into that.

', 'post_number': 1, 'post_type': 1, 'posts_count': 8, 'updated_at': '2025-10-08T09:03:54.532Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 15, 'reads': 6, 'readers_count': 5, 'score': 61.2, 'yours': False, 'topic_id': 169024, 'topic_slug': 'wget-with-token-not-working', 'display_username': 'Lelièvre', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 105173, 'hidden': False, 'trust_level': 0, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/wget-with-token-not-working/169024/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 243288, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-10-08T10:22:28.337Z', 'cooked': '

How about resolve instead of blob for now?
\nwget --header=""Authorization: Bearer TOKEN"" ""https://huggingface.co/black-forest-labs/FLUX.1-dev/resolve/main/flux1-dev.safetensors""

', 'post_number': 2, 'post_type': 1, 'posts_count': 8, 'updated_at': '2025-10-08T10:23:15.516Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 5, 'readers_count': 4, 'score': 16.0, 'yours': False, 'topic_id': 169024, 'topic_slug': 'wget-with-token-not-working', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/wget-with-token-not-working/169024/2', 'reactions': [{'id': 'hugs', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 243295, 'name': 'Lelièvre', 'username': 'RenanL', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/r/8dc957/{size}.png', 'created_at': '2025-10-08T11:27:51.251Z', 'cooked': '

resolve is solving the problem!

\n

Thank you so much for your help.

\n

Why I get blob instead of resolve in the url?

', 'post_number': 3, 'post_type': 1, 'posts_count': 8, 'updated_at': '2025-10-08T11:27:51.251Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 5, 'readers_count': 4, 'score': 16.0, 'yours': False, 'topic_id': 169024, 'topic_slug': 'wget-with-token-not-working', 'display_username': 'Lelièvre', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 105173, 'hidden': False, 'trust_level': 0, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/wget-with-token-not-working/169024/3', 'reactions': [{'id': 'confetti_ball', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243299, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-10-08T11:38:28.728Z', 'cooked': '

blob is for web UI file-viewer URL. resolve is for file itself. Probably got mixed in from copy-pasting.

', 'post_number': 4, 'post_type': 1, 'posts_count': 8, 'updated_at': '2025-10-08T11:39:07.386Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 5, 'readers_count': 4, 'score': 21.0, 'yours': False, 'topic_id': 169024, 'topic_slug': 'wget-with-token-not-working', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/wget-with-token-not-working/169024/4', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243301, 'name': 'Lelièvre', 'username': 'RenanL', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/r/8dc957/{size}.png', 'created_at': '2025-10-08T11:58:23.708Z', 'cooked': '

Need to check that!

\n

Thank you again.

', 'post_number': 5, 'post_type': 1, 'posts_count': 8, 'updated_at': '2025-10-08T11:58:23.708Z', 'reply_count': 0, 'reply_to_post_number': 4, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 5, 'readers_count': 4, 'score': 16.0, 'yours': False, 'topic_id': 169024, 'topic_slug': 'wget-with-token-not-working', 'display_username': 'Lelièvre', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 105173, 'hidden': False, 'trust_level': 0, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/wget-with-token-not-working/169024/5', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243326, 'name': 'Vu Hung Nguyen', 'username': 'vuhung', 'avatar_template': '/user_avatar/discuss.huggingface.co/vuhung/{size}/53965_2.png', 'created_at': '2025-10-08T22:23:11.995Z', 'cooked': '

In this context, is curl better than wget?

', 'post_number': 6, 'post_type': 1, 'posts_count': 8, 'updated_at': '2025-10-08T22:23:11.995Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 3, 'readers_count': 2, 'score': 20.6, 'yours': False, 'topic_id': 169024, 'topic_slug': 'wget-with-token-not-working', 'display_username': 'Vu Hung Nguyen', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 103980, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/wget-with-token-not-working/169024/6', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243327, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-10-08T22:29:30.794Z', 'cooked': '

Yeah. Well, I think most people use curl. The HF sample also uses curl. Even in that case, though, you should probably use URLs with resolve in the default behavior.

', 'post_number': 7, 'post_type': 1, 'posts_count': 8, 'updated_at': '2025-10-08T22:29:30.794Z', 'reply_count': 0, 'reply_to_post_number': 6, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 3, 'readers_count': 2, 'score': 0.6000000000000001, 'yours': False, 'topic_id': 169024, 'topic_slug': 'wget-with-token-not-working', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'reply_to_user': {'id': 103980, 'username': 'vuhung', 'name': 'Vu Hung Nguyen', 'avatar_template': '/user_avatar/discuss.huggingface.co/vuhung/{size}/53965_2.png'}, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/wget-with-token-not-working/169024/7', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243371, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-10-09T10:29:31.103Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 8, 'post_type': 3, 'posts_count': 8, 'updated_at': '2025-10-09T10:29:31.103Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 2, 'readers_count': 1, 'score': 0.4, 'yours': False, 'topic_id': 169024, 'topic_slug': 'wget-with-token-not-working', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/wget-with-token-not-working/169024/8', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

Dear Hughingface Team,

-

I’m using runpod with the templates “ComfyUI - AI-Dock”.

-

In JupyterLab I want to download a login protected model, the one from black-forest-labs/FLUX.1-Krea-dev.

-

wget used to work like that, I can download the model from my browser after login on my local pc.

-

wget --header=“Authorization: Bearer TOKEN” ``https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/flux1-dev.safetensors

-

But I get

-
401 Unauthorized
-Username/Password Authentication Failed.
-
-

If I add –debug at the end. I get:

-
DEBUG output created by Wget 1.21.2 on linux-gnu.
-
-Reading HSTS entries from /home/user/.wget-hsts
-URI encoding = ‘UTF-8’
-Converted file name 'flux1-dev.safetensors' (UTF-8) -> 'flux1-dev.safetensors' (UTF-8)
---2025-10-08 09:03:02--  https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/flux1-dev.safetensors
-Resolving huggingface.co (huggingface.co)... 52.84.217.103, 52.84.217.69, 52.84.217.102, ...
-Caching huggingface.co => 52.84.217.103 52.84.217.69 52.84.217.102 52.84.217.88 2600:9000:203d:6200:17:b174:6d00:93a1 2600:9000:203d:e000:17:b174:6d00:93a1 2600:9000:203d:8800:17:b174:6d00:93a1 2600:9000:203d:e800:17:b174:6d00:93a1 2600:9000:203d:9600:17:b174:6d00:93a1 2600:9000:203d:2400:17:b174:6d00:93a1 2600:9000:203d:ee00:17:b174:6d00:93a1 2600:9000:203d:6400:17:b174:6d00:93a1
-Connecting to huggingface.co (huggingface.co)|52.84.217.103|:443... connected.
-Created socket 3.
-Releasing 0x000061bc69c86ec0 (new refcount 1).
-Initiating SSL handshake.
-Handshake successful; connected socket 3 to SSL handle 0x000061bc69c888a0
-certificate:
-  subject: CN=huggingface.co
-  issuer:  CN=Amazon RSA 2048 M02,O=Amazon,C=US
-X509 certificate successfully verified and matches host huggingface.co
-
----request begin---
-GET /black-forest-labs/FLUX.1-dev/blob/main/flux1-dev.safetensors HTTP/1.1
-Host: huggingface.co
-User-Agent: Wget/1.21.2
-Accept: */*
-Accept-Encoding: identity
-Connection: Keep-Alive
-Authorization: Bearer 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
-
----request end---
-HTTP request sent, awaiting response... 
----response begin---
-HTTP/1.1 401 Unauthorized
-Content-Type: text/html; charset=utf-8
-Content-Length: 22349
-Connection: keep-alive
-Date: Wed, 08 Oct 2025 09:03:02 GMT
-ETag: W/""574d-1eC4sA5Q/PbQ5YhsvC0L0NiNhEc""
-X-Powered-By: huggingface-moon
-RateLimit: ""pages"";r=999;t=66
-RateLimit-Policy: ""fixed window"";""pages"";q=1000;w=300
-cross-origin-opener-policy: same-origin
-Referrer-Policy: strict-origin-when-cross-origin
-X-Request-Id: Root=1-68e628c6-753c6a394bc274c7764e5a2f
-X-Error-Message: Invalid credentials in Authorization header
-x-frame-options: SAMEORIGIN
-X-Cache: Error from cloudfront
-Via: 1.1 fdd255cb127a7759980ee879db5de580.cloudfront.net (CloudFront)
-X-Amz-Cf-Pop: DFW59-P5
-X-Amz-Cf-Id: tZ4CtuVneK0RyHpWtL5_DbEc3eq4qqEMlGoXvt8V9CLxqmo2CX4puw==
-
----response end---
-401 Unauthorized
-Registered socket 3 for persistent reuse.
-Disabling further reuse of socket 3.
-Closed 3/SSL 0x000061bc69c888a0
-
-Username/Password Authentication Failed.
-
-

Thank you for looking into that.

";"

How about resolve instead of blob for now?
-wget --header=""Authorization: Bearer TOKEN"" ""https://huggingface.co/black-forest-labs/FLUX.1-dev/resolve/main/flux1-dev.safetensors""

";1 -NonMatchingSplitsSizesError;https://discuss.huggingface.co/t/nonmatchingsplitssizeserror/30033;30033;10;2023-01-19 20:12:35.014000+00:00;"[{'id': 55242, 'name': 'Sundeep', 'username': 'sl02', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/s/ba9def/{size}.png', 'created_at': '2023-01-19T20:12:35.084Z', 'cooked': '

I created a custom script which splits the raw file into train/test split on the fly. The script works with the default arguments. However, when I change the test_size ratio which I pass via load_dataset(), it fails with the following error

\n
Traceback (most recent call last):                                                                                                                                                                                                                            \n  File ""<stdin>"", line 1, in <module>\n  File ""/Users/home/.local/share/virtualenvs/1717-yQ3Y_lVD/lib/python3.8/site-packages/datasets/load.py"", line 1757, in load_dataset\n    builder_instance.download_and_prepare(\n  File ""/Users/home/.local/share/virtualenvs/1717-yQ3Y_lVD/lib/python3.8/site-packages/datasets/builder.py"", line 860, in download_and_prepare\n    self._download_and_prepare(\n  File ""/Users/home/.local/share/virtualenvs/1717-yQ3Y_lVD/lib/python3.8/site-packages/datasets/builder.py"", line 1611, in _download_and_prepare\n    super()._download_and_prepare(\n  File ""/Users/home/.local/share/virtualenvs/1717-yQ3Y_lVD/lib/python3.8/site-packages/datasets/builder.py"", line 971, in _download_and_prepare\n    verify_splits(self.info.splits, split_dict)\n  File ""/Users/home/.local/share/virtualenvs/1717-yQ3Y_lVD/lib/python3.8/site-packages/datasets/utils/info_utils.py"", line 74, in verify_splits\n    raise NonMatchingSplitsSizesError(str(bad_splits))\ndatasets.utils.info_utils.NonMatchingSplitsSizesError\n
\n

It fails the integrity check as expected. The Build and load doesn’t show how to update the checks. I thought, using the download_mode=force_redownload argument in load_dataset() would fix it but it throws the same error as shown above. How do I resolve this?

', 'post_number': 1, 'post_type': 1, 'posts_count': 7, 'updated_at': '2023-01-19T20:12:35.084Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 6141, 'reads': 159, 'readers_count': 158, 'score': 30671.8, 'yours': False, 'topic_id': 30033, 'topic_slug': 'nonmatchingsplitssizeserror', 'display_username': 'Sundeep', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/docs/datasets/about_dataset_load#maintaining-integrity', 'internal': False, 'reflection': False, 'title': 'Build and load', 'clicks': 7}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 12315, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/nonmatchingsplitssizeserror/30033/1', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 55836, 'name': 'Polina Kazakova', 'username': 'polinaeterna', 'avatar_template': '/user_avatar/discuss.huggingface.co/polinaeterna/{size}/19055_2.png', 'created_at': '2023-01-25T12:10:34.924Z', 'cooked': '

Hi @sl02 ! Is test_size a custom builder parameter you define in your loading script?

\n

You can set ignore_verifications=True param in load_dataset to skip splits sizes verification.

\n

Also note that Dataset object has .train_test_split() method, probably it might be useful for your case.

', 'post_number': 2, 'post_type': 1, 'posts_count': 7, 'updated_at': '2023-01-25T12:10:34.924Z', 'reply_count': 2, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 60, 'reads': 151, 'readers_count': 150, 'score': 355.2, 'yours': False, 'topic_id': 30033, 'topic_slug': 'nonmatchingsplitssizeserror', 'display_username': 'Polina Kazakova', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/docs/datasets/process#split', 'internal': False, 'reflection': False, 'title': 'Process', 'clicks': 54}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 8429, 'hidden': False, 'trust_level': 2, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/nonmatchingsplitssizeserror/30033/2', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 56144, 'name': 'Sundeep', 'username': 'sl02', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/s/ba9def/{size}.png', 'created_at': '2023-01-27T13:14:44.170Z', 'cooked': '\n

Hi @polinaeterna
\nYes. test_size is a parameter. Sure with the ignore_verifications=True parameter it works. But I would like to know how, for other datasets when it changes at the source, do you update the information; The instructions in the document, to which I provide a link in the above thread, doesn’t explain this clearly.

\n

I am doing a group shuffle split because I have to ensure no overlap in the id column in the respective splits.

', 'post_number': 3, 'post_type': 1, 'posts_count': 7, 'updated_at': '2023-01-27T13:14:44.170Z', 'reply_count': 1, 'reply_to_post_number': 2, 'quote_count': 1, 'incoming_link_count': 85, 'reads': 148, 'readers_count': 147, 'score': 459.6, 'yours': False, 'topic_id': 30033, 'topic_slug': 'nonmatchingsplitssizeserror', 'display_username': 'Sundeep', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 8429, 'username': 'polinaeterna', 'name': 'Polina Kazakova', 'avatar_template': '/user_avatar/discuss.huggingface.co/polinaeterna/{size}/19055_2.png'}, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 12315, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/nonmatchingsplitssizeserror/30033/3', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 56173, 'name': 'Polina Kazakova', 'username': 'polinaeterna', 'avatar_template': '/user_avatar/discuss.huggingface.co/polinaeterna/{size}/19055_2.png', 'created_at': '2023-01-27T17:56:14.846Z', 'cooked': '

@sl02
\nWhen you load your dataset locally for the first time, it creates dataset_info.json file under its cache folder, the file contains all these splits info (like num_examples, num_bytes, etc.). If you regenerate the dataset while the script is unchanged (for example, run load_dataset with download_mode=""reuse_cache_if_exists""), it performs verifications against this file.

\n

We used to have dataset_info.json files in datasets repositories on the Hub (so, not just in a local cache folder) to verify splits info on the first download but now it’s deprecated, we use README.md instead for storing these numbers.
\nTo (re)compute these numbers automatically and dump them to a README.md file, one should run datasets-cli test your_dataset --save_info. And as it’s done manually, it depends on datasets’ authors if they update and push this info or not as it’s not required.
\nHope it’s more or less clear, feel free to ask any questions if it’s not

', 'post_number': 4, 'post_type': 1, 'posts_count': 7, 'updated_at': '2023-01-27T17:56:14.846Z', 'reply_count': 1, 'reply_to_post_number': 3, 'quote_count': 0, 'incoming_link_count': 101, 'reads': 133, 'readers_count': 132, 'score': 581.6, 'yours': False, 'topic_id': 30033, 'topic_slug': 'nonmatchingsplitssizeserror', 'display_username': 'Polina Kazakova', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 12315, 'username': 'sl02', 'name': 'Sundeep', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/s/ba9def/{size}.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 3}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 8429, 'hidden': False, 'trust_level': 2, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/nonmatchingsplitssizeserror/30033/4', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 3}], 'current_user_reaction': None, 'reaction_users_count': 3, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 56267, 'name': 'Sundeep', 'username': 'sl02', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/s/ba9def/{size}.png', 'created_at': '2023-01-28T14:18:23.729Z', 'cooked': '

@polinaeterna
\nThanks for clearing that up!

', 'post_number': 5, 'post_type': 1, 'posts_count': 7, 'updated_at': '2023-01-28T14:18:23.729Z', 'reply_count': 0, 'reply_to_post_number': 4, 'quote_count': 0, 'incoming_link_count': 36, 'reads': 114, 'readers_count': 113, 'score': 202.8, 'yours': False, 'topic_id': 30033, 'topic_slug': 'nonmatchingsplitssizeserror', 'display_username': 'Sundeep', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 8429, 'username': 'polinaeterna', 'name': 'Polina Kazakova', 'avatar_template': '/user_avatar/discuss.huggingface.co/polinaeterna/{size}/19055_2.png'}, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 12315, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/nonmatchingsplitssizeserror/30033/5', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 89573, 'name': 'Adam Hjerpe', 'username': 'hjerpe', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/h/7993a0/{size}.png', 'created_at': '2023-09-13T19:07:17.850Z', 'cooked': '

Note that you could get this error when you try and download an updated dataset without using the cache. E.g.,
\ndataset = load_dataset(url, download_mode=“force_redownload”)

\n

If the underlying dataset has been updated there can be a miss-match between the number of read records and what is read from the cache. You can read about the cache here, Cache management.

', 'post_number': 6, 'post_type': 1, 'posts_count': 7, 'updated_at': '2023-09-13T19:07:17.850Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 26, 'reads': 85, 'readers_count': 84, 'score': 147.0, 'yours': False, 'topic_id': 30033, 'topic_slug': 'nonmatchingsplitssizeserror', 'display_username': 'Adam Hjerpe', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/docs/datasets/cache', 'internal': False, 'reflection': False, 'title': 'Cache management', 'clicks': 123}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 27951, 'hidden': False, 'trust_level': 0, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/nonmatchingsplitssizeserror/30033/6', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 243312, 'name': 'Albert Zeyer', 'username': 'albertzeyer', 'avatar_template': '/user_avatar/discuss.huggingface.co/albertzeyer/{size}/46906_2.png', 'created_at': '2025-10-08T16:51:31.810Z', 'cooked': '\n

This does not work anymore. I think now you have to use verification_mode=VerificationMode.NO_CHECKS.

', 'post_number': 7, 'post_type': 1, 'posts_count': 7, 'updated_at': '2025-10-08T16:51:31.810Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 1, 'incoming_link_count': 0, 'reads': 4, 'readers_count': 3, 'score': 15.8, 'yours': False, 'topic_id': 30033, 'topic_slug': 'nonmatchingsplitssizeserror', 'display_username': 'Albert Zeyer', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 92881, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/nonmatchingsplitssizeserror/30033/7', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

I created a custom script which splits the raw file into train/test split on the fly. The script works with the default arguments. However, when I change the test_size ratio which I pass via load_dataset(), it fails with the following error

-
Traceback (most recent call last):                                                                                                                                                                                                                            
-  File ""<stdin>"", line 1, in <module>
-  File ""/Users/home/.local/share/virtualenvs/1717-yQ3Y_lVD/lib/python3.8/site-packages/datasets/load.py"", line 1757, in load_dataset
-    builder_instance.download_and_prepare(
-  File ""/Users/home/.local/share/virtualenvs/1717-yQ3Y_lVD/lib/python3.8/site-packages/datasets/builder.py"", line 860, in download_and_prepare
-    self._download_and_prepare(
-  File ""/Users/home/.local/share/virtualenvs/1717-yQ3Y_lVD/lib/python3.8/site-packages/datasets/builder.py"", line 1611, in _download_and_prepare
-    super()._download_and_prepare(
-  File ""/Users/home/.local/share/virtualenvs/1717-yQ3Y_lVD/lib/python3.8/site-packages/datasets/builder.py"", line 971, in _download_and_prepare
-    verify_splits(self.info.splits, split_dict)
-  File ""/Users/home/.local/share/virtualenvs/1717-yQ3Y_lVD/lib/python3.8/site-packages/datasets/utils/info_utils.py"", line 74, in verify_splits
-    raise NonMatchingSplitsSizesError(str(bad_splits))
-datasets.utils.info_utils.NonMatchingSplitsSizesError
-
-

It fails the integrity check as expected. The Build and load doesn’t show how to update the checks. I thought, using the download_mode=force_redownload argument in load_dataset() would fix it but it throws the same error as shown above. How do I resolve this?

";"

@sl02
-When you load your dataset locally for the first time, it creates dataset_info.json file under its cache folder, the file contains all these splits info (like num_examples, num_bytes, etc.). If you regenerate the dataset while the script is unchanged (for example, run load_dataset with download_mode=""reuse_cache_if_exists""), it performs verifications against this file.

-

We used to have dataset_info.json files in datasets repositories on the Hub (so, not just in a local cache folder) to verify splits info on the first download but now it’s deprecated, we use README.md instead for storing these numbers.
-To (re)compute these numbers automatically and dump them to a README.md file, one should run datasets-cli test your_dataset --save_info. And as it’s done manually, it depends on datasets’ authors if they update and push this info or not as it’s not required.
-Hope it’s more or less clear, feel free to ask any questions if it’s not

";1 -Error 404 when downloading the tokenizer;https://discuss.huggingface.co/t/error-404-when-downloading-the-tokenizer/168993;168993;9;2025-10-07 08:40:03.319000+00:00;"[{'id': 243207, 'name': 'Stefano', 'username': 'stefra', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/s/a9a28c/{size}.png', 'created_at': '2025-10-07T08:40:03.383Z', 'cooked': '

When I try to execute the following lines of code:

\n

quantization_config = BitsAndBytesConfig(load_in_8bit=True)
\ntokenizer = AutoTokenizer.from_pretrained(model_id)
\nmodel = AutoModelForCausalLM.from_pretrained(
\nmodel_id,
\ndevice_map=“auto”,
\nquantization_config=quantization_config
\n)

\n

The tokenizer raises a 404 Client Error: Not Found, specifically:
\n“Entry Not Found for URL: https://huggingface.co/api/models/Qwen/Qwen2.5-7B-Instruct/tree/main/additional_chat_templates?recursive=false&expand=false.
\nadditional_chat_templates does not exist on ‘main’.”

\n

The libraries I am using are:

\n\n

Is there anything I can do to fix this issue? Could it be related to a version mismatch? Any advice would be appreciated.

', 'post_number': 1, 'post_type': 1, 'posts_count': 3, 'updated_at': '2025-10-07T08:40:03.383Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 595, 'reads': 12, 'readers_count': 11, 'score': 2142.0, 'yours': False, 'topic_id': 168993, 'topic_slug': 'error-404-when-downloading-the-tokenizer', 'display_username': 'Stefano', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/api/models/Qwen/Qwen2.5-7B-Instruct/tree/main/additional_chat_templates?recursive=false&expand=false', 'internal': False, 'reflection': False, 'clicks': 1}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 105159, 'hidden': False, 'trust_level': 0, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/error-404-when-downloading-the-tokenizer/168993/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 243209, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-10-07T09:34:58.688Z', 'cooked': '

Seems a resolved bug of Transformers. Try upgrade pip install -U transformers

', 'post_number': 2, 'post_type': 1, 'posts_count': 3, 'updated_at': '2025-10-07T09:34:58.688Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 24, 'reads': 11, 'readers_count': 10, 'score': 86.8, 'yours': False, 'topic_id': 168993, 'topic_slug': 'error-404-when-downloading-the-tokenizer', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://github.com/huggingface/transformers/issues/39873', 'internal': False, 'reflection': False, 'title': ""Checking for additional_chat_templates doesn't work without internet (ConnectionError) · Issue #39873 · huggingface/transformers · GitHub"", 'clicks': 89}, {'url': 'https://discuss.huggingface.co/t/autotokenizer-404-error-issue/169085/2', 'internal': True, 'reflection': True, 'title': 'AutoTokenizer 404 error issue', 'clicks': 6}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/error-404-when-downloading-the-tokenizer/168993/2', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 243240, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-10-07T21:35:22.053Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 3, 'post_type': 3, 'posts_count': 3, 'updated_at': '2025-10-07T21:35:22.053Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 6, 'reads': 10, 'readers_count': 9, 'score': 16.6, 'yours': False, 'topic_id': 168993, 'topic_slug': 'error-404-when-downloading-the-tokenizer', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/error-404-when-downloading-the-tokenizer/168993/3', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

When I try to execute the following lines of code:

-

quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(
-model_id,
-device_map=“auto”,
-quantization_config=quantization_config
-)

-

The tokenizer raises a 404 Client Error: Not Found, specifically:
-“Entry Not Found for URL: https://huggingface.co/api/models/Qwen/Qwen2.5-7B-Instruct/tree/main/additional_chat_templates?recursive=false&expand=false.
-additional_chat_templates does not exist on ‘main’.”

-

The libraries I am using are:

- -

Is there anything I can do to fix this issue? Could it be related to a version mismatch? Any advice would be appreciated.

";"

Seems a resolved bug of Transformers. Try upgrade pip install -U transformers

";1 -Permission error when starting a LableStudio space;https://discuss.huggingface.co/t/permission-error-when-starting-a-lablestudio-space/168735;168735;5;2025-09-28 01:03:19.470000+00:00;"[{'id': 242700, 'name': 'Lin Chen you', 'username': 'cylin577', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/c/dbc845/{size}.png', 'created_at': '2025-09-28T01:03:19.540Z', 'cooked': '

It says

\n
Exit code: 1. Reason: => Database and media directory: /label-studio/data\n=> Static URL is set to: /static/\nTraceback (most recent call last):\n  File ""/label-studio/.venv/bin/label-studio"", line 3, in <module>\n    from label_studio.server import main\n  File ""/label-studio/label_studio/server.py"", line 23, in <module>\n    from label_studio.core.argparser import parse_input_args\n  File ""/label-studio/label_studio/core/argparser.py"", line 5, in <module>\n    from .settings.base import EXPORT_DIR\n  File ""/label-studio/label_studio/core/settings/base.py"", line 470, in <module>\n    os.makedirs(MEDIA_ROOT, exist_ok=True)\n  File ""<frozen os>"", line 225, in makedirs\nPermissionError: [Errno 13] Permission denied: \'/label-studio/data/media\'\n
\n

When starting up

', 'post_number': 1, 'post_type': 1, 'posts_count': 5, 'updated_at': '2025-09-28T01:05:44.089Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 16, 'reads': 5, 'readers_count': 4, 'score': 76.0, 'yours': False, 'topic_id': 168735, 'topic_slug': 'permission-error-when-starting-a-lablestudio-space', 'display_username': 'Lin Chen you', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 2, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 104613, 'hidden': False, 'trust_level': 0, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/permission-error-when-starting-a-lablestudio-space/168735/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 242703, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-09-28T03:39:16.858Z', 'cooked': '

The cause is attempting to write to a directory that is not writable due to permissions. Setting the following environment variable would resolve this.
\nLABEL_STUDIO_BASE_DATA_DIR=/tmp/label-studio
\nAny directory with write permissions will work.

', 'post_number': 2, 'post_type': 1, 'posts_count': 5, 'updated_at': '2025-09-28T03:40:55.524Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 2, 'reads': 4, 'readers_count': 3, 'score': 5.8, 'yours': False, 'topic_id': 168735, 'topic_slug': 'permission-error-when-starting-a-lablestudio-space', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://discuss.huggingface.co/t/permissionerror-errno-13-permission-denied-cache/146951/5', 'internal': True, 'reflection': False, 'title': ""PermissionError: [Errno 13] Permission denied: '/.cache'"", 'clicks': 1}, {'url': 'https://labelstud.io/guide/start', 'internal': False, 'reflection': False, 'title': 'Label Studio Documentation — Start commands for Label Studio', 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/permission-error-when-starting-a-lablestudio-space/168735/2', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 242707, 'name': 'James David', 'username': 'JamesDavids', 'avatar_template': '/user_avatar/discuss.huggingface.co/jamesdavids/{size}/54347_2.png', 'created_at': '2025-09-28T08:09:39.165Z', 'cooked': '

That error is pretty straightforward — Label Studio is trying to create its media folder but doesn’t have permission.

\n

Here’s how to fix it:

\n
    \n
  1. \n

    Check who owns the folder

    \n
    ls -ld /label-studio/data\n\n
    \n

    If it’s owned by root, Label Studio (running as a different user) can’t write there.

    \n
  2. \n
  3. \n

    Give yourself permission

    \n
    sudo chown -R $USER:$USER /label-studio/data\n\n
    \n

    or if you’re running inside Docker, adjust ownership to the container user (often 1001 or label-studio).

    \n
  4. \n
  5. \n

    Set writable permissions (if quick and dirty):

    \n
    sudo chmod -R 777 /label-studio/data\n\n
    \n

    This is less safe, but fine for local experiments.

    \n
  6. \n
  7. \n

    If Dockerized:

    \n\n
  8. \n
', 'post_number': 3, 'post_type': 1, 'posts_count': 5, 'updated_at': '2025-09-28T08:09:39.165Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 2, 'reads': 4, 'readers_count': 3, 'score': 10.8, 'yours': False, 'topic_id': 168735, 'topic_slug': 'permission-error-when-starting-a-lablestudio-space', 'display_username': 'James David', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 104627, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/permission-error-when-starting-a-lablestudio-space/168735/3', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 242716, 'name': 'Lin Chen you', 'username': 'cylin577', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/c/dbc845/{size}.png', 'created_at': '2025-09-28T10:36:56.104Z', 'cooked': '

Thanks! It worked!

', 'post_number': 4, 'post_type': 1, 'posts_count': 5, 'updated_at': '2025-09-28T10:36:56.104Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 2, 'readers_count': 1, 'score': 15.4, 'yours': False, 'topic_id': 168735, 'topic_slug': 'permission-error-when-starting-a-lablestudio-space', 'display_username': 'Lin Chen you', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 104613, 'hidden': False, 'trust_level': 0, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/permission-error-when-starting-a-lablestudio-space/168735/4', 'reactions': [{'id': 'confetti_ball', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 242730, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-09-28T22:37:38.529Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 5, 'post_type': 3, 'posts_count': 5, 'updated_at': '2025-09-28T22:37:38.529Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 9, 'reads': 1, 'readers_count': 0, 'score': 45.2, 'yours': False, 'topic_id': 168735, 'topic_slug': 'permission-error-when-starting-a-lablestudio-space', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/permission-error-when-starting-a-lablestudio-space/168735/5', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

It says

-
Exit code: 1. Reason: => Database and media directory: /label-studio/data
-=> Static URL is set to: /static/
-Traceback (most recent call last):
-  File ""/label-studio/.venv/bin/label-studio"", line 3, in <module>
-    from label_studio.server import main
-  File ""/label-studio/label_studio/server.py"", line 23, in <module>
-    from label_studio.core.argparser import parse_input_args
-  File ""/label-studio/label_studio/core/argparser.py"", line 5, in <module>
-    from .settings.base import EXPORT_DIR
-  File ""/label-studio/label_studio/core/settings/base.py"", line 470, in <module>
-    os.makedirs(MEDIA_ROOT, exist_ok=True)
-  File ""<frozen os>"", line 225, in makedirs
-PermissionError: [Errno 13] Permission denied: '/label-studio/data/media'
-
-

When starting up

";"

The cause is attempting to write to a directory that is not writable due to permissions. Setting the following environment variable would resolve this.
-LABEL_STUDIO_BASE_DATA_DIR=/tmp/label-studio
-Any directory with write permissions will work.

";1 -The best model is not being saved;https://discuss.huggingface.co/t/the-best-model-is-not-being-saved/168528;168528;5;2025-09-18 14:00:56.645000+00:00;"[{'id': 242243, 'name': 'Alex', 'username': 'SuperBowser', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/s/9f8e36/{size}.png', 'created_at': '2025-09-18T14:00:56.730Z', 'cooked': '

I am using custom metric and in my training arguments I have

\n
greater_is_better=True,\nload_best_model_at_end=True,\n
\n

But as far as I can the best model is not being saved. Here is link to my Colab notebook:

\n

Colab

\n

And here are all the details just in case:

\n

My platform and system data:

\n

platform: Linux
\nrelease: 6.1.123+
\nversion: #1 SMP PREEMPT_DYNAMIC Sun Mar 30 16:01:29 UTC 2025
\nmachine: x86_64
\ntorch: 2.8.0+cu126
\ntransformers:4.55.4
\ncompiler: 3.12.11 (main, Jun 4 2025, 08:56:18) [GCC 11.4.0]
\nGPU/TPU: Tesla T4
\nCUDA compiler:
\nnvcc: NVIDIA (R) Cuda compiler driver
\nCopyright (c) 2005-2024 NVIDIA Corporation
\nBuilt on Thu_Jun__6_02:18:23_PDT_2024
\nCuda compilation tools, release 12.5, V12.5.82
\nBuild cuda_12.5.r12.5/compiler.34385749_0

\n

Here is my code:

\n
from transformers import AutoModelForSequenceClassification, AutoTokenizer\nimport transformersimport sysimport torch\nimport pandas as pd, numpy as npfrom sklearn.preprocessing\nimport LabelEncoder\n
\n
import joblibimport pandas as pd\nimport os\nfrom sklearn.model_selection import train_test_split\nfrom datasets import Datasetimport numpy as np\nfrom transformers import TrainingArguments,Trainer\nimport platform\n\nimport os\nmodel_name = \'microsoft/deberta-v3-xsmall\'\nmodel_name_path = \'deberta-v3-xsmall\'\nDIR = \'../MAP_models/\'+model_name_path+\'/tuned/\'\nos.makedirs(\'../MAP_models\', exist_ok = True)\nos.makedirs(\'../MAP_models/\'+model_name_path, exist_ok = True)\nos.makedirs(\'../MAP_models/\'+model_name_path+\'/tuned\', exist_ok=True)\nos.makedirs(\'../MAP_models/\'+model_name_path+\'/tuned/model\', exist_ok=True)\n\n\nNUM_LABELS = 65\ntext = [f""example {i}"" for i in range(300)]\nlabel = [i % NUM_LABELS for i in range(300)]\ntrain = pd.DataFrame({\'text\': text, \'label\': label})\n\ntrain_df, val_df = train_test_split(train, test_size=0.2, random_state=42)\n\n# Convert to Hugging Face Dataset\nCOLS = [\'text\',\'label\']\ntrain_ds = Dataset.from_pandas(train_df[COLS])\nval_ds = Dataset.from_pandas(val_df[COLS])\n\n\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nMAX_LEN = 256\n   \n# Tokenization function\ndef tokenize(batch):\n    return tokenizer(batch[""text""], padding=""max_length"", truncation=True, max_length=256)\n    \ntrain_ds = train_ds.map(tokenize, batched=True)\nval_ds = val_ds.map(tokenize, batched=True)\n    \n# Set format for PyTorch\ncolumns = [\'input_ids\', \'attention_mask\', \'label\']\ntrain_ds.set_format(type=\'torch\', columns=columns)\nval_ds.set_format(type=\'torch\', columns=columns)\n\nmodel = AutoModelForSequenceClassification.from_pretrained(\n    model_name,\n    num_labels=NUM_LABELS, trust_remote_code=True\n    )\n\ndef compute_map3(eval_pred):\n    logits, labels = eval_pred\n    probs = torch.nn.functional.softmax(torch.tensor(logits), dim=-1).numpy()\n    \n    top3 = np.argsort(-probs, axis=1)[:, :3]  # Top 3 predictions\n    match = (top3 == labels[:, None])\n\n    # Compute MAP@3 manually\n    map3 = 0\n    for i in range(len(labels)):\n        if match[i, 0]:\n            map3 += 1.0\n        elif match[i, 1]:\n            map3 += 1.0 / 2\n        elif match[i, 2]:\n            map3 += 1.0 / 3\n    return {""map@3"": map3 / len(labels)}\n\nargs = TrainingArguments(\n        per_device_train_batch_size = 2, \n        per_device_eval_batch_size= 2,\n        gradient_accumulation_steps = 1,\n        warmup_steps = 10,\n        num_train_epochs = 1,\n        learning_rate = 5e-5,\n        fp16 = True,\n        bf16 = False,\n        logging_steps = 1,\n        optim = ""adamw_torch_fused"",\n        weight_decay = 0.01,\n        eval_strategy=""steps"",\n        lr_scheduler_type = ""cosine_with_restarts"",\n        seed = 3407,\n        output_dir = DIR+""output"",\n        logging_dir=DIR+""logs"",\n        greater_is_better=True,\n        load_best_model_at_end=True,\n        save_steps=10,\n        eval_steps=10,\n        save_total_limit=3,\n        report_to = ""none"", \n    )\n\ntrainer = Trainer(\n    model = model,\n    processing_class = tokenizer,\n    eval_dataset = val_ds,\n    train_dataset = train_ds,\n    args = args,\n    compute_metrics = compute_map3,\n)\n\ntrainer_stats = trainer.train()\n\n\n
\n

It produces the following output

\n

Step\tTraining Loss\tValidation Loss\tMap@3
\n10\t4.235900\t4.182212\t0.025000
\n20\t4.245500\t4.176703\t0.038889
\n30\t4.166400\t4.171503\t0.030556
\n40\t4.163400\t4.174795\t0.025000
\n50\t4.187000\t4.174973\t0.025000
\n60\t4.240600\t4.176061\t0.038889
\n70\t4.123800\t4.177481\t0.036111
\n80\t4.130100\t4.177088\t0.033333
\n90\t4.140700\t4.177318\t0.022222
\n100\t4.180000\t4.178491\t0.022222
\n110\t4.112100\t4.178146\t0.025000
\n120\t4.229100\t4.178137\t0.025000

\n

But when I run

\n

trainer.evaluate(val_ds)

\n

{‘eval_loss’: 4.1822123527526855,
\n‘eval_map@3’: 0.025,
\n‘eval_runtime’: 0.9703,
\n‘eval_samples_per_second’: 61.836,
\n‘eval_steps_per_second’: 30.918,
\n‘epoch’: 1.0}

\n

It seems like evaluation is done on the very first 10 steps, rather than on the best model.

\n

What am I doing wrong?

', 'post_number': 1, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-09-18T14:02:06.119Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 4, 'reads': 9, 'readers_count': 8, 'score': 36.8, 'yours': False, 'topic_id': 168528, 'topic_slug': 'the-best-model-is-not-being-saved', 'display_username': 'Alex', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://colab.research.google.com/drive/1ehTt53xlGV0Byx6yelifdEZcSgFREncy?usp=drive_link', 'internal': False, 'reflection': False, 'title': 'Google Colab', 'clicks': 1}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 102016, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/the-best-model-is-not-being-saved/168528/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 242254, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-09-18T15:10:23.889Z', 'cooked': '

Due to metric_for_best_model is missing, etc. ?

', 'post_number': 2, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-09-18T15:10:23.889Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 2, 'reads': 6, 'readers_count': 5, 'score': 11.2, 'yours': False, 'topic_id': 168528, 'topic_slug': 'the-best-model-is-not-being-saved', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/datasets/John6666/forum1/blob/main/best_model_not_saved.md', 'internal': False, 'reflection': False, 'clicks': 2}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/the-best-model-is-not-being-saved/168528/2', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 242256, 'name': 'Alex', 'username': 'SuperBowser', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/s/9f8e36/{size}.png', 'created_at': '2025-09-18T15:30:32.007Z', 'cooked': '

Thank you so much! What a blunder!

', 'post_number': 3, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-09-18T15:30:32.007Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 6, 'readers_count': 5, 'score': 16.2, 'yours': False, 'topic_id': 168528, 'topic_slug': 'the-best-model-is-not-being-saved', 'display_username': 'Alex', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 102016, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/the-best-model-is-not-being-saved/168528/3', 'reactions': [{'id': 'confetti_ball', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 242284, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-09-19T03:31:12.250Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 4, 'post_type': 3, 'posts_count': 4, 'updated_at': '2025-09-19T03:31:12.250Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 2, 'readers_count': 1, 'score': 0.4, 'yours': False, 'topic_id': 168528, 'topic_slug': 'the-best-model-is-not-being-saved', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/the-best-model-is-not-being-saved/168528/4', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

I am using custom metric and in my training arguments I have

-
greater_is_better=True,
-load_best_model_at_end=True,
-
-

But as far as I can the best model is not being saved. Here is link to my Colab notebook:

-

Colab

-

And here are all the details just in case:

-

My platform and system data:

-

platform: Linux
-release: 6.1.123+
-version: #1 SMP PREEMPT_DYNAMIC Sun Mar 30 16:01:29 UTC 2025
-machine: x86_64
-torch: 2.8.0+cu126
-transformers:4.55.4
-compiler: 3.12.11 (main, Jun 4 2025, 08:56:18) [GCC 11.4.0]
-GPU/TPU: Tesla T4
-CUDA compiler:
-nvcc: NVIDIA (R) Cuda compiler driver
-Copyright (c) 2005-2024 NVIDIA Corporation
-Built on Thu_Jun__6_02:18:23_PDT_2024
-Cuda compilation tools, release 12.5, V12.5.82
-Build cuda_12.5.r12.5/compiler.34385749_0

-

Here is my code:

-
from transformers import AutoModelForSequenceClassification, AutoTokenizer
-import transformersimport sysimport torch
-import pandas as pd, numpy as npfrom sklearn.preprocessing
-import LabelEncoder
-
-
import joblibimport pandas as pd
-import os
-from sklearn.model_selection import train_test_split
-from datasets import Datasetimport numpy as np
-from transformers import TrainingArguments,Trainer
-import platform
-
-import os
-model_name = 'microsoft/deberta-v3-xsmall'
-model_name_path = 'deberta-v3-xsmall'
-DIR = '../MAP_models/'+model_name_path+'/tuned/'
-os.makedirs('../MAP_models', exist_ok = True)
-os.makedirs('../MAP_models/'+model_name_path, exist_ok = True)
-os.makedirs('../MAP_models/'+model_name_path+'/tuned', exist_ok=True)
-os.makedirs('../MAP_models/'+model_name_path+'/tuned/model', exist_ok=True)
-
-
-NUM_LABELS = 65
-text = [f""example {i}"" for i in range(300)]
-label = [i % NUM_LABELS for i in range(300)]
-train = pd.DataFrame({'text': text, 'label': label})
-
-train_df, val_df = train_test_split(train, test_size=0.2, random_state=42)
-
-# Convert to Hugging Face Dataset
-COLS = ['text','label']
-train_ds = Dataset.from_pandas(train_df[COLS])
-val_ds = Dataset.from_pandas(val_df[COLS])
-
-
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-MAX_LEN = 256
-   
-# Tokenization function
-def tokenize(batch):
-    return tokenizer(batch[""text""], padding=""max_length"", truncation=True, max_length=256)
-    
-train_ds = train_ds.map(tokenize, batched=True)
-val_ds = val_ds.map(tokenize, batched=True)
-    
-# Set format for PyTorch
-columns = ['input_ids', 'attention_mask', 'label']
-train_ds.set_format(type='torch', columns=columns)
-val_ds.set_format(type='torch', columns=columns)
-
-model = AutoModelForSequenceClassification.from_pretrained(
-    model_name,
-    num_labels=NUM_LABELS, trust_remote_code=True
-    )
-
-def compute_map3(eval_pred):
-    logits, labels = eval_pred
-    probs = torch.nn.functional.softmax(torch.tensor(logits), dim=-1).numpy()
-    
-    top3 = np.argsort(-probs, axis=1)[:, :3]  # Top 3 predictions
-    match = (top3 == labels[:, None])
-
-    # Compute MAP@3 manually
-    map3 = 0
-    for i in range(len(labels)):
-        if match[i, 0]:
-            map3 += 1.0
-        elif match[i, 1]:
-            map3 += 1.0 / 2
-        elif match[i, 2]:
-            map3 += 1.0 / 3
-    return {""map@3"": map3 / len(labels)}
-
-args = TrainingArguments(
-        per_device_train_batch_size = 2, 
-        per_device_eval_batch_size= 2,
-        gradient_accumulation_steps = 1,
-        warmup_steps = 10,
-        num_train_epochs = 1,
-        learning_rate = 5e-5,
-        fp16 = True,
-        bf16 = False,
-        logging_steps = 1,
-        optim = ""adamw_torch_fused"",
-        weight_decay = 0.01,
-        eval_strategy=""steps"",
-        lr_scheduler_type = ""cosine_with_restarts"",
-        seed = 3407,
-        output_dir = DIR+""output"",
-        logging_dir=DIR+""logs"",
-        greater_is_better=True,
-        load_best_model_at_end=True,
-        save_steps=10,
-        eval_steps=10,
-        save_total_limit=3,
-        report_to = ""none"", 
-    )
-
-trainer = Trainer(
-    model = model,
-    processing_class = tokenizer,
-    eval_dataset = val_ds,
-    train_dataset = train_ds,
-    args = args,
-    compute_metrics = compute_map3,
-)
-
-trainer_stats = trainer.train()
-
-
-
-

It produces the following output

-

Step Training Loss Validation Loss Map@3
-10 4.235900 4.182212 0.025000
-20 4.245500 4.176703 0.038889
-30 4.166400 4.171503 0.030556
-40 4.163400 4.174795 0.025000
-50 4.187000 4.174973 0.025000
-60 4.240600 4.176061 0.038889
-70 4.123800 4.177481 0.036111
-80 4.130100 4.177088 0.033333
-90 4.140700 4.177318 0.022222
-100 4.180000 4.178491 0.022222
-110 4.112100 4.178146 0.025000
-120 4.229100 4.178137 0.025000

-

But when I run

-

trainer.evaluate(val_ds)

-

{‘eval_loss’: 4.1822123527526855,
-‘eval_map@3’: 0.025,
-‘eval_runtime’: 0.9703,
-‘eval_samples_per_second’: 61.836,
-‘eval_steps_per_second’: 30.918,
-‘epoch’: 1.0}

-

It seems like evaluation is done on the very first 10 steps, rather than on the best model.

-

What am I doing wrong?

";"

Due to metric_for_best_model is missing, etc. ?

";1 -Getting the Space name programmatically;https://discuss.huggingface.co/t/getting-the-space-name-programmatically/168253;168253;24;2025-09-10 09:20:15.719000+00:00;"[{'id': 241610, 'name': 'João Ricardo Silva', 'username': 'jrsilva', 'avatar_template': '/user_avatar/discuss.huggingface.co/jrsilva/{size}/53168_2.png', 'created_at': '2025-09-10T09:20:15.781Z', 'cooked': '

Is there a programmatic way of a Space knowing its own name?

\n

For instance, the restart_space method of the huggingface_hub API requires a repo_id. If, say, I want the Space to restart itself, is there a programmatic way of getting this repo_id (and thus working without requiring changes if the Space is ever renamed) or do I have to hard-code it?

', 'post_number': 1, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-09-10T09:20:15.781Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 12, 'reads': 4, 'readers_count': 3, 'score': 65.8, 'yours': False, 'topic_id': 168253, 'topic_slug': 'getting-the-space-name-programmatically', 'display_username': 'João Ricardo Silva', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 102714, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/getting-the-space-name-programmatically/168253/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 241616, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-09-10T10:59:05.305Z', 'cooked': '

Maybe simply by this?

\n
import os\nspace_id = os.getenv(""SPACE_ID"", """")          # e.g. ""username/space-name""\n
', 'post_number': 2, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-09-10T10:59:05.305Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 4, 'readers_count': 3, 'score': 5.8, 'yours': False, 'topic_id': 168253, 'topic_slug': 'getting-the-space-name-programmatically', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/docs/hub/en/spaces-overview#helper-environment-variables', 'internal': False, 'reflection': False, 'title': 'Spaces Overview', 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/getting-the-space-name-programmatically/168253/2', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 241627, 'name': 'João Ricardo Silva', 'username': 'jrsilva', 'avatar_template': '/user_avatar/discuss.huggingface.co/jrsilva/{size}/53168_2.png', 'created_at': '2025-09-10T12:04:43.563Z', 'cooked': '

You are quite right. I somehow missed that part of the documentation. Thank you.

', 'post_number': 3, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-09-10T12:04:43.563Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 3, 'readers_count': 2, 'score': 15.6, 'yours': False, 'topic_id': 168253, 'topic_slug': 'getting-the-space-name-programmatically', 'display_username': 'João Ricardo Silva', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 102714, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/getting-the-space-name-programmatically/168253/3', 'reactions': [{'id': 'confetti_ball', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 241672, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-09-11T00:04:44.148Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 4, 'post_type': 3, 'posts_count': 4, 'updated_at': '2025-09-11T00:04:44.148Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 1, 'readers_count': 0, 'score': 0.2, 'yours': False, 'topic_id': 168253, 'topic_slug': 'getting-the-space-name-programmatically', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/getting-the-space-name-programmatically/168253/4', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

Is there a programmatic way of a Space knowing its own name?

-

For instance, the restart_space method of the huggingface_hub API requires a repo_id. If, say, I want the Space to restart itself, is there a programmatic way of getting this repo_id (and thus working without requiring changes if the Space is ever renamed) or do I have to hard-code it?

";"

Maybe simply by this?

-
import os
-space_id = os.getenv(""SPACE_ID"", """")          # e.g. ""username/space-name""
-
";1 -Image to text using blip2 gives incorrect answer;https://discuss.huggingface.co/t/image-to-text-using-blip2-gives-incorrect-answer/168177;168177;5;2025-09-07 15:31:05.250000+00:00;"[{'id': 241418, 'name': 'Raman Shah', 'username': 'rxshah', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/r/a587f6/{size}.png', 'created_at': '2025-09-07T15:31:05.323Z', 'cooked': '

Here is code snippet slightly modified from blip2 site:

\n

first prompt “Question: How many cats are there? Answer:” –> gives correct answer Two

\n

However, second prompt “Question: How many dogs are there? Answer:” –> gives incorrect answer - Two should be Zero or None.

\n

Is this because the accuracy of the trained model is not 100% we should get incorrect answers? OR AM I doing something incorrectly?

\n

Here is the complete code:

\n

from PIL import Image
\nimport requests
\nfrom transformers import Blip2Processor, Blip2ForConditionalGeneration
\nimport torch

\n

device = “cuda” if torch.cuda.is_available() else “cpu”

\n

processor = Blip2Processor.from_pretrained(“Salesforce/blip2-opt-2.7b”)
\nmodel = Blip2ForConditionalGeneration.from_pretrained(
\n“Salesforce/blip2-opt-2.7b”, torch_dtype=torch.float16
\n)
\nmodel.to(device)

\n

url = “http://images.cocodataset.org/val2017/000000039769.jpg”
\nimage = Image.open(requests.get(url, stream=True).raw)

\n

prompt = “Question: How many cats are there? Answer:”
\ninputs = processor(images=image, text=prompt, return_tensors=“pt”).to(
\ndevice, torch.float16
\n)

\n

outputs = model.generate(**inputs)

\n

text = processor.tokenizer.batch_decode(outputs, skip_special_tokens=True)
\nprint(text)

\n

Gives correct answer: [‘Question: How many cats are there? Answer: Two\\n’]

\n

However, when I change prompt to

\n

prompt2 = ""Question: How many dogs are there? Answer: ""

\n

inputs2 = processor(images=image, text=prompt2, return_tensors=“pt”).to(
\ndevice, torch.float16
\n)

\n

outputs2 = model.generate(**inputs2)

\n

text2 = processor.tokenizer.batch_decode(outputs2, skip_special_tokens=True)
\nprint(text2)

\n

[‘Question: How many dogs are there? Answer: Two\\n’]

', 'post_number': 1, 'post_type': 1, 'posts_count': 6, 'updated_at': '2025-09-07T15:45:45.288Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 9, 'reads': 6, 'readers_count': 5, 'score': 61.2, 'yours': False, 'topic_id': 168177, 'topic_slug': 'image-to-text-using-blip2-gives-incorrect-answer', 'display_username': 'Raman Shah', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 3, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'http://images.cocodataset.org/val2017/000000039769.jpg%E2%80%9D', 'internal': False, 'reflection': False, 'clicks': 0}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 80638, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/image-to-text-using-blip2-gives-incorrect-answer/168177/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 241436, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-09-07T20:48:34.727Z', 'cooked': '
\n

OR AM I doing something incorrectly?

\n
\n

There’s no problem with the code; it seems to be a known issue with the model / architecture. You might want to try using some fine-tuned version.

', 'post_number': 2, 'post_type': 1, 'posts_count': 6, 'updated_at': '2025-09-07T20:48:34.727Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 4, 'readers_count': 3, 'score': 5.8, 'yours': False, 'topic_id': 168177, 'topic_slug': 'image-to-text-using-blip2-gives-incorrect-answer', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/Salesforce/blip2-opt-2.7b-coco', 'internal': False, 'reflection': False, 'title': 'Salesforce/blip2-opt-2.7b-coco · Hugging Face', 'clicks': 2}, {'url': 'https://arxiv.org/pdf/2403.01373', 'internal': False, 'reflection': False, 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/image-to-text-using-blip2-gives-incorrect-answer/168177/2', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 241443, 'name': 'Raman Shah', 'username': 'rxshah', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/r/a587f6/{size}.png', 'created_at': '2025-09-08T01:14:33.037Z', 'cooked': '

Thanks!!

\n

Tried the examples you pointed to. The number of dogs still gave Two. However, following the examples further got following results:

\n
55.3% that image 0 is \'a photo of a cat\'\n44.7% that image 0 is \'a photo of a dog\'\n
\n

Perhaps this explains why the model cannot distinguish between cats, dogs or anything else?

', 'post_number': 3, 'post_type': 1, 'posts_count': 6, 'updated_at': '2025-09-08T01:14:33.037Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 4, 'readers_count': 3, 'score': 15.8, 'yours': False, 'topic_id': 168177, 'topic_slug': 'image-to-text-using-blip2-gives-incorrect-answer', 'display_username': 'Raman Shah', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 80638, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/image-to-text-using-blip2-gives-incorrect-answer/168177/3', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 241446, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-09-08T03:51:52.414Z', 'cooked': '

Yeah. For example, CLIP can perfectly classify dogs and cats, but BLIP seems utterly unsuitable for classification

', 'post_number': 4, 'post_type': 1, 'posts_count': 6, 'updated_at': '2025-09-08T03:51:52.414Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 4, 'readers_count': 3, 'score': 20.8, 'yours': False, 'topic_id': 168177, 'topic_slug': 'image-to-text-using-blip2-gives-incorrect-answer', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/datasets/John6666/forum1/blob/main/blip2_cats_dogs.md', 'internal': False, 'reflection': False, 'title': 'blip2_cats_dogs.md · John6666/forum1 at main', 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/image-to-text-using-blip2-gives-incorrect-answer/168177/4', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 241472, 'name': 'Raman Shah', 'username': 'rxshah', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/r/a587f6/{size}.png', 'created_at': '2025-09-08T13:52:59.063Z', 'cooked': '

Thanks for the clear explanation!!

', 'post_number': 5, 'post_type': 1, 'posts_count': 6, 'updated_at': '2025-09-08T13:52:59.063Z', 'reply_count': 0, 'reply_to_post_number': 4, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 3, 'readers_count': 2, 'score': 15.6, 'yours': False, 'topic_id': 168177, 'topic_slug': 'image-to-text-using-blip2-gives-incorrect-answer', 'display_username': 'Raman Shah', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 80638, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/image-to-text-using-blip2-gives-incorrect-answer/168177/5', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 241501, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-09-09T01:53:46.094Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 6, 'post_type': 3, 'posts_count': 6, 'updated_at': '2025-09-09T01:53:46.094Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 2, 'readers_count': 1, 'score': 0.4, 'yours': False, 'topic_id': 168177, 'topic_slug': 'image-to-text-using-blip2-gives-incorrect-answer', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/image-to-text-using-blip2-gives-incorrect-answer/168177/6', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

Here is code snippet slightly modified from blip2 site:

-

first prompt “Question: How many cats are there? Answer:” –> gives correct answer Two

-

However, second prompt “Question: How many dogs are there? Answer:” –> gives incorrect answer - Two should be Zero or None.

-

Is this because the accuracy of the trained model is not 100% we should get incorrect answers? OR AM I doing something incorrectly?

-

Here is the complete code:

-

from PIL import Image
-import requests
-from transformers import Blip2Processor, Blip2ForConditionalGeneration
-import torch

-

device = “cuda” if torch.cuda.is_available() else “cpu”

-

processor = Blip2Processor.from_pretrained(“Salesforce/blip2-opt-2.7b”)
-model = Blip2ForConditionalGeneration.from_pretrained(
-“Salesforce/blip2-opt-2.7b”, torch_dtype=torch.float16
-)
-model.to(device)

-

url = “http://images.cocodataset.org/val2017/000000039769.jpg”
-image = Image.open(requests.get(url, stream=True).raw)

-

prompt = “Question: How many cats are there? Answer:”
-inputs = processor(images=image, text=prompt, return_tensors=“pt”).to(
-device, torch.float16
-)

-

outputs = model.generate(**inputs)

-

text = processor.tokenizer.batch_decode(outputs, skip_special_tokens=True)
-print(text)

-

Gives correct answer: [‘Question: How many cats are there? Answer: Two\n’]

-

However, when I change prompt to

-

prompt2 = ""Question: How many dogs are there? Answer: ""

-

inputs2 = processor(images=image, text=prompt2, return_tensors=“pt”).to(
-device, torch.float16
-)

-

outputs2 = model.generate(**inputs2)

-

text2 = processor.tokenizer.batch_decode(outputs2, skip_special_tokens=True)
-print(text2)

-

[‘Question: How many dogs are there? Answer: Two\n’]

";"

Yeah. For example, CLIP can perfectly classify dogs and cats, but BLIP seems utterly unsuitable for classification

";1 -Prevent creation of multiple checkpoints;https://discuss.huggingface.co/t/prevent-creation-of-multiple-checkpoints/168144;168144;5;2025-09-05 20:15:07.934000+00:00;"[{'id': 241309, 'name': 'Alex', 'username': 'SuperBowser', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/s/9f8e36/{size}.png', 'created_at': '2025-09-05T20:15:08.005Z', 'cooked': '

In my training arguments I selected to save every 200 steps, but my model is fairly large (relative to my disk size). I would like to save every 200 steps, but every save should just overwrite previous save instead of creating new save point. Is this possible?

', 'post_number': 1, 'post_type': 1, 'posts_count': 3, 'updated_at': '2025-09-05T20:15:08.005Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 7, 'reads': 5, 'readers_count': 4, 'score': 51.0, 'yours': False, 'topic_id': 168144, 'topic_slug': 'prevent-creation-of-multiple-checkpoints', 'display_username': 'Alex', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 102016, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/prevent-creation-of-multiple-checkpoints/168144/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 241317, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-09-06T00:19:59.432Z', 'cooked': '

Strictly speaking, it’s not overwriting, but I think save_total_limit or save_only_model are closer to the intended purpose.

\n
from transformers import TrainingArguments\n\nargs = TrainingArguments(\n    output_dir=""out"",\n    save_strategy=""steps"",\n    save_steps=200,\n    save_total_limit=1,      # deletes older checkpoints\n    save_only_model=True,    # 4.37+; skips optimizer/scheduler to shrink size\n)\n
', 'post_number': 2, 'post_type': 1, 'posts_count': 3, 'updated_at': '2025-09-06T00:19:59.432Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 5, 'readers_count': 4, 'score': 1.0, 'yours': False, 'topic_id': 168144, 'topic_slug': 'prevent-creation-of-multiple-checkpoints', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/docs/transformers/en/main_classes/trainer#transformers.TrainingArguments.save_total_limit', 'internal': False, 'reflection': False, 'title': 'Trainer', 'clicks': 1}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/prevent-creation-of-multiple-checkpoints/168144/2', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 241444, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-09-08T01:48:01.261Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 3, 'post_type': 3, 'posts_count': 3, 'updated_at': '2025-09-08T01:48:01.261Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 1, 'readers_count': 0, 'score': 0.2, 'yours': False, 'topic_id': 168144, 'topic_slug': 'prevent-creation-of-multiple-checkpoints', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/prevent-creation-of-multiple-checkpoints/168144/3', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";

In my training arguments I selected to save every 200 steps, but my model is fairly large (relative to my disk size). I would like to save every 200 steps, but every save should just overwrite previous save instead of creating new save point. Is this possible?

;"

Strictly speaking, it’s not overwriting, but I think save_total_limit or save_only_model are closer to the intended purpose.

-
from transformers import TrainingArguments
-
-args = TrainingArguments(
-    output_dir=""out"",
-    save_strategy=""steps"",
-    save_steps=200,
-    save_total_limit=1,      # deletes older checkpoints
-    save_only_model=True,    # 4.37+; skips optimizer/scheduler to shrink size
-)
-
";1 -IndexError: Target N is out of bounds within trainer.train() function;https://discuss.huggingface.co/t/indexerror-target-n-is-out-of-bounds-within-trainer-train-function/168143;168143;5;2025-09-05 19:13:46.123000+00:00;"[{'id': 241307, 'name': 'Javier M.A.', 'username': 'JavierMA', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/j/f19dbf/{size}.png', 'created_at': '2025-09-05T19:13:46.184Z', 'cooked': '

Hi all,

\n

I am trying to train a custom model for NLP sequence classification (multiclass) and struggling to be able to train it for a reason I don’t know, that is the reason why I am asking on this forum. I already had a look at similar posts on the forum with no luck.

\n

First of all, my dataset looks like the following in DataFrame before introducing it to a dataset (5 instances per class or label, being 0 the lowest label number and 251 the maximum one, so 252 labels in total):

\n
                                                   text  label\n0        Configuración del área de selección de TV Set       0\n1         Configuración del área de selección de TV Set      0\n2      Conformación de la sección de selección de TV...      0\n3     Conformación ae la stcción de seldcción de TV Set      0\n4     Validar la configuración del área de selección...      0\n...                                                 ...    ...\n1281  Validación incorrecta por identificador de art...    251\n1282  Validación incorrecta mediante identificador d...    251\n1283  Validación incorrecta por identificador de art...    251\n1284  Validación incorrecta por identificador de art...    251\n1285  Validar Validación incorrecta por identificado...    251\n
\n

As It is a custom model, I changed the value of out_features at out_proj in the classification part, so the resulting architecture looks like the following:

\n
RobertaForSequenceClassification(\n  (roberta): RobertaModel(\n    (embeddings): RobertaEmbeddings(\n      (word_embeddings): Embedding(50262, 1024, padding_idx=1)\n      (position_embeddings): Embedding(514, 1024, padding_idx=1)\n      (token_type_embeddings): Embedding(1, 1024)\n      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n      (dropout): Dropout(p=0.0, inplace=False)\n    )\n    (encoder): RobertaEncoder(\n      (layer): ModuleList(\n        (0-23): 24 x RobertaLayer(\n          (attention): RobertaAttention(\n            (self): RobertaSdpaSelfAttention(\n              (query): Linear(in_features=1024, out_features=1024, bias=True)\n              (key): Linear(in_features=1024, out_features=1024, bias=True)\n              (value): Linear(in_features=1024, out_features=1024, bias=True)\n              (dropout): Dropout(p=0.0, inplace=False)\n            )\n            (output): RobertaSelfOutput(\n              (dense): Linear(in_features=1024, out_features=1024, bias=True)\n              (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n              (dropout): Dropout(p=0.0, inplace=False)\n            )\n          )\n          (intermediate): RobertaIntermediate(\n            (dense): Linear(in_features=1024, out_features=4096, bias=True)\n            (intermediate_act_fn): GELUActivation()\n          )\n          (output): RobertaOutput(\n            (dense): Linear(in_features=4096, out_features=1024, bias=True)\n            (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n            (dropout): Dropout(p=0.0, inplace=False)\n          )\n        )\n      )\n    )\n  )\n  (classifier): RobertaClassificationHead(\n    (dense): Linear(in_features=1024, out_features=1024, bias=True)\n    (dropout): Dropout(p=0.0, inplace=False)\n    (out_proj): Linear(in_features=1024, out_features=252, bias=True)\n  )\n)\n
\n

Then I use the following code in order to create a HuggingFace Dataset:

\n
dataset = Dataset.from_pandas(df, split=\'train\')\ndataset = dataset.train_test_split(shuffle=True, seed=42, test_size=0.2)\nprint(dataset)\n
\n

Where the print gives the following result (I already checked that values in label go from 0 to N-1 labels or classes):

\n
DatasetDict({\n    train: Dataset({\n        features: [\'text\', \'label\'],\n        num_rows: 1028\n    })\n    test: Dataset({\n        features: [\'text\', \'label\'],\n        num_rows: 258\n    })\n})\n
\n

Despite having done all the remaining steps before training correctly (or so I believe) and having at least one instance per class in train and test dataset, when I get to the function train, I get the following error:

\n
---------------------------------------------------------------------------\nIndexError                                Traceback (most recent call last)\nCell In[103], line 1\n----> 1 trainer.train()\n      2 modelo_peft.to(\'cpu\')\n      3 modelo_peft.eval()\n\nFile ~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\transformers\\trainer.py:2238, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\n   2236         hf_hub_utils.enable_progress_bars()\n   2237 else:\n-> 2238     return inner_training_loop(\n   2239         args=args,\n   2240         resume_from_checkpoint=resume_from_checkpoint,\n   2241         trial=trial,\n   2242         ignore_keys_for_eval=ignore_keys_for_eval,\n   2243     )\n\nFile ~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\transformers\\trainer.py:2582, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\n   2575 context = (\n   2576     functools.partial(self.accelerator.no_sync, model=model)\n   2577     if i != len(batch_samples) - 1\n   2578     and self.accelerator.distributed_type != DistributedType.DEEPSPEED\n   2579     else contextlib.nullcontext\n   2580 )\n   2581 with context():\n-> 2582     tr_loss_step = self.training_step(model, inputs, num_items_in_batch)\n   2584 if (\n   2585     args.logging_nan_inf_filter\n   2586     and not is_torch_xla_available()\n   2587     and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))\n   2588 ):\n   2589     # if loss is nan or inf simply add the average of previous logged losses\n   2590     tr_loss = tr_loss + tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)\n\nFile ~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\transformers\\trainer.py:3796, in Trainer.training_step(self, model, inputs, num_items_in_batch)\n   3793     return loss_mb.reduce_mean().detach().to(self.args.device)\n   3795 with self.compute_loss_context_manager():\n-> 3796     loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)\n   3798 del inputs\n   3799 if (\n   3800     self.args.torch_empty_cache_steps is not None\n   3801     and self.state.global_step % self.args.torch_empty_cache_steps == 0\n   3802 ):\n\nFile ~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\transformers\\trainer.py:3884, in Trainer.compute_loss(self, model, inputs, return_outputs, num_items_in_batch)\n   3882         kwargs[""num_items_in_batch""] = num_items_in_batch\n   3883     inputs = {**inputs, **kwargs}\n-> 3884 outputs = model(**inputs)\n   3885 # Save past state if it exists\n   3886 # TODO: this needs to be fixed and made cleaner later.\n   3887 if self.args.past_index >= 0:\n\nFile ~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1773, in Module._wrapped_call_impl(self, *args, **kwargs)\n   1771     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]\n   1772 else:\n-> 1773     return self._call_impl(*args, **kwargs)\n\nFile ~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1784, in Module._call_impl(self, *args, **kwargs)\n   1779 # If we don\'t have any hooks, we want to skip the rest of the logic in\n   1780 # this function, and just call forward.\n   1781 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks\n   1782         or _global_backward_pre_hooks or _global_backward_hooks\n   1783         or _global_forward_hooks or _global_forward_pre_hooks):\n-> 1784     return forward_call(*args, **kwargs)\n   1786 result = None\n   1787 called_always_called_hooks = set()\n\nFile ~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\peft\\peft_model.py:1652, in PeftModelForSequenceClassification.forward(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)\n   1650         if peft_config.peft_type == PeftType.POLY:\n   1651             kwargs[""task_ids""] = task_ids\n-> 1652         return self.base_model(\n   1653             input_ids=input_ids,\n   1654             attention_mask=attention_mask,\n   1655             inputs_embeds=inputs_embeds,\n   1656             labels=labels,\n   1657             output_attentions=output_attentions,\n   1658             output_hidden_states=output_hidden_states,\n   1659             return_dict=return_dict,\n   1660             **kwargs,\n   1661         )\n   1663 batch_size = _get_batch_size(input_ids, inputs_embeds)\n   1664 if attention_mask is not None:\n   1665     # concat prompt attention mask\n\nFile ~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1773, in Module._wrapped_call_impl(self, *args, **kwargs)\n   1771     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]\n   1772 else:\n-> 1773     return self._call_impl(*args, **kwargs)\n\nFile ~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1784, in Module._call_impl(self, *args, **kwargs)\n   1779 # If we don\'t have any hooks, we want to skip the rest of the logic in\n   1780 # this function, and just call forward.\n   1781 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks\n   1782         or _global_backward_pre_hooks or _global_backward_hooks\n   1783         or _global_forward_hooks or _global_forward_pre_hooks):\n-> 1784     return forward_call(*args, **kwargs)\n   1786 result = None\n   1787 called_always_called_hooks = set()\n\nFile ~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\peft\\tuners\\tuners_utils.py:222, in BaseTuner.forward(self, *args, **kwargs)\n    221 def forward(self, *args: Any, **kwargs: Any):\n--> 222     return self.model.forward(*args, **kwargs)\n\nFile ~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\transformers\\models\\roberta\\modeling_roberta.py:1228, in RobertaForSequenceClassification.forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)\n   1226 elif self.config.problem_type == ""single_label_classification"":\n   1227     loss_fct = CrossEntropyLoss()\n-> 1228     loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))\n   1229 elif self.config.problem_type == ""multi_label_classification"":\n   1230     loss_fct = BCEWithLogitsLoss()\n\nFile ~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1773, in Module._wrapped_call_impl(self, *args, **kwargs)\n   1771     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]\n   1772 else:\n-> 1773     return self._call_impl(*args, **kwargs)\n\nFile ~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1784, in Module._call_impl(self, *args, **kwargs)\n   1779 # If we don\'t have any hooks, we want to skip the rest of the logic in\n   1780 # this function, and just call forward.\n   1781 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks\n   1782         or _global_backward_pre_hooks or _global_backward_hooks\n   1783         or _global_forward_hooks or _global_forward_pre_hooks):\n-> 1784     return forward_call(*args, **kwargs)\n   1786 result = None\n   1787 called_always_called_hooks = set()\n\nFile ~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\torch\\nn\\modules\\loss.py:1310, in CrossEntropyLoss.forward(self, input, target)\n   1309 def forward(self, input: Tensor, target: Tensor) -> Tensor:\n-> 1310     return F.cross_entropy(\n   1311         input,\n   1312         target,\n   1313         weight=self.weight,\n   1314         ignore_index=self.ignore_index,\n   1315         reduction=self.reduction,\n   1316         label_smoothing=self.label_smoothing,\n   1317     )\n\nFile ~\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\torch\\nn\\functional.py:3462, in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)\n   3460 if size_average is not None or reduce is not None:\n   3461     reduction = _Reduction.legacy_get_string(size_average, reduce)\n-> 3462 return torch._C._nn.cross_entropy_loss(\n   3463     input,\n   3464     target,\n   3465     weight,\n   3466     _Reduction.get_enum(reduction),\n   3467     ignore_index,\n   3468     label_smoothing,\n   3469 )\n\nIndexError: Target 134 is out of bounds.\n
\n

Any ideas of what may be wrong? Let me know if any other information is needed.

\n

Thanks,

\n

Javier

', 'post_number': 1, 'post_type': 1, 'posts_count': 6, 'updated_at': '2025-09-06T10:35:54.160Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 5, 'reads': 7, 'readers_count': 6, 'score': 41.4, 'yours': False, 'topic_id': 168143, 'topic_slug': 'indexerror-target-n-is-out-of-bounds-within-trainer-train-function', 'display_username': 'Javier M.A.', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 4, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 103219, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/indexerror-target-n-is-out-of-bounds-within-trainer-train-function/168143/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 241316, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-09-06T00:10:31.575Z', 'cooked': '

This may occur if num_labels is not passed during model loading.

\n
from datasets import Dataset\nfrom transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments\nimport numpy as np\nimport pandas as pd\nimport torch\nimport math\n\n# 0) Example dataframe (replace with your df)\n# df = pd.read_csv(""your_data.csv"")  # must contain \'text\' and integer \'label\'\ndf = pd.DataFrame({\n    ""text"": [f""ejemplo {i}"" for i in range(3000)],\n    ""label"": np.repeat(np.arange(252), repeats=math.ceil(3000/252))[:3000]\n})\n\n# 1) Ensure labels are 0..C-1\nC = int(df[""label""].max() + 1)\nm = int(df[""label""].min())\nif m != 0:\n    df[""label""] = df[""label""] - m\nassert df[""label""].between(0, C - 1).all(), ""labels must be in [0, C-1]""\n\n# 2) Build small train/test datasets\nds = Dataset.from_pandas(df[[""text"", ""label""]], split=""train"").train_test_split(test_size=0.1, seed=42)\n\n# 3) Tokenize\ntok = AutoTokenizer.from_pretrained(""roberta-base"")\ndef preprocess(ex):\n    return tok(ex[""text""], truncation=True, padding=""max_length"", max_length=64)\nds_tok = ds.map(preprocess, batched=True).remove_columns([""text""]).with_format(""torch"")\n\n# 4) Create model with the correct class count; let Transformers swap the head\nmodel = AutoModelForSequenceClassification.from_pretrained(\n    ""roberta-base"",\n    num_labels=C, # tells the new classifier size\n    ignore_mismatched_sizes=True,  # skip loading the old head\n)\n# optional but recommended: explicit label maps\nmodel.config.id2label = {i: str(i) for i in range(C)}\nmodel.config.label2id = {v: k for k, v in model.config.id2label.items()}\n\n# 5) Train briefly\nargs = TrainingArguments(\n    output_dir=""out_fix"",\n    per_device_train_batch_size=8,\n    per_device_eval_batch_size=8,\n    learning_rate=5e-5,\n    num_train_epochs=1,\n    logging_steps=10,\n    eval_strategy=""no"",\n    report_to=""none"",\n)\n\ntrainer = Trainer(model=model, args=args, train_dataset=ds_tok[""train""])\ntrainer.train() # IndexError: Target ** is out of bounds. (If without num_labels and ignore_mismatched_sizes)\n
', 'post_number': 2, 'post_type': 1, 'posts_count': 6, 'updated_at': '2025-09-06T00:10:31.575Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 4, 'readers_count': 3, 'score': 5.8, 'yours': False, 'topic_id': 168143, 'topic_slug': 'indexerror-target-n-is-out-of-bounds-within-trainer-train-function', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://discuss.huggingface.co/t/target-is-out-of-bounds/13802', 'internal': True, 'reflection': False, 'title': 'Target {} is out of bounds', 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/indexerror-target-n-is-out-of-bounds-within-trainer-train-function/168143/2', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 241346, 'name': 'Javier M.A.', 'username': 'JavierMA', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/j/f19dbf/{size}.png', 'created_at': '2025-09-06T10:33:50.813Z', 'cooked': '

Many thanks for your answer John. Regarding what you said regarding num_labels, the way I did it in my code was the following (first line in the following code):

\n
nueva_configuracion_modelo = AutoConfig.from_pretrained(nombre_modelo, num_labels=numero_de_etiquetas, id2label=ids_a_etiquetas, label2id=etiquetas_a_id, cache_dir=\'./huggingface_mirror\')\n\nmodelo_roberta = AutoModelForSequenceClassification.from_pretrained(\'PlanTL-GOB-ES/roberta-large-bne-massive\', cache_dir=\'./huggingface_mirror\', local_files_only=True)\n\n\nif modelo_roberta.config.num_labels != nueva_configuracion_modelo.num_labels or modelo_roberta.config.id2label != nueva_configuracion_modelo_config.id2label:\n    modelo_roberta.classifier.out_proj.out_features=nueva_configuracion_modelo.num_labels\n    \nmodelo_roberta.config = nueva_configuracion_modelo\n\nprint(modelo_roberta.config)\n\ntokenizador_roberta = AutoTokenizer.from_pretrained(nombre_modelo, cache_dir=\'./huggingface_mirror\', local_files_only=True, from_pt=True)\n
\n

With that code I changed the value in out_features parameter of layer out_proj in the classification part to 252 (the number of different classes) and saw label2id and id2label updated with values from my custom model.

', 'post_number': 3, 'post_type': 1, 'posts_count': 6, 'updated_at': '2025-09-06T11:12:36.335Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 3, 'readers_count': 2, 'score': 15.6, 'yours': False, 'topic_id': 168143, 'topic_slug': 'indexerror-target-n-is-out-of-bounds-within-trainer-train-function', 'display_username': 'Javier M.A.', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 2, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 103219, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/indexerror-target-n-is-out-of-bounds-within-trainer-train-function/168143/3', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 241348, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-09-06T13:12:56.958Z', 'cooked': '

In that case, the actual weigh probably won’t change t even if the attribute is modified.

\n
from transformers import AutoModelForSequenceClassification, AutoTokenizer\nimport torch\n\n# 1) Load a small model with 2 labels so the classifier head is tiny\nmodel = AutoModelForSequenceClassification.from_pretrained(""roberta-base"", num_labels=2)\ntok = AutoTokenizer.from_pretrained(""roberta-base"")\n\nhead = model.classifier.out_proj  # this is an nn.Linear\n\nprint(""=== BEFORE ==="")\nprint(""repr:"", head)\nprint(""out_features attr:"", head.out_features)\nprint(""weight shape:"", tuple(head.weight.shape))\nprint(""bias shape:"", tuple(head.bias.shape))\n\n# 2) Change ONLY the attribute (what your code effectively does)\nhead.out_features = 252  # <-- attribute changed, tensors untouched\n\nprint(""\\n=== AFTER CHANGING ATTRIBUTE ONLY ==="")\nprint(""repr:"", head)  # repr now claims out_features=252\nprint(""out_features attr:"", head.out_features)\nprint(""weight shape:"", tuple(head.weight.shape))  # still (2, hidden_size)\nprint(""bias shape:"", tuple(head.bias.shape))      # still (2,)\n\n# 3) Show the model still produces 2 logits, not 252\nbatch = tok(""hola mundo"", return_tensors=""pt"", padding=True, truncation=True, max_length=16)\nwith torch.no_grad():\n    logits = model(**batch).logits\nprint(""\\nlogits shape from forward():"", tuple(logits.shape))  # last dim is 2\n\n# 4) The correct fix is to REPLACE the Linear layer\nin_f = head.in_features\nmodel.classifier.out_proj = torch.nn.Linear(in_f, 252, bias=True)\n\nprint(""\\n=== AFTER REPLACING THE LAYER ==="")\nprint(""repr:"", model.classifier.out_proj)\nprint(""out_features attr:"", model.classifier.out_proj.out_features)\nprint(""weight shape:"", tuple(model.classifier.out_proj.weight.shape))  # now (252, hidden_size)\nprint(""bias shape:"", tuple(model.classifier.out_proj.bias.shape))      # now (252,)\n\nwith torch.no_grad():\n    logits = model(**batch).logits\nprint(""logits shape from forward():"", tuple(logits.shape))  # last dim is 252\n""""""\n=== BEFORE ===\nrepr: Linear(in_features=768, out_features=2, bias=True)\nout_features attr: 2\nweight shape: (2, 768)\nbias shape: (2,)\n\n=== AFTER CHANGING ATTRIBUTE ONLY ===\nrepr: Linear(in_features=768, out_features=252, bias=True)\nout_features attr: 252\nweight shape: (2, 768)\nbias shape: (2,)\n\nlogits shape from forward(): (1, 2)\n\n=== AFTER REPLACING THE LAYER ===\nrepr: Linear(in_features=768, out_features=252, bias=True)\nout_features attr: 252\nweight shape: (252, 768)\nbias shape: (252,)\nlogits shape from forward(): (1, 252)\n""""""\n
', 'post_number': 4, 'post_type': 1, 'posts_count': 6, 'updated_at': '2025-09-06T13:12:56.958Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 3, 'readers_count': 2, 'score': 5.6, 'yours': False, 'topic_id': 168143, 'topic_slug': 'indexerror-target-n-is-out-of-bounds-within-trainer-train-function', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/indexerror-target-n-is-out-of-bounds-within-trainer-train-function/168143/4', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 241357, 'name': 'Javier M.A.', 'username': 'JavierMA', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/j/f19dbf/{size}.png', 'created_at': '2025-09-06T16:13:50.937Z', 'cooked': '

You were totally right John ! I just printed the weight and bias in my code and the results were the original ones, so indeed I was modifying it the wrong way.

\n

So following the example I modified my code from this:

\n
if modelo_roberta.config.num_labels != nueva_configuracion_modelo.num_labels or modelo_roberta.config.id2label != nueva_configuracion_modelo_config.id2label:\n    modelo_roberta.classifier.out_proj.out_features=nueva_configuracion_modelo.num_labels\n    \nmodelo_roberta.config = nueva_configuracion_modelo\n
\n

To this:

\n
modelo_roberta.classifier.out_proj = torch.nn.Linear(modelo_roberta.classifier.out_proj.in_features, numero_de_etiquetas, bias=True)\nmodelo_roberta.num_labels = numero_de_etiquetas\nmodelo_roberta.config = nueva_configuracion_modelo\n
\n

And now it trains.

\n

Many thanks for your help!

', 'post_number': 5, 'post_type': 1, 'posts_count': 6, 'updated_at': '2025-09-06T16:35:51.006Z', 'reply_count': 0, 'reply_to_post_number': 4, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 2, 'readers_count': 1, 'score': 15.4, 'yours': False, 'topic_id': 168143, 'topic_slug': 'indexerror-target-n-is-out-of-bounds-within-trainer-train-function', 'display_username': 'Javier M.A.', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 2, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 103219, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/indexerror-target-n-is-out-of-bounds-within-trainer-train-function/168143/5', 'reactions': [{'id': 'confetti_ball', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 241392, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-09-07T04:13:52.319Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 6, 'post_type': 3, 'posts_count': 6, 'updated_at': '2025-09-07T04:13:52.319Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 1, 'readers_count': 0, 'score': 0.2, 'yours': False, 'topic_id': 168143, 'topic_slug': 'indexerror-target-n-is-out-of-bounds-within-trainer-train-function', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/indexerror-target-n-is-out-of-bounds-within-trainer-train-function/168143/6', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

Hi all,

-

I am trying to train a custom model for NLP sequence classification (multiclass) and struggling to be able to train it for a reason I don’t know, that is the reason why I am asking on this forum. I already had a look at similar posts on the forum with no luck.

-

First of all, my dataset looks like the following in DataFrame before introducing it to a dataset (5 instances per class or label, being 0 the lowest label number and 251 the maximum one, so 252 labels in total):

-
                                                   text  label
-0        Configuración del área de selección de TV Set       0
-1         Configuración del área de selección de TV Set      0
-2      Conformación de la sección de selección de TV...      0
-3     Conformación ae la stcción de seldcción de TV Set      0
-4     Validar la configuración del área de selección...      0
-...                                                 ...    ...
-1281  Validación incorrecta por identificador de art...    251
-1282  Validación incorrecta mediante identificador d...    251
-1283  Validación incorrecta por identificador de art...    251
-1284  Validación incorrecta por identificador de art...    251
-1285  Validar Validación incorrecta por identificado...    251
-
-

As It is a custom model, I changed the value of out_features at out_proj in the classification part, so the resulting architecture looks like the following:

-
RobertaForSequenceClassification(
-  (roberta): RobertaModel(
-    (embeddings): RobertaEmbeddings(
-      (word_embeddings): Embedding(50262, 1024, padding_idx=1)
-      (position_embeddings): Embedding(514, 1024, padding_idx=1)
-      (token_type_embeddings): Embedding(1, 1024)
-      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
-      (dropout): Dropout(p=0.0, inplace=False)
-    )
-    (encoder): RobertaEncoder(
-      (layer): ModuleList(
-        (0-23): 24 x RobertaLayer(
-          (attention): RobertaAttention(
-            (self): RobertaSdpaSelfAttention(
-              (query): Linear(in_features=1024, out_features=1024, bias=True)
-              (key): Linear(in_features=1024, out_features=1024, bias=True)
-              (value): Linear(in_features=1024, out_features=1024, bias=True)
-              (dropout): Dropout(p=0.0, inplace=False)
-            )
-            (output): RobertaSelfOutput(
-              (dense): Linear(in_features=1024, out_features=1024, bias=True)
-              (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
-              (dropout): Dropout(p=0.0, inplace=False)
-            )
-          )
-          (intermediate): RobertaIntermediate(
-            (dense): Linear(in_features=1024, out_features=4096, bias=True)
-            (intermediate_act_fn): GELUActivation()
-          )
-          (output): RobertaOutput(
-            (dense): Linear(in_features=4096, out_features=1024, bias=True)
-            (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
-            (dropout): Dropout(p=0.0, inplace=False)
-          )
-        )
-      )
-    )
-  )
-  (classifier): RobertaClassificationHead(
-    (dense): Linear(in_features=1024, out_features=1024, bias=True)
-    (dropout): Dropout(p=0.0, inplace=False)
-    (out_proj): Linear(in_features=1024, out_features=252, bias=True)
-  )
-)
-
-

Then I use the following code in order to create a HuggingFace Dataset:

-
dataset = Dataset.from_pandas(df, split='train')
-dataset = dataset.train_test_split(shuffle=True, seed=42, test_size=0.2)
-print(dataset)
-
-

Where the print gives the following result (I already checked that values in label go from 0 to N-1 labels or classes):

-
DatasetDict({
-    train: Dataset({
-        features: ['text', 'label'],
-        num_rows: 1028
-    })
-    test: Dataset({
-        features: ['text', 'label'],
-        num_rows: 258
-    })
-})
-
-

Despite having done all the remaining steps before training correctly (or so I believe) and having at least one instance per class in train and test dataset, when I get to the function train, I get the following error:

-
---------------------------------------------------------------------------
-IndexError                                Traceback (most recent call last)
-Cell In[103], line 1
-----> 1 trainer.train()
-      2 modelo_peft.to('cpu')
-      3 modelo_peft.eval()
-
-File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\trainer.py:2238, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
-   2236         hf_hub_utils.enable_progress_bars()
-   2237 else:
--> 2238     return inner_training_loop(
-   2239         args=args,
-   2240         resume_from_checkpoint=resume_from_checkpoint,
-   2241         trial=trial,
-   2242         ignore_keys_for_eval=ignore_keys_for_eval,
-   2243     )
-
-File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\trainer.py:2582, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
-   2575 context = (
-   2576     functools.partial(self.accelerator.no_sync, model=model)
-   2577     if i != len(batch_samples) - 1
-   2578     and self.accelerator.distributed_type != DistributedType.DEEPSPEED
-   2579     else contextlib.nullcontext
-   2580 )
-   2581 with context():
--> 2582     tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
-   2584 if (
-   2585     args.logging_nan_inf_filter
-   2586     and not is_torch_xla_available()
-   2587     and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
-   2588 ):
-   2589     # if loss is nan or inf simply add the average of previous logged losses
-   2590     tr_loss = tr_loss + tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)
-
-File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\trainer.py:3796, in Trainer.training_step(self, model, inputs, num_items_in_batch)
-   3793     return loss_mb.reduce_mean().detach().to(self.args.device)
-   3795 with self.compute_loss_context_manager():
--> 3796     loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
-   3798 del inputs
-   3799 if (
-   3800     self.args.torch_empty_cache_steps is not None
-   3801     and self.state.global_step % self.args.torch_empty_cache_steps == 0
-   3802 ):
-
-File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\trainer.py:3884, in Trainer.compute_loss(self, model, inputs, return_outputs, num_items_in_batch)
-   3882         kwargs[""num_items_in_batch""] = num_items_in_batch
-   3883     inputs = {**inputs, **kwargs}
--> 3884 outputs = model(**inputs)
-   3885 # Save past state if it exists
-   3886 # TODO: this needs to be fixed and made cleaner later.
-   3887 if self.args.past_index >= 0:
-
-File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1773, in Module._wrapped_call_impl(self, *args, **kwargs)
-   1771     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
-   1772 else:
--> 1773     return self._call_impl(*args, **kwargs)
-
-File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1784, in Module._call_impl(self, *args, **kwargs)
-   1779 # If we don't have any hooks, we want to skip the rest of the logic in
-   1780 # this function, and just call forward.
-   1781 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
-   1782         or _global_backward_pre_hooks or _global_backward_hooks
-   1783         or _global_forward_hooks or _global_forward_pre_hooks):
--> 1784     return forward_call(*args, **kwargs)
-   1786 result = None
-   1787 called_always_called_hooks = set()
-
-File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\peft\peft_model.py:1652, in PeftModelForSequenceClassification.forward(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)
-   1650         if peft_config.peft_type == PeftType.POLY:
-   1651             kwargs[""task_ids""] = task_ids
--> 1652         return self.base_model(
-   1653             input_ids=input_ids,
-   1654             attention_mask=attention_mask,
-   1655             inputs_embeds=inputs_embeds,
-   1656             labels=labels,
-   1657             output_attentions=output_attentions,
-   1658             output_hidden_states=output_hidden_states,
-   1659             return_dict=return_dict,
-   1660             **kwargs,
-   1661         )
-   1663 batch_size = _get_batch_size(input_ids, inputs_embeds)
-   1664 if attention_mask is not None:
-   1665     # concat prompt attention mask
-
-File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1773, in Module._wrapped_call_impl(self, *args, **kwargs)
-   1771     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
-   1772 else:
--> 1773     return self._call_impl(*args, **kwargs)
-
-File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1784, in Module._call_impl(self, *args, **kwargs)
-   1779 # If we don't have any hooks, we want to skip the rest of the logic in
-   1780 # this function, and just call forward.
-   1781 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
-   1782         or _global_backward_pre_hooks or _global_backward_hooks
-   1783         or _global_forward_hooks or _global_forward_pre_hooks):
--> 1784     return forward_call(*args, **kwargs)
-   1786 result = None
-   1787 called_always_called_hooks = set()
-
-File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\peft\tuners\tuners_utils.py:222, in BaseTuner.forward(self, *args, **kwargs)
-    221 def forward(self, *args: Any, **kwargs: Any):
---> 222     return self.model.forward(*args, **kwargs)
-
-File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\models\roberta\modeling_roberta.py:1228, in RobertaForSequenceClassification.forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
-   1226 elif self.config.problem_type == ""single_label_classification"":
-   1227     loss_fct = CrossEntropyLoss()
--> 1228     loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
-   1229 elif self.config.problem_type == ""multi_label_classification"":
-   1230     loss_fct = BCEWithLogitsLoss()
-
-File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1773, in Module._wrapped_call_impl(self, *args, **kwargs)
-   1771     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
-   1772 else:
--> 1773     return self._call_impl(*args, **kwargs)
-
-File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1784, in Module._call_impl(self, *args, **kwargs)
-   1779 # If we don't have any hooks, we want to skip the rest of the logic in
-   1780 # this function, and just call forward.
-   1781 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
-   1782         or _global_backward_pre_hooks or _global_backward_hooks
-   1783         or _global_forward_hooks or _global_forward_pre_hooks):
--> 1784     return forward_call(*args, **kwargs)
-   1786 result = None
-   1787 called_always_called_hooks = set()
-
-File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\modules\loss.py:1310, in CrossEntropyLoss.forward(self, input, target)
-   1309 def forward(self, input: Tensor, target: Tensor) -> Tensor:
--> 1310     return F.cross_entropy(
-   1311         input,
-   1312         target,
-   1313         weight=self.weight,
-   1314         ignore_index=self.ignore_index,
-   1315         reduction=self.reduction,
-   1316         label_smoothing=self.label_smoothing,
-   1317     )
-
-File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\functional.py:3462, in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
-   3460 if size_average is not None or reduce is not None:
-   3461     reduction = _Reduction.legacy_get_string(size_average, reduce)
--> 3462 return torch._C._nn.cross_entropy_loss(
-   3463     input,
-   3464     target,
-   3465     weight,
-   3466     _Reduction.get_enum(reduction),
-   3467     ignore_index,
-   3468     label_smoothing,
-   3469 )
-
-IndexError: Target 134 is out of bounds.
-
-

Any ideas of what may be wrong? Let me know if any other information is needed.

-

Thanks,

-

Javier

";"

In that case, the actual weigh probably won’t change t even if the attribute is modified.

-
from transformers import AutoModelForSequenceClassification, AutoTokenizer
-import torch
-
-# 1) Load a small model with 2 labels so the classifier head is tiny
-model = AutoModelForSequenceClassification.from_pretrained(""roberta-base"", num_labels=2)
-tok = AutoTokenizer.from_pretrained(""roberta-base"")
-
-head = model.classifier.out_proj  # this is an nn.Linear
-
-print(""=== BEFORE ==="")
-print(""repr:"", head)
-print(""out_features attr:"", head.out_features)
-print(""weight shape:"", tuple(head.weight.shape))
-print(""bias shape:"", tuple(head.bias.shape))
-
-# 2) Change ONLY the attribute (what your code effectively does)
-head.out_features = 252  # <-- attribute changed, tensors untouched
-
-print(""\n=== AFTER CHANGING ATTRIBUTE ONLY ==="")
-print(""repr:"", head)  # repr now claims out_features=252
-print(""out_features attr:"", head.out_features)
-print(""weight shape:"", tuple(head.weight.shape))  # still (2, hidden_size)
-print(""bias shape:"", tuple(head.bias.shape))      # still (2,)
-
-# 3) Show the model still produces 2 logits, not 252
-batch = tok(""hola mundo"", return_tensors=""pt"", padding=True, truncation=True, max_length=16)
-with torch.no_grad():
-    logits = model(**batch).logits
-print(""\nlogits shape from forward():"", tuple(logits.shape))  # last dim is 2
-
-# 4) The correct fix is to REPLACE the Linear layer
-in_f = head.in_features
-model.classifier.out_proj = torch.nn.Linear(in_f, 252, bias=True)
-
-print(""\n=== AFTER REPLACING THE LAYER ==="")
-print(""repr:"", model.classifier.out_proj)
-print(""out_features attr:"", model.classifier.out_proj.out_features)
-print(""weight shape:"", tuple(model.classifier.out_proj.weight.shape))  # now (252, hidden_size)
-print(""bias shape:"", tuple(model.classifier.out_proj.bias.shape))      # now (252,)
-
-with torch.no_grad():
-    logits = model(**batch).logits
-print(""logits shape from forward():"", tuple(logits.shape))  # last dim is 252
-""""""
-=== BEFORE ===
-repr: Linear(in_features=768, out_features=2, bias=True)
-out_features attr: 2
-weight shape: (2, 768)
-bias shape: (2,)
-
-=== AFTER CHANGING ATTRIBUTE ONLY ===
-repr: Linear(in_features=768, out_features=252, bias=True)
-out_features attr: 252
-weight shape: (2, 768)
-bias shape: (2,)
-
-logits shape from forward(): (1, 2)
-
-=== AFTER REPLACING THE LAYER ===
-repr: Linear(in_features=768, out_features=252, bias=True)
-out_features attr: 252
-weight shape: (252, 768)
-bias shape: (252,)
-logits shape from forward(): (1, 252)
-""""""
-
";1 -Adding Metadata to a dataset;https://discuss.huggingface.co/t/adding-metadata-to-a-dataset/165626;165626;5;2025-08-04 17:21:08.096000+00:00;"[{'id': 236538, 'name': 'Daniel Russ', 'username': 'danielruss', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/d/bbce88/{size}.png', 'created_at': '2025-08-04T17:21:08.153Z', 'cooked': '

Hi, I have a dataset where the text has a label that is a standardized code. The each code has a title describing the code. The data is in a pandas df called jobs_data

\n
data = {\n    ""text"": jobs_data.JobTitle.to_list(),\n    ""label"": jobs_data.soc2010.to_list(),\n}\nfeatures = {\n    ""text"": Value(""string""),\n    ""label"": ClassLabel(names=soc2010.code.to_list()),\n}\n\njobs_ds = Dataset.from_dict(data,features=Features(features))\n
\n

I would like to include a codes to title dictionary/function to make it easier to convert from a label → code → title
\nIs this possible?
\nThank you

', 'post_number': 1, 'post_type': 1, 'posts_count': 3, 'updated_at': '2025-08-04T17:21:08.153Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 16, 'reads': 6, 'readers_count': 5, 'score': 91.2, 'yours': False, 'topic_id': 165626, 'topic_slug': 'adding-metadata-to-a-dataset', 'display_username': 'Daniel Russ', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 41087, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/adding-metadata-to-a-dataset/165626/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 236574, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-08-05T00:28:09.191Z', 'cooked': '

If metadata alone is sufficient, using the DatasetInfo class is probably the quickest option.

\n
from datasets import DatasetInfo\n\ndata = {\n    ""text"": jobs_data.JobTitle.to_list(),\n    ""label"": jobs_data.soc2010.to_list(),\n}\n\nfeatures = {\n    ""text"": Value(""string""),\n    ""label"": ClassLabel(names=soc2010.code.to_list()),\n}\n\ncode2title = ""codes to convert from a label → code → title""\n\ninfo = DatasetInfo(\n    description=""Jobs dataset with SOC‐2010 codes"",\n    metadata={""code2title"": code2title}\n)\n\njobs_ds = Dataset.from_dict(data, features=Features(features), info=info)\n
', 'post_number': 2, 'post_type': 1, 'posts_count': 3, 'updated_at': '2025-08-05T00:30:44.478Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 3, 'readers_count': 2, 'score': 0.6000000000000001, 'yours': False, 'topic_id': 165626, 'topic_slug': 'adding-metadata-to-a-dataset', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/docs/datasets/v4.0.0/en/package_reference/main_classes#datasets.DatasetInfo', 'internal': False, 'reflection': False, 'title': 'Main classes', 'clicks': 2}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/adding-metadata-to-a-dataset/165626/2', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 241236, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-09-04T20:41:28.087Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 3, 'post_type': 3, 'posts_count': 3, 'updated_at': '2025-09-04T20:41:28.087Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 1, 'readers_count': 0, 'score': 0.2, 'yours': False, 'topic_id': 165626, 'topic_slug': 'adding-metadata-to-a-dataset', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/adding-metadata-to-a-dataset/165626/3', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

Hi, I have a dataset where the text has a label that is a standardized code. The each code has a title describing the code. The data is in a pandas df called jobs_data

-
data = {
-    ""text"": jobs_data.JobTitle.to_list(),
-    ""label"": jobs_data.soc2010.to_list(),
-}
-features = {
-    ""text"": Value(""string""),
-    ""label"": ClassLabel(names=soc2010.code.to_list()),
-}
-
-jobs_ds = Dataset.from_dict(data,features=Features(features))
-
-

I would like to include a codes to title dictionary/function to make it easier to convert from a label → code → title
-Is this possible?
-Thank you

";"

If metadata alone is sufficient, using the DatasetInfo class is probably the quickest option.

-
from datasets import DatasetInfo
-
-data = {
-    ""text"": jobs_data.JobTitle.to_list(),
-    ""label"": jobs_data.soc2010.to_list(),
-}
-
-features = {
-    ""text"": Value(""string""),
-    ""label"": ClassLabel(names=soc2010.code.to_list()),
-}
-
-code2title = ""codes to convert from a label → code → title""
-
-info = DatasetInfo(
-    description=""Jobs dataset with SOC‐2010 codes"",
-    metadata={""code2title"": code2title}
-)
-
-jobs_ds = Dataset.from_dict(data, features=Features(features), info=info)
-
";1 -Can I use LoRA with jhu-clsp/ettin-encoder-1b?;https://discuss.huggingface.co/t/can-i-use-lora-with-jhu-clsp-ettin-encoder-1b/167903;167903;5;2025-08-29 14:49:48.934000+00:00;"[{'id': 240628, 'name': 'Alex', 'username': 'SuperBowser', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/s/9f8e36/{size}.png', 'created_at': '2025-08-29T14:49:49.002Z', 'cooked': '

It looks like jhu-clsp/ettin-encoder-1b does not have any proj layers. Is it possible to use LoRA with this model:

\n
from transformers import AutoModelForSequenceClassification\nmodel_name = ‘jhu-clsp/ettin-encoder-1b’\nmodel = AutoModelForSequenceClassification.from_pretrained(model_name)\nfor parent_name, module in model.named_modules():\n    for child_name, child in module.named_children():\n        if ‘proj’ in child_name:\n            print(child_name)\n            print(“_________”)\n
\n

This code returned nothing.

', 'post_number': 1, 'post_type': 1, 'posts_count': 3, 'updated_at': '2025-08-29T14:49:49.002Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 5, 'reads': 5, 'readers_count': 4, 'score': 41.0, 'yours': False, 'topic_id': 167903, 'topic_slug': 'can-i-use-lora-with-jhu-clsp-ettin-encoder-1b', 'display_username': 'Alex', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 102016, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/can-i-use-lora-with-jhu-clsp-ettin-encoder-1b/167903/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 240648, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-08-30T00:29:33.998Z', 'cooked': '

It seems that for ModernBERT-based models, the target_modules names aren’t proj*. You can apparently also automatically select the target_modules using =""all-linear"".

\n
  ""target_modules"": [\n    ""Wqkv"",\n    ""Wi"",\n    ""Wo""\n  ],\n
', 'post_number': 2, 'post_type': 1, 'posts_count': 3, 'updated_at': '2025-08-30T00:29:33.998Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 5, 'readers_count': 4, 'score': 16.0, 'yours': False, 'topic_id': 167903, 'topic_slug': 'can-i-use-lora-with-jhu-clsp-ettin-encoder-1b', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/Wb-az/modernbert-lora-adapter-for-emotion-classification/blob/main/adapter_config.json', 'internal': False, 'reflection': False, 'title': 'adapter_config.json · Wb-az/modernbert-lora-adapter-for-emotion-classification at main', 'clicks': 0}, {'url': 'https://huggingface.co/docs/peft/v0.17.0/developer_guides/lora#efficiently-train-tokens-alongside-lora', 'internal': False, 'reflection': False, 'title': 'LoRA', 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/can-i-use-lora-with-jhu-clsp-ettin-encoder-1b/167903/2', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 241012, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-09-02T14:59:52.226Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 3, 'post_type': 3, 'posts_count': 3, 'updated_at': '2025-09-02T14:59:52.226Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 1, 'readers_count': 0, 'score': 0.2, 'yours': False, 'topic_id': 167903, 'topic_slug': 'can-i-use-lora-with-jhu-clsp-ettin-encoder-1b', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/can-i-use-lora-with-jhu-clsp-ettin-encoder-1b/167903/3', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

It looks like jhu-clsp/ettin-encoder-1b does not have any proj layers. Is it possible to use LoRA with this model:

-
from transformers import AutoModelForSequenceClassification
-model_name = ‘jhu-clsp/ettin-encoder-1b’
-model = AutoModelForSequenceClassification.from_pretrained(model_name)
-for parent_name, module in model.named_modules():
-    for child_name, child in module.named_children():
-        if ‘proj’ in child_name:
-            print(child_name)
-            print(“_________”)
-
-

This code returned nothing.

";"

It seems that for ModernBERT-based models, the target_modules names aren’t proj*. You can apparently also automatically select the target_modules using =""all-linear"".

-
  ""target_modules"": [
-    ""Wqkv"",
-    ""Wi"",
-    ""Wo""
-  ],
-
";1 -Could not find MistralForCausalLM in transformers;https://discuss.huggingface.co/t/could-not-find-mistralforcausallm-in-transformers/167978;167978;5;2025-09-01 02:12:05.710000+00:00;"[{'id': 240814, 'name': 'Jay', 'username': 'jaydeepb', 'avatar_template': '/user_avatar/discuss.huggingface.co/jaydeepb/{size}/14906_2.png', 'created_at': '2025-09-01T02:12:05.764Z', 'cooked': '

Hi. I finetuned mistralai/Mistral-Small-24B-Base-2501 on a dataset and now I’m trying to run inference for it. I’m using AutoModelForCausalLM.from_pretrained to load it but getting this error: Could not find MistralForCausalLM neither in transformers. I’m running the latest version of transformers 4.56.0. What might be the reason? Installing transformers from source according to this post support for MistralForCausalLM · Issue #26458 · huggingface/transformers · GitHub didn’t fix it.

', 'post_number': 1, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-09-01T02:13:05.174Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 181, 'reads': 5, 'readers_count': 4, 'score': 826.0, 'yours': False, 'topic_id': 167978, 'topic_slug': 'could-not-find-mistralforcausallm-in-transformers', 'display_username': 'Jay', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 2, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://github.com/huggingface/transformers/issues/26458', 'internal': False, 'reflection': False, 'title': 'support for MistralForCausalLM · Issue #26458 · huggingface/transformers · GitHub', 'clicks': 3}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 16838, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/could-not-find-mistralforcausallm-in-transformers/167978/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 240817, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-09-01T02:46:35.152Z', 'cooked': '

Hmm, maybe it’s missing dependencies or something…?
\nI don’t think the class itself is actually missing…

\n
pip install -U mistral_common sentencepiece\n
\n
import transformers, sys\nprint(""transformers"", transformers.__version__)\ntry:\n    from transformers.models.mistral.modeling_mistral import MistralForCausalLM\n    print(""MistralForCausalLM OK"")\nexcept Exception as e:\n    print(""MistralForCausalLM FAIL:"", e, file=sys.stderr)\n
', 'post_number': 2, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-09-01T02:46:35.152Z', 'reply_count': 2, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 6, 'reads': 5, 'readers_count': 4, 'score': 41.0, 'yours': False, 'topic_id': 167978, 'topic_slug': 'could-not-find-mistralforcausallm-in-transformers', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/docs/transformers/en/model_doc/mistral', 'internal': False, 'reflection': False, 'title': 'Mistral', 'clicks': 4}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/could-not-find-mistralforcausallm-in-transformers/167978/2', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240825, 'name': 'Jay', 'username': 'jaydeepb', 'avatar_template': '/user_avatar/discuss.huggingface.co/jaydeepb/{size}/14906_2.png', 'created_at': '2025-09-01T03:22:20.500Z', 'cooked': '

@John6666 getting this when I run that code snippet
\n``
\nMistralForCausalLM FAIL: partially initialized module ‘torchvision’ has no attribute ‘extension’ (most likely due to a circular import)
\n```

', 'post_number': 3, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-09-01T03:22:20.500Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 2, 'reads': 4, 'readers_count': 3, 'score': 25.8, 'yours': False, 'topic_id': 167978, 'topic_slug': 'could-not-find-mistralforcausallm-in-transformers', 'display_username': 'Jay', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 16838, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/could-not-find-mistralforcausallm-in-transformers/167978/3', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240826, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-09-01T03:29:23.628Z', 'cooked': '

Judging just by the error, it’s probably a version mismatch between torch and torchvision.

\n
pip install torchvision==x.xx.x\n
\n

Domain Version Compatibility Matrix for PyTorch

', 'post_number': 4, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-09-01T03:29:23.628Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 6, 'reads': 4, 'readers_count': 3, 'score': 50.8, 'yours': False, 'topic_id': 167978, 'topic_slug': 'could-not-find-mistralforcausallm-in-transformers', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://github.com/pytorch/pytorch/wiki/PyTorch-Versions#domain-version-compatibility-matrix-for-pytorch', 'internal': False, 'reflection': False, 'title': 'PyTorch Versions · pytorch/pytorch Wiki · GitHub', 'clicks': 6}, {'url': 'https://github.com/timeseriesAI/tsai/issues/919', 'internal': False, 'reflection': False, 'title': ""AttributeError: partially initialized module 'torchvision' has no attribute 'extension' (most likely due to a circular import) · Issue #919 · timeseriesAI/tsai · GitHub"", 'clicks': 2}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/could-not-find-mistralforcausallm-in-transformers/167978/4', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 240829, 'name': 'Jay', 'username': 'jaydeepb', 'avatar_template': '/user_avatar/discuss.huggingface.co/jaydeepb/{size}/14906_2.png', 'created_at': '2025-09-01T04:02:13.578Z', 'cooked': '\n

@John6666 thanks! yes, aligning the versions helped

\n

I have fine-tuned the model and now running into this run-time error while loading it:
\nRuntimeError: Error(s) in loading state_dict for Embedding:
\nsize mismatch for weight: copying a param with shape torch.Size([0]) from checkpoint, the shape in current model is torch.Size([131072, 5120]). Any idea what might be causing this?

', 'post_number': 5, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-09-01T04:02:13.578Z', 'reply_count': 0, 'reply_to_post_number': 4, 'quote_count': 1, 'incoming_link_count': 1, 'reads': 4, 'readers_count': 3, 'score': 20.8, 'yours': False, 'topic_id': 167978, 'topic_slug': 'could-not-find-mistralforcausallm-in-transformers', 'display_username': 'Jay', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 16838, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/could-not-find-mistralforcausallm-in-transformers/167978/5', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240830, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-09-01T04:14:41.113Z', 'cooked': '

Based on the error message, I’d guess it’s either trying to load the PEFT adapter as a whole model weight or the model weights are corrupted…

\n', 'post_number': 6, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-09-01T04:14:41.113Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 7, 'reads': 4, 'readers_count': 3, 'score': 30.8, 'yours': False, 'topic_id': 167978, 'topic_slug': 'could-not-find-mistralforcausallm-in-transformers', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://github.com/huggingface/transformers/issues/16479#issuecomment-1083225080', 'internal': False, 'reflection': False, 'title': 'Embedding size mismatch when hyperparameter search · Issue #16479 · huggingface/transformers · GitHub', 'clicks': 0}, {'url': 'https://huggingface.co/docs/transformers/v4.56.0/en/peft?load=from_pretrained#load-adapter', 'internal': False, 'reflection': False, 'title': 'PEFT', 'clicks': 0}, {'url': 'https://discuss.huggingface.co/t/size-mismatch-error-for-llm-checkpoint-of-peft-model-with-a-resized-token-embeddings/104157', 'internal': True, 'reflection': False, 'title': 'Size Mismatch error for LLM checkpoint of PEFT model with a resized token embeddings', 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/could-not-find-mistralforcausallm-in-transformers/167978/6', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240831, 'name': 'Jay', 'username': 'jaydeepb', 'avatar_template': '/user_avatar/discuss.huggingface.co/jaydeepb/{size}/14906_2.png', 'created_at': '2025-09-01T04:22:52.075Z', 'cooked': '

@John6666 could this be because of deepspeed? when I do len(tokenizer) it prints 131072.

', 'post_number': 7, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-09-01T04:22:52.075Z', 'reply_count': 0, 'reply_to_post_number': 6, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 3, 'readers_count': 2, 'score': 20.6, 'yours': False, 'topic_id': 167978, 'topic_slug': 'could-not-find-mistralforcausallm-in-transformers', 'display_username': 'Jay', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 16838, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/could-not-find-mistralforcausallm-in-transformers/167978/7', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240832, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-09-01T04:39:09.015Z', 'cooked': '
\n

could this be because of deepspeed

\n
\n

I think very likely…
\nWhen saving fails in DeepSpeed, it appears an empty tensor is saved instead.

\n', 'post_number': 8, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-09-01T04:39:09.015Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 3, 'readers_count': 2, 'score': 10.6, 'yours': False, 'topic_id': 167978, 'topic_slug': 'could-not-find-mistralforcausallm-in-transformers', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://github.com/huggingface/peft/issues/2450', 'internal': False, 'reflection': False, 'title': 'modules_to_save resulting in empty tensor with deepspeed zero3 LoRA training · Issue #2450 · huggingface/peft · GitHub', 'clicks': 0}, {'url': 'https://huggingface.co/docs/transformers/v4.56.0/en/deepspeed#save-model-weights', 'internal': False, 'reflection': False, 'title': 'DeepSpeed', 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/could-not-find-mistralforcausallm-in-transformers/167978/8', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240833, 'name': 'Jay', 'username': 'jaydeepb', 'avatar_template': '/user_avatar/discuss.huggingface.co/jaydeepb/{size}/14906_2.png', 'created_at': '2025-09-01T05:04:32.685Z', 'cooked': '

@John6666 I’m using ""stage3_gather_16bit_weights_on_model_save"": true as suggested here. Not sure what else is causing this.

', 'post_number': 9, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-09-01T05:04:32.685Z', 'reply_count': 0, 'reply_to_post_number': 8, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 2, 'readers_count': 1, 'score': 15.4, 'yours': False, 'topic_id': 167978, 'topic_slug': 'could-not-find-mistralforcausallm-in-transformers', 'display_username': 'Jay', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/docs/transformers/v4.56.0/en/deepspeed#save-model-weights', 'internal': False, 'reflection': False, 'title': 'DeepSpeed', 'clicks': 0}], 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 16838, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/could-not-find-mistralforcausallm-in-transformers/167978/9', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240838, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-09-01T06:40:53.193Z', 'cooked': '

This may also occur when using BF16 or when using older version of PEFT.

\n
pip install -U peft\n
', 'post_number': 10, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-09-01T06:40:53.193Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 2, 'readers_count': 1, 'score': 10.4, 'yours': False, 'topic_id': 167978, 'topic_slug': 'could-not-find-mistralforcausallm-in-transformers', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://github.com/deepspeedai/Megatron-DeepSpeed/issues/298', 'internal': False, 'reflection': False, 'title': 'Deepspeed Zero Stage 3 save a empty model state_dict · Issue #298 · deepspeedai/Megatron-DeepSpeed · GitHub', 'clicks': 0}, {'url': 'https://github.com/huggingface/peft/issues/2450', 'internal': False, 'reflection': False, 'title': 'modules_to_save resulting in empty tensor with deepspeed zero3 LoRA training · Issue #2450 · huggingface/peft · GitHub', 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/could-not-find-mistralforcausallm-in-transformers/167978/10', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240844, 'name': 'Jay', 'username': 'jaydeepb', 'avatar_template': '/user_avatar/discuss.huggingface.co/jaydeepb/{size}/14906_2.png', 'created_at': '2025-09-01T09:08:55.940Z', 'cooked': '

@John6666 using model.save_16bit_model() to save the model insread of save_pretrained() fixed this!

', 'post_number': 11, 'post_type': 1, 'posts_count': 12, 'updated_at': '2025-09-01T09:08:55.940Z', 'reply_count': 0, 'reply_to_post_number': 10, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 2, 'readers_count': 1, 'score': 15.4, 'yours': False, 'topic_id': 167978, 'topic_slug': 'could-not-find-mistralforcausallm-in-transformers', 'display_username': 'Jay', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 16838, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/could-not-find-mistralforcausallm-in-transformers/167978/11', 'reactions': [{'id': 'confetti_ball', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240913, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-09-01T21:09:24.800Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 12, 'post_type': 3, 'posts_count': 12, 'updated_at': '2025-09-01T21:09:24.800Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 1, 'readers_count': 0, 'score': 5.2, 'yours': False, 'topic_id': 167978, 'topic_slug': 'could-not-find-mistralforcausallm-in-transformers', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/could-not-find-mistralforcausallm-in-transformers/167978/12', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

Hi. I finetuned mistralai/Mistral-Small-24B-Base-2501 on a dataset and now I’m trying to run inference for it. I’m using AutoModelForCausalLM.from_pretrained to load it but getting this error: Could not find MistralForCausalLM neither in transformers. I’m running the latest version of transformers 4.56.0. What might be the reason? Installing transformers from source according to this post support for MistralForCausalLM · Issue #26458 · huggingface/transformers · GitHub didn’t fix it.

";"

Judging just by the error, it’s probably a version mismatch between torch and torchvision.

-
pip install torchvision==x.xx.x
-
-

Domain Version Compatibility Matrix for PyTorch

";1 -Which data parallel does trainer use? DP or DDP?;https://discuss.huggingface.co/t/which-data-parallel-does-trainer-use-dp-or-ddp/16021;16021;9;2022-03-24 06:03:27.073000+00:00;"[{'id': 33067, 'name': 'dr_xiami', 'username': 'xiami', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/x/dc4da7/{size}.png', 'created_at': '2022-03-24T06:03:27.154Z', 'cooked': '

I try to search in the doc. But I didn’t find the answer anywhere.

\n

Thank you

', 'post_number': 1, 'post_type': 1, 'posts_count': 7, 'updated_at': '2022-03-24T06:03:27.154Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 5299, 'reads': 205, 'readers_count': 204, 'score': 26516.0, 'yours': False, 'topic_id': 16021, 'topic_slug': 'which-data-parallel-does-trainer-use-dp-or-ddp', 'display_username': 'dr_xiami', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 2}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 3838, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/which-data-parallel-does-trainer-use-dp-or-ddp/16021/1', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 2}], 'current_user_reaction': None, 'reaction_users_count': 2, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 33091, 'name': 'Sylvain Gugger', 'username': 'sgugger', 'avatar_template': '/user_avatar/discuss.huggingface.co/sgugger/{size}/2291_2.png', 'created_at': '2022-03-24T12:22:07.153Z', 'cooked': '

It depends if you launch your training script with python (in which case it will use DP) or python -m torch.distributed.launch (in which case it will use DDP).

', 'post_number': 2, 'post_type': 1, 'posts_count': 7, 'updated_at': '2022-03-24T12:22:07.153Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 331, 'reads': 203, 'readers_count': 202, 'score': 1750.6, 'yours': False, 'topic_id': 16021, 'topic_slug': 'which-data-parallel-does-trainer-use-dp-or-ddp', 'display_username': 'Sylvain Gugger', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 4}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 6, 'hidden': False, 'trust_level': 2, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/which-data-parallel-does-trainer-use-dp-or-ddp/16021/2', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 4}], 'current_user_reaction': None, 'reaction_users_count': 4, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 42484, 'name': 'Brando Miranda', 'username': 'brando', 'avatar_template': '/user_avatar/discuss.huggingface.co/brando/{size}/30114_2.png', 'created_at': '2022-08-17T15:03:18.063Z', 'cooked': '

perhaps useful to you: Using Transformers with DistributedDataParallel — any examples?

', 'post_number': 3, 'post_type': 1, 'posts_count': 7, 'updated_at': '2022-08-17T15:03:18.063Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 47, 'reads': 193, 'readers_count': 192, 'score': 318.6, 'yours': False, 'topic_id': 16021, 'topic_slug': 'which-data-parallel-does-trainer-use-dp-or-ddp', 'display_username': 'Brando Miranda', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://discuss.huggingface.co/t/using-transformers-with-distributeddataparallel-any-examples/10775', 'internal': True, 'reflection': False, 'title': 'Using Transformers with DistributedDataParallel — any examples?', 'clicks': 1940}, {'url': 'https://discuss.huggingface.co/t/how-to-run-an-end-to-end-example-of-distributed-data-parallel-with-hugging-faces-trainer-api-ideally-on-a-single-node-multiple-gpus/21750', 'internal': True, 'reflection': True, 'title': ""How to run an end to end example of distributed data parallel with hugging face's trainer api (ideally on a single node multiple gpus)?"", 'clicks': 16}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 3}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 3664, 'hidden': False, 'trust_level': 2, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/which-data-parallel-does-trainer-use-dp-or-ddp/16021/3', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 3}], 'current_user_reaction': None, 'reaction_users_count': 3, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240653, 'name': 'Rylan Schaeffer', 'username': 'RylanSchaeffer', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/r/6f9a4e/{size}.png', 'created_at': '2025-08-30T01:34:06.356Z', 'cooked': '

I know this is a bit of an old thread, but I have a follow up question. I’m creating a Trainer() , evaluating, training and evaluating again. Here’s a snippet of my code:

\n

```
\ntrainer = Trainer(
\nmodel=model,
\nprocessing_class=tokenizer,
\nargs=pretraining_config,
\ntrain_dataset=train_dataset,
\neval_dataset=eval_dataset,
\ndata_collator=data_collator,
\n)

\n

logging.info(“Evaluating before training…”)
\neval_metrics_before = trainer.evaluate()
\nwandb.log({f""eval_before/{k}"": v for k, v in eval_metrics_before.items()})
\npprint.pprint(eval_metrics_before)

\n

logging.info(“Beginning training…”)
\ntrainer.train()

\n

logging.info(“Finished training. Beginning final evaluation…”)
\neval_metrics_after = trainer.evaluate()
\nwandb.log({f""eval_after/{k}"": v for k, v in eval_metrics_after.items()})
\npprint.pprint(eval_metrics_after)
\n```

\n

When I run with two GPUs and a model small enough to fit on each, I noticed while the job is running that evaluating appears to use data parallelism over the two visible GPUs, but does not for training. Do you know what might cause that or how to fix it?

', 'post_number': 4, 'post_type': 1, 'posts_count': 7, 'updated_at': '2025-08-30T01:34:56.436Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 2, 'readers_count': 1, 'score': 15.4, 'yours': False, 'topic_id': 16021, 'topic_slug': 'which-data-parallel-does-trainer-use-dp-or-ddp', 'display_username': 'Rylan Schaeffer', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 4145, 'hidden': False, 'trust_level': 2, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/which-data-parallel-does-trainer-use-dp-or-ddp/16021/4', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240654, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-08-30T02:42:00.790Z', 'cooked': '

Hmm… Have you tried launching it via accelerate or torchrun?

\n
# single node, 2 GPUs\ntorchrun --nproc_per_node=2 train.py\n# or\naccelerate launch --num_processes=2 train.py\n
\n

Accelerator selection

', 'post_number': 5, 'post_type': 1, 'posts_count': 7, 'updated_at': '2025-08-30T02:42:00.790Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 2, 'reads': 2, 'readers_count': 1, 'score': 15.4, 'yours': False, 'topic_id': 16021, 'topic_slug': 'which-data-parallel-does-trainer-use-dp-or-ddp', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://discuss.huggingface.co/t/how-to-run-single-node-multi-gpu-training-with-hf-trainer/19503', 'internal': True, 'reflection': False, 'title': 'How to run single-node, multi-GPU training with HF Trainer?', 'clicks': 1}, {'url': 'https://huggingface.co/docs/transformers/v4.56.0/en/accelerator_selection', 'internal': False, 'reflection': False, 'title': 'Accelerator selection', 'clicks': 1}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/which-data-parallel-does-trainer-use-dp-or-ddp/16021/5', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240658, 'name': 'Rylan Schaeffer', 'username': 'RylanSchaeffer', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/r/6f9a4e/{size}.png', 'created_at': '2025-08-30T04:23:56.271Z', 'cooked': '\n

Yeah, I would’ve thought that launching with python would use DP and thus would only use 1 available GPU. And that’s partially correct: train() indeed only uses 1 GPU, but evaluate() uses 2 GPUs. Hence my confusion…

', 'post_number': 6, 'post_type': 1, 'posts_count': 7, 'updated_at': '2025-08-30T04:23:56.271Z', 'reply_count': 0, 'reply_to_post_number': 5, 'quote_count': 1, 'incoming_link_count': 0, 'reads': 2, 'readers_count': 1, 'score': 15.4, 'yours': False, 'topic_id': 16021, 'topic_slug': 'which-data-parallel-does-trainer-use-dp-or-ddp', 'display_username': 'Rylan Schaeffer', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 4145, 'hidden': False, 'trust_level': 2, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/which-data-parallel-does-trainer-use-dp-or-ddp/16021/6', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240668, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-08-30T05:25:09.372Z', 'cooked': '

I see. When running distributed training, if you launch it as a single process, evaluate sometimes behaves differently from the Trainer part…Since DP itself seems quite fragile, using DDP is probably the simpler approach…

', 'post_number': 7, 'post_type': 1, 'posts_count': 7, 'updated_at': '2025-08-30T05:25:09.372Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 1, 'readers_count': 0, 'score': 0.2, 'yours': False, 'topic_id': 16021, 'topic_slug': 'which-data-parallel-does-trainer-use-dp-or-ddp', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://discuss.pytorch.org/t/bug-in-dataparallel-only-works-if-the-dataset-device-is-cuda-0/28634', 'internal': False, 'reflection': False, 'title': 'Bug in DataParallel? Only works if the dataset device is cuda:0 - PyTorch Forums', 'clicks': 1}, {'url': 'https://github.com/huggingface/transformers/issues/28956', 'internal': False, 'reflection': False, 'title': 'The Trainer uses all available GPU devices when training but only one when evaluating. · Issue #28956 · huggingface/transformers · GitHub', 'clicks': 1}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/which-data-parallel-does-trainer-use-dp-or-ddp/16021/7', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

I try to search in the doc. But I didn’t find the answer anywhere.

-

Thank you

";

It depends if you launch your training script with python (in which case it will use DP) or python -m torch.distributed.launch (in which case it will use DDP).

;1 -Gradient Overflow issue while using deepspeed;https://discuss.huggingface.co/t/gradient-overflow-issue-while-using-deepspeed/167833;167833;5;2025-08-28 00:39:29.361000+00:00;"[{'id': 240473, 'name': 'Jay', 'username': 'jaydeepb', 'avatar_template': '/user_avatar/discuss.huggingface.co/jaydeepb/{size}/14906_2.png', 'created_at': '2025-08-28T00:39:29.422Z', 'cooked': '

Hi. I’m trying to fine-tune mistralai/Mistral-Small-24B-Base-2501 using deepspeed and consistently getting the overflow error. When I use bf16 and fp32,I don’t see the overflow issue but the training loss is Nan. When I switch to fp16 the training loss is correct but it throws the overflow error. How can I fix this? This works fine with smaller models. Using lr=1e-7.

\n

My df_config.json:

\n
{\n    ""train_micro_batch_size_per_gpu"": 1,\n    ""gradient_accumulation_steps"": 8,\n    ""zero_optimization"": {\n        ""stage"": 2\n    },\n    ""zero_allow_untested_optimizer"": true,\n    ""fp16"": {\n        ""enabled"": true,\n        ""loss_scale"": 0,\n        ""initial_scale_power"": 32,\n        ""loss_scale_window"": 1000,\n        ""hysteresis"": 2,\n        ""min_loss_scale"": 1\n    },\n    ""gradient_clipping"": 1.0,\n    ""wall_clock_breakdown"": false\n}\n
\n

Using deepspeed 0.17.2 and transformers 4.42.4.

', 'post_number': 1, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-08-28T00:42:21.118Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 17, 'reads': 6, 'readers_count': 5, 'score': 81.2, 'yours': False, 'topic_id': 167833, 'topic_slug': 'gradient-overflow-issue-while-using-deepspeed', 'display_username': 'Jay', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 16838, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/gradient-overflow-issue-while-using-deepspeed/167833/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 240474, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-08-28T01:04:31.600Z', 'cooked': '

If the GPU supports bfloat16, it’s probably better to use bfloat16. Regarding NaN issues, SDPA seems to be the culprit in many cases. Try attn_implementation=""eager"".

\n', 'post_number': 2, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-08-28T01:04:31.600Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 5, 'readers_count': 4, 'score': 26.0, 'yours': False, 'topic_id': 167833, 'topic_slug': 'gradient-overflow-issue-while-using-deepspeed', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://github.com/pytorch/pytorch/issues/103749', 'internal': False, 'reflection': False, 'title': 'SDPA produces NaN with padding mask · Issue #103749 · pytorch/pytorch · GitHub', 'clicks': 1}, {'url': 'https://github.com/pytorch/pytorch/issues/139298', 'internal': False, 'reflection': False, 'title': 'CUDNN sdp attention causes loss explosion · Issue #139298 · pytorch/pytorch · GitHub', 'clicks': 0}, {'url': 'https://github.com/huggingface/transformers/issues/32390', 'internal': False, 'reflection': False, 'title': 'Gemma 2 returns NaN when using default attn (sdpa) with padding · Issue #32390 · huggingface/transformers · GitHub', 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/gradient-overflow-issue-while-using-deepspeed/167833/2', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 240480, 'name': 'Jay', 'username': 'jaydeepb', 'avatar_template': '/user_avatar/discuss.huggingface.co/jaydeepb/{size}/14906_2.png', 'created_at': '2025-08-28T04:50:31.820Z', 'cooked': '

@John6666 loading the model in bfloat16 and then using bf16=true in deepspeed seems to solve this issue for now!

', 'post_number': 3, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-08-28T04:50:31.820Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 4, 'readers_count': 3, 'score': 15.8, 'yours': False, 'topic_id': 167833, 'topic_slug': 'gradient-overflow-issue-while-using-deepspeed', 'display_username': 'Jay', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 16838, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/gradient-overflow-issue-while-using-deepspeed/167833/3', 'reactions': [{'id': 'confetti_ball', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240534, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-08-28T16:51:04.376Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 4, 'post_type': 3, 'posts_count': 4, 'updated_at': '2025-08-28T16:51:04.376Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 1, 'readers_count': 0, 'score': 0.2, 'yours': False, 'topic_id': 167833, 'topic_slug': 'gradient-overflow-issue-while-using-deepspeed', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/gradient-overflow-issue-while-using-deepspeed/167833/4', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

Hi. I’m trying to fine-tune mistralai/Mistral-Small-24B-Base-2501 using deepspeed and consistently getting the overflow error. When I use bf16 and fp32,I don’t see the overflow issue but the training loss is Nan. When I switch to fp16 the training loss is correct but it throws the overflow error. How can I fix this? This works fine with smaller models. Using lr=1e-7.

-

My df_config.json:

-
{
-    ""train_micro_batch_size_per_gpu"": 1,
-    ""gradient_accumulation_steps"": 8,
-    ""zero_optimization"": {
-        ""stage"": 2
-    },
-    ""zero_allow_untested_optimizer"": true,
-    ""fp16"": {
-        ""enabled"": true,
-        ""loss_scale"": 0,
-        ""initial_scale_power"": 32,
-        ""loss_scale_window"": 1000,
-        ""hysteresis"": 2,
-        ""min_loss_scale"": 1
-    },
-    ""gradient_clipping"": 1.0,
-    ""wall_clock_breakdown"": false
-}
-
-

Using deepspeed 0.17.2 and transformers 4.42.4.

";"

If the GPU supports bfloat16, it’s probably better to use bfloat16. Regarding NaN issues, SDPA seems to be the culprit in many cases. Try attn_implementation=""eager"".

-";1 -Setting max_length does not limit length of output;https://discuss.huggingface.co/t/setting-max-length-does-not-limit-length-of-output/167794;167794;20;2025-08-27 00:53:51.090000+00:00;"[{'id': 240359, 'name': 'Travis Lelle', 'username': 'info5ec', 'avatar_template': '/user_avatar/discuss.huggingface.co/info5ec/{size}/53106_2.png', 'created_at': '2025-08-27T00:53:51.147Z', 'cooked': '
>>> generator = pipeline(""text-generation"", model=""HuggingFaceTB/SmolLM2-360M"")\nconfig.json: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████��███████████████████████████████████████████████████| 689/689 [00:00<00:00, 415kB/s]\nmodel.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 724M/724M [00:09<00:00, 73.1MB/s]\ngeneration_config.json: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 111/111 [00:00<00:00, 697kB/s]\ntokenizer_config.json: 3.66kB [00:00, 10.4MB/s]\nvocab.json: 801kB [00:00, 9.48MB/s]\nmerges.txt: 466kB [00:00, 36.9MB/s]\ntokenizer.json: 2.10MB [00:00, 53.9MB/s]\nspecial_tokens_map.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 831/831 [00:00<00:00, 1.66MB/s]\nDevice set to use mps:0\n>>> generator(""I\'m not sure if I know how to"", max_length=50, num_return_sequences=3,)\nTruncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to \'longest_first\' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\nSetting `pad_token_id` to `eos_token_id`:0 for open-end generation.\nBoth `max_new_tokens` (=256) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n[{\'generated_text\': ""I\'m not sure if I know how to explain this. The problem basically is that you can\'t have a value of 0 in the output. I\'m trying to do the following:\\n\\nfloat x = 2.0;\\nfloat y = 0.0;\\nfloat z = 1.0;\\nfloat z2;\\n\\nz2 = z + x*y;\\n\\nI understand that y*z should be 2.0*0.0 = 0.0, but I\'m not sure how to get the 0.0 in the z2 variable.\\n\\n## Answers\\n\\n0\\n1. If you are trying to get the 0.0 in z2, please look at the following code:\\nbool true = (z2*z2) > 0;\\n\\n// The result is 0.0\\n\\nfloat z2 = z2*z2;\\n\\n// The result is 0.0\\n\\nfloat z2 = z2*z2*z2;\\n\\n// The result is 0.0\\n\\n## Re: How to get 0 in a value in the output in a function\\n\\nThanks for the reply! I understand the problem now.\\n\\nI was trying""}, {\'generated_text\': ""I\'m not sure if I know how to do that.\\n\\nHow can I find the derivative of 1/x?\\n\\nI can\'t find the derivative of x^3\\n\\nI can\'t find the derivative of x^1/2\\n\\nI can\'t find the derivative of x^1/3\\n\\nI can\'t find the derivative of x^1/4\\n\\nI can\'t find the derivative of x^1/5\\n\\nI can\'t find the derivative of x^1/6\\n\\nI can\'t find the derivative of x^1/7\\n\\nI can\'t find the derivative of x^1/8\\n\\nI can\'t find the derivative of x^1/9\\n\\nI can\'t find the derivative of x^10\\n\\nI can\'t find the derivative of x^11\\n\\nI can\'t find the derivative of x^12\\n\\nI can\'t find the derivative of x^13\\n\\nI can\'t find the derivative of x^14\\n\\nI can\'t find the derivative of x^15\\n\\nI can\'t find the derivative of x^16\\n\\nI can\'t find the derivative of x^17\\n\\nI can\'t find the derivative of x^""}, {\'generated_text\': ""I\'m not sure if I know how to do this, but I tried to make a function that generates the 64 bit numbers and I got 128 bit numbers.\\n\\n```function rand64(digits = 128) {\\nconst digits = digits;\\nconst d = 7;\\nconst s = 2147483647;\\nconst e = -2147483648;\\nconst f = 1;\\nconst g = 2;\\nconst h = 3;\\nconst i = 4;\\n\\nconst m = 1024;\\nconst d1 = 1 << d;\\nconst d2 = 1 << d - d1;\\nconst d3 = 1 << d - d1 - d2;\\nconst d4 = 1 << d - d1 - d2 - d3;\\nconst d5 = 1 << d - d1 - d2 - d3 - d4;\\nconst d6 = 1 << d - d1 - d2 - d3 - d4 - d5;\\nconst d7 = 1 << d - d1 - d2 - d3 - d4 - d""}]\n\n
\n

It doesn’t seem like the max_length is being honored when this is run. This is straight out of the LLM course under the “Transformers, what can they do?” section.

', 'post_number': 1, 'post_type': 1, 'posts_count': 3, 'updated_at': '2025-08-27T00:53:51.147Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 13, 'reads': 7, 'readers_count': 6, 'score': 81.4, 'yours': False, 'topic_id': 167794, 'topic_slug': 'setting-max-length-does-not-limit-length-of-output', 'display_username': 'Travis Lelle', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 102600, 'hidden': False, 'trust_level': 0, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/setting-max-length-does-not-limit-length-of-output/167794/1', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 240366, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-08-27T03:20:49.986Z', 'cooked': '

With the current Transformers library code, max_new_tokens takes precedence over max_length, so specifying max_new_tokens is the simplest approach.

', 'post_number': 2, 'post_type': 1, 'posts_count': 3, 'updated_at': '2025-08-27T03:20:49.986Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 7, 'readers_count': 6, 'score': 16.4, 'yours': False, 'topic_id': 167794, 'topic_slug': 'setting-max-length-does-not-limit-length-of-output', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig.max_length', 'internal': False, 'reflection': False, 'title': 'Generation', 'clicks': 4}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/setting-max-length-does-not-limit-length-of-output/167794/2', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 240416, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-08-27T15:21:13.240Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 3, 'post_type': 3, 'posts_count': 3, 'updated_at': '2025-08-27T15:21:13.240Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 4, 'readers_count': 3, 'score': 5.8, 'yours': False, 'topic_id': 167794, 'topic_slug': 'setting-max-length-does-not-limit-length-of-output', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/setting-max-length-does-not-limit-length-of-output/167794/3', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"
>>> generator = pipeline(""text-generation"", model=""HuggingFaceTB/SmolLM2-360M"")
-config.json: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 689/689 [00:00<00:00, 415kB/s]
-model.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 724M/724M [00:09<00:00, 73.1MB/s]
-generation_config.json: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 111/111 [00:00<00:00, 697kB/s]
-tokenizer_config.json: 3.66kB [00:00, 10.4MB/s]
-vocab.json: 801kB [00:00, 9.48MB/s]
-merges.txt: 466kB [00:00, 36.9MB/s]
-tokenizer.json: 2.10MB [00:00, 53.9MB/s]
-special_tokens_map.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 831/831 [00:00<00:00, 1.66MB/s]
-Device set to use mps:0
->>> generator(""I'm not sure if I know how to"", max_length=50, num_return_sequences=3,)
-Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
-Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
-Both `max_new_tokens` (=256) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
-[{'generated_text': ""I'm not sure if I know how to explain this. The problem basically is that you can't have a value of 0 in the output. I'm trying to do the following:\n\nfloat x = 2.0;\nfloat y = 0.0;\nfloat z = 1.0;\nfloat z2;\n\nz2 = z + x*y;\n\nI understand that y*z should be 2.0*0.0 = 0.0, but I'm not sure how to get the 0.0 in the z2 variable.\n\n## Answers\n\n0\n1. If you are trying to get the 0.0 in z2, please look at the following code:\nbool true = (z2*z2) > 0;\n\n// The result is 0.0\n\nfloat z2 = z2*z2;\n\n// The result is 0.0\n\nfloat z2 = z2*z2*z2;\n\n// The result is 0.0\n\n## Re: How to get 0 in a value in the output in a function\n\nThanks for the reply! I understand the problem now.\n\nI was trying""}, {'generated_text': ""I'm not sure if I know how to do that.\n\nHow can I find the derivative of 1/x?\n\nI can't find the derivative of x^3\n\nI can't find the derivative of x^1/2\n\nI can't find the derivative of x^1/3\n\nI can't find the derivative of x^1/4\n\nI can't find the derivative of x^1/5\n\nI can't find the derivative of x^1/6\n\nI can't find the derivative of x^1/7\n\nI can't find the derivative of x^1/8\n\nI can't find the derivative of x^1/9\n\nI can't find the derivative of x^10\n\nI can't find the derivative of x^11\n\nI can't find the derivative of x^12\n\nI can't find the derivative of x^13\n\nI can't find the derivative of x^14\n\nI can't find the derivative of x^15\n\nI can't find the derivative of x^16\n\nI can't find the derivative of x^17\n\nI can't find the derivative of x^""}, {'generated_text': ""I'm not sure if I know how to do this, but I tried to make a function that generates the 64 bit numbers and I got 128 bit numbers.\n\n```function rand64(digits = 128) {\nconst digits = digits;\nconst d = 7;\nconst s = 2147483647;\nconst e = -2147483648;\nconst f = 1;\nconst g = 2;\nconst h = 3;\nconst i = 4;\n\nconst m = 1024;\nconst d1 = 1 << d;\nconst d2 = 1 << d - d1;\nconst d3 = 1 << d - d1 - d2;\nconst d4 = 1 << d - d1 - d2 - d3;\nconst d5 = 1 << d - d1 - d2 - d3 - d4;\nconst d6 = 1 << d - d1 - d2 - d3 - d4 - d5;\nconst d7 = 1 << d - d1 - d2 - d3 - d4 - d""}]
-
-
-

It doesn’t seem like the max_length is being honored when this is run. This is straight out of the LLM course under the “Transformers, what can they do?” section.

";"

With the current Transformers library code, max_new_tokens takes precedence over max_length, so specifying max_new_tokens is the simplest approach.

";1 -"Cannot import name ‘_resolve_process_group’ from ‘torch.distributed.distributed_c10d’";https://discuss.huggingface.co/t/cannot-import-name-resolve-process-group-from-torch-distributed-distributed-c10d/167762;167762;9;2025-08-25 19:56:34.430000+00:00;"[{'id': 240239, 'name': 'Elizabeth Wainwright', 'username': 'ewainwright', 'avatar_template': '/user_avatar/discuss.huggingface.co/ewainwright/{size}/53052_2.png', 'created_at': '2025-08-25T19:56:34.479Z', 'cooked': '

I got the following error when calling the HuggingFaceLLM class:

\n
Failed to import transformers.generation.utils because of the following error (look up to see its traceback): cannot import name \'_resolve_process_group\' from \'torch.distributed.distributed_c10d\'\n
\n

I looked into the source code and sure enough that function is not in there. Is this a versioning problem?

\n

Update: I downgraded transformers to version 4.27.4 and that seemed to solve that issue but now I have a keyerror for “mistral”. Is there anyway I can solve this issue without downgrading transformers?

', 'post_number': 1, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-08-25T20:47:38.847Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 24, 'reads': 3, 'readers_count': 2, 'score': 135.6, 'yours': False, 'topic_id': 167762, 'topic_slug': 'cannot-import-name-resolve-process-group-from-torch-distributed-distributed-c10d', 'display_username': 'Elizabeth Wainwright', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 3, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 102505, 'hidden': False, 'trust_level': 0, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/cannot-import-name-resolve-process-group-from-torch-distributed-distributed-c10d/167762/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 240260, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-08-26T00:33:05.978Z', 'cooked': '

This error seems to occur when PyTorch is far older than Transformers. It should be OK with PyTorch 2.4 or later.

\n
import torch, torch.distributed as dist\nprint(torch.__version__, \'dist?\', dist.is_available())\n# Expect: 2.4+  dist? True\n
', 'post_number': 2, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-08-26T00:33:05.978Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 4, 'reads': 3, 'readers_count': 2, 'score': 25.6, 'yours': False, 'topic_id': 167762, 'topic_slug': 'cannot-import-name-resolve-process-group-from-torch-distributed-distributed-c10d', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://forums.developer.nvidia.com/t/pytorch-2-0-0-nv23-05/273736', 'internal': False, 'reflection': False, 'title': 'pyTorch 2.0.0.nv23.05 - Jetson Orin Nano - NVIDIA Developer Forums', 'clicks': 1}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/cannot-import-name-resolve-process-group-from-torch-distributed-distributed-c10d/167762/2', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 240294, 'name': 'Elizabeth Wainwright', 'username': 'ewainwright', 'avatar_template': '/user_avatar/discuss.huggingface.co/ewainwright/{size}/53052_2.png', 'created_at': '2025-08-26T12:32:16.124Z', 'cooked': '

Thanks this worked

', 'post_number': 3, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-08-26T12:32:16.124Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 3, 'readers_count': 2, 'score': 15.6, 'yours': False, 'topic_id': 167762, 'topic_slug': 'cannot-import-name-resolve-process-group-from-torch-distributed-distributed-c10d', 'display_username': 'Elizabeth Wainwright', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 102505, 'hidden': False, 'trust_level': 0, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/cannot-import-name-resolve-process-group-from-torch-distributed-distributed-c10d/167762/3', 'reactions': [{'id': 'confetti_ball', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240358, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-08-27T00:32:22.645Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 4, 'post_type': 3, 'posts_count': 4, 'updated_at': '2025-08-27T00:32:22.645Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 1, 'readers_count': 0, 'score': 0.2, 'yours': False, 'topic_id': 167762, 'topic_slug': 'cannot-import-name-resolve-process-group-from-torch-distributed-distributed-c10d', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/cannot-import-name-resolve-process-group-from-torch-distributed-distributed-c10d/167762/4', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

I got the following error when calling the HuggingFaceLLM class:

-
Failed to import transformers.generation.utils because of the following error (look up to see its traceback): cannot import name '_resolve_process_group' from 'torch.distributed.distributed_c10d'
-
-

I looked into the source code and sure enough that function is not in there. Is this a versioning problem?

-

Update: I downgraded transformers to version 4.27.4 and that seemed to solve that issue but now I have a keyerror for “mistral”. Is there anyway I can solve this issue without downgrading transformers?

";"

This error seems to occur when PyTorch is far older than Transformers. It should be OK with PyTorch 2.4 or later.

-
import torch, torch.distributed as dist
-print(torch.__version__, 'dist?', dist.is_available())
-# Expect: 2.4+  dist? True
-
";1 -Private Space authentication for external API calls;https://discuss.huggingface.co/t/private-space-authentication-for-external-api-calls/167772;167772;24;2025-08-26 08:43:45.781000+00:00;"[{'id': 240276, 'name': 'Mohamed Nasr', 'username': 'nasr7322', 'avatar_template': '/user_avatar/discuss.huggingface.co/nasr7322/{size}/53080_2.png', 'created_at': '2025-08-26T08:43:45.839Z', 'cooked': '

Hello everyone!
\nI’m using a Docker Space to deploy my FastAPI application that uses multiple models, but I’ve set it to private since my project contains sensitive code. My problem is that I can’t send requests to the endpoints from anywhere outside my browser and get a 404.

\n

Is it possible to send a token with the request to authenticate myself? If so, how should I include it in my request to make it work properly?

\n

Thank you all in advance!

', 'post_number': 1, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-08-26T08:43:45.839Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 17, 'reads': 12, 'readers_count': 11, 'score': 97.2, 'yours': False, 'topic_id': 167772, 'topic_slug': 'private-space-authentication-for-external-api-calls', 'display_username': 'Mohamed Nasr', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://discuss.huggingface.co/t/http-1-1-404-not-found/167933/2', 'internal': True, 'reflection': True, 'title': 'HTTP/1.1 404 Not Found', 'clicks': 0}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 102545, 'hidden': False, 'trust_level': 0, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/private-space-authentication-for-external-api-calls/167772/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 240277, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-08-26T09:10:04.255Z', 'cooked': '

If the space is functioning properly, you should be able to access it like following.
\nYou can figure out the actual space URL yourself, also you can also find it using the GUI.

\n
curl -X POST https://OWNER-SPACENAME.hf.space/api/predict \\\n  -H ""Authorization: Bearer $HF_TOKEN"" \\\n  -H ""Content-Type: application/json"" \\\n  -d \'{""text"":""hello""}\'\n
\n

or

\n
import os, requests\nurl = ""https://OWNER-SPACENAME.hf.space/api/predict""\nr = requests.post(url,\n                  headers={""Authorization"": f""Bearer {os.getenv(\'HF_TOKEN\')}""},\n                  json={""text"": ""hello""},\n                  timeout=60)\nprint(r.status_code, r.text)\n
\n

If you want to implement more complex access control.

', 'post_number': 2, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-08-26T09:10:43.033Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 11, 'readers_count': 10, 'score': 22.0, 'yours': False, 'topic_id': 167772, 'topic_slug': 'private-space-authentication-for-external-api-calls', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/docs/hub/en/spaces-embed', 'internal': False, 'reflection': False, 'title': 'Embed your Space in another website', 'clicks': 2}, {'url': 'https://huggingface.co/spaces/zero-gpu-explorers/README/discussions/88#68a736ebb21506a456c47c81', 'internal': False, 'reflection': False, 'clicks': 2}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/private-space-authentication-for-external-api-calls/167772/2', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 240278, 'name': 'Mohamed Nasr', 'username': 'nasr7322', 'avatar_template': '/user_avatar/discuss.huggingface.co/nasr7322/{size}/53080_2.png', 'created_at': '2025-08-26T09:11:44.798Z', 'cooked': '

yup it worked, thank youu!
\nmy problem was with the token

', 'post_number': 3, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-08-26T09:11:44.798Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 11, 'readers_count': 10, 'score': 17.0, 'yours': False, 'topic_id': 167772, 'topic_slug': 'private-space-authentication-for-external-api-calls', 'display_username': 'Mohamed Nasr', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 102545, 'hidden': False, 'trust_level': 0, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/private-space-authentication-for-external-api-calls/167772/3', 'reactions': [{'id': 'confetti_ball', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240346, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-08-26T21:12:23.222Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 4, 'post_type': 3, 'posts_count': 4, 'updated_at': '2025-08-26T21:12:23.222Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 5, 'readers_count': 4, 'score': 0.8, 'yours': False, 'topic_id': 167772, 'topic_slug': 'private-space-authentication-for-external-api-calls', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/private-space-authentication-for-external-api-calls/167772/4', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

Hello everyone!
-I’m using a Docker Space to deploy my FastAPI application that uses multiple models, but I’ve set it to private since my project contains sensitive code. My problem is that I can’t send requests to the endpoints from anywhere outside my browser and get a 404.

-

Is it possible to send a token with the request to authenticate myself? If so, how should I include it in my request to make it work properly?

-

Thank you all in advance!

";"

If the space is functioning properly, you should be able to access it like following.
-You can figure out the actual space URL yourself, also you can also find it using the GUI.

-
curl -X POST https://OWNER-SPACENAME.hf.space/api/predict \
-  -H ""Authorization: Bearer $HF_TOKEN"" \
-  -H ""Content-Type: application/json"" \
-  -d '{""text"":""hello""}'
-
-

or

-
import os, requests
-url = ""https://OWNER-SPACENAME.hf.space/api/predict""
-r = requests.post(url,
-                  headers={""Authorization"": f""Bearer {os.getenv('HF_TOKEN')}""},
-                  json={""text"": ""hello""},
-                  timeout=60)
-print(r.status_code, r.text)
-
-

If you want to implement more complex access control.

";1 -Vet/vetgpt-2-7b n8n connection;https://discuss.huggingface.co/t/vet-vetgpt-2-7b-n8n-connection/167187;167187;5;2025-08-18 16:40:15.956000+00:00;"[{'id': 239110, 'name': 'Cristiane Sousa', 'username': 'ketask', 'avatar_template': '/user_avatar/discuss.huggingface.co/ketask/{size}/52727_2.png', 'created_at': '2025-08-18T16:40:16.017Z', 'cooked': '

Hi! I’m trying to connect HF model at N8N, but I receive error: “NodeOperationError: An error occurred while fetching the blob”. Is it due to I’m not using HF Pro plan?

\n

erro HF841×427 36.4 KB

', 'post_number': 1, 'post_type': 1, 'posts_count': 3, 'updated_at': '2025-08-18T16:40:16.017Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 12, 'reads': 3, 'readers_count': 2, 'score': 75.6, 'yours': False, 'topic_id': 167187, 'topic_slug': 'vet-vetgpt-2-7b-n8n-connection', 'display_username': 'Cristiane Sousa', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 102003, 'hidden': False, 'trust_level': 0, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/vet-vetgpt-2-7b-n8n-connection/167187/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 239200, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-08-19T04:36:31.730Z', 'cooked': '

That model location may be incorrect. Also, that model is not currently deployed, so it should not be available via the API.

', 'post_number': 2, 'post_type': 1, 'posts_count': 3, 'updated_at': '2025-08-19T04:36:31.730Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 3, 'readers_count': 2, 'score': 0.6000000000000001, 'yours': False, 'topic_id': 167187, 'topic_slug': 'vet-vetgpt-2-7b-n8n-connection', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/models?inference_provider=all&sort=trending&search=vetgpt', 'internal': False, 'reflection': False, 'title': 'Models - Hugging Face', 'clicks': 1}, {'url': 'https://huggingface.co/ArcanaBT/vetgpt-2-7b', 'internal': False, 'reflection': False, 'title': 'ArcanaBT/vetgpt-2-7b · Hugging Face', 'clicks': 0}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/vet-vetgpt-2-7b-n8n-connection/167187/2', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 240301, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-08-26T13:15:40.680Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 3, 'post_type': 3, 'posts_count': 3, 'updated_at': '2025-08-26T13:15:40.680Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 1, 'readers_count': 0, 'score': 0.2, 'yours': False, 'topic_id': 167187, 'topic_slug': 'vet-vetgpt-2-7b-n8n-connection', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/vet-vetgpt-2-7b-n8n-connection/167187/3', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

Hi! I’m trying to connect HF model at N8N, but I receive error: “NodeOperationError: An error occurred while fetching the blob”. Is it due to I’m not using HF Pro plan?

-

erro HF841×427 36.4 KB

";"

That model location may be incorrect. Also, that model is not currently deployed, so it should not be available via the API.

";1 -Text-Classification Pipeline - Newbie question;https://discuss.huggingface.co/t/text-classification-pipeline-newbie-question/167640;167640;5;2025-08-22 19:06:44.140000+00:00;"[{'id': 239963, 'name': 'Markus Eicher', 'username': 'MarkusEicher', 'avatar_template': '/user_avatar/discuss.huggingface.co/markuseicher/{size}/52883_2.png', 'created_at': '2025-08-22T19:06:44.198Z', 'cooked': '

Hello huggingface community. I am wondering if I did understand the pipeline text-classification correctly. Is it the case, that the model I choose defines the task I can do with it and the output I will get? I was a bit confused, because I used pipeline(“sentiment-analysis”) but did not find “sentiment-analysis” as a model or option setting. And VSCode autocomplete also did not suggest it, but it still works. So I came to the conclusion I laid out before. Is this correct or am I wrong. Thanks and may you all have a good time.

', 'post_number': 1, 'post_type': 1, 'posts_count': 5, 'updated_at': '2025-08-22T19:06:44.198Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 11, 'reads': 7, 'readers_count': 6, 'score': 71.4, 'yours': False, 'topic_id': 167640, 'topic_slug': 'text-classification-pipeline-newbie-question', 'display_username': 'Markus Eicher', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 29747, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/text-classification-pipeline-newbie-question/167640/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 239972, 'name': 'Daniel Kleine', 'username': 'dkleine', 'avatar_template': '/user_avatar/discuss.huggingface.co/dkleine/{size}/33964_2.png', 'created_at': '2025-08-22T19:51:01.268Z', 'cooked': '

Hi Markus,

\n

“sentiment-analysis” is the task specifying what you want a large language model to perform on the text. Sentiment analysis practically changes the model’s head to a classifier, which you can see here:

\n\n\n

This pipeline is pre-configured, the settings can be found below in the same file defined here:

\n\n', 'post_number': 2, 'post_type': 1, 'posts_count': 5, 'updated_at': '2025-08-22T19:51:27.289Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 7, 'readers_count': 6, 'score': 36.4, 'yours': False, 'topic_id': 167640, 'topic_slug': 'text-classification-pipeline-newbie-question', 'display_username': 'Daniel Kleine', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://github.com/huggingface/transformers/blob/7d88f57fc6892b9b3d0092c53e27ae033f1bebc8/src/transformers/pipelines/__init__.py#L193-L205', 'internal': False, 'reflection': False, 'title': 'transformers/src/transformers/pipelines/__init__.py at 7d88f57fc6892b9b3d0092c53e27ae033f1bebc8 · huggingface/transformers · GitHub', 'clicks': 1}, {'url': 'https://github.com/huggingface/transformers/blob/7d88f57fc6892b9b3d0092c53e27ae033f1bebc8/src/transformers/pipelines/__init__.py#L154-L159', 'internal': False, 'reflection': False, 'title': 'transformers/src/transformers/pipelines/__init__.py at 7d88f57fc6892b9b3d0092c53e27ae033f1bebc8 · huggingface/transformers · GitHub', 'clicks': 0}, {'url': 'https://discuss.huggingface.co/t/default-models-for-pipeline-tasks/2559/6', 'internal': True, 'reflection': True, 'title': 'Default models for pipeline tasks', 'clicks': 0}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 2}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 69473, 'hidden': False, 'trust_level': 2, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/text-classification-pipeline-newbie-question/167640/2', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}, {'id': 'heart', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 2, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 239973, 'name': 'Markus Eicher', 'username': 'MarkusEicher', 'avatar_template': '/user_avatar/discuss.huggingface.co/markuseicher/{size}/52883_2.png', 'created_at': '2025-08-22T20:11:08.187Z', 'cooked': '

Thank you. So it is generally an alias for text-classification. I was confused because it did not show up as a separate pipeline in chapter 1 of the LLM course on huggingface. But now I understand why. Appreciate your support and the quick answer.

', 'post_number': 3, 'post_type': 1, 'posts_count': 5, 'updated_at': '2025-08-22T20:11:08.187Z', 'reply_count': 1, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 4, 'reads': 6, 'readers_count': 5, 'score': 56.2, 'yours': False, 'topic_id': 167640, 'topic_slug': 'text-classification-pipeline-newbie-question', 'display_username': 'Markus Eicher', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 69473, 'username': 'dkleine', 'name': 'Daniel Kleine', 'avatar_template': '/user_avatar/discuss.huggingface.co/dkleine/{size}/33964_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 2}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 29747, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/text-classification-pipeline-newbie-question/167640/3', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 2}], 'current_user_reaction': None, 'reaction_users_count': 2, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 239974, 'name': 'Daniel Kleine', 'username': 'dkleine', 'avatar_template': '/user_avatar/discuss.huggingface.co/dkleine/{size}/33964_2.png', 'created_at': '2025-08-22T20:23:18.891Z', 'cooked': '

That’s right – “sentiment-analysis” practically does sequence classification (there are also other types of classification tasks possible, for example token classification, just fyi) under the hood in the linear output layer of the LLM. Please also see the docstring for the TextClassificationPipeline here:

\n', 'post_number': 4, 'post_type': 1, 'posts_count': 5, 'updated_at': '2025-08-22T20:23:18.891Z', 'reply_count': 0, 'reply_to_post_number': 3, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 6, 'readers_count': 5, 'score': 46.2, 'yours': False, 'topic_id': 167640, 'topic_slug': 'text-classification-pipeline-newbie-question', 'display_username': 'Daniel Kleine', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://github.com/huggingface/transformers/blob/7d88f57fc6892b9b3d0092c53e27ae033f1bebc8/src/transformers/pipelines/text_classification.py#L49-L79', 'internal': False, 'reflection': False, 'title': 'transformers/src/transformers/pipelines/text_classification.py at 7d88f57fc6892b9b3d0092c53e27ae033f1bebc8 · huggingface/transformers · GitHub', 'clicks': 1}], 'read': True, 'user_title': None, 'reply_to_user': {'id': 29747, 'username': 'MarkusEicher', 'name': 'Markus Eicher', 'avatar_template': '/user_avatar/discuss.huggingface.co/markuseicher/{size}/52883_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 3}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 69473, 'hidden': False, 'trust_level': 2, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/text-classification-pipeline-newbie-question/167640/4', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 2}, {'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 3, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 240000, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-08-23T08:23:30.049Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 5, 'post_type': 3, 'posts_count': 5, 'updated_at': '2025-08-23T08:23:30.049Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 2, 'readers_count': 1, 'score': 0.4, 'yours': False, 'topic_id': 167640, 'topic_slug': 'text-classification-pipeline-newbie-question', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/text-classification-pipeline-newbie-question/167640/5', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";

Hello huggingface community. I am wondering if I did understand the pipeline text-classification correctly. Is it the case, that the model I choose defines the task I can do with it and the output I will get? I was a bit confused, because I used pipeline(“sentiment-analysis”) but did not find “sentiment-analysis” as a model or option setting. And VSCode autocomplete also did not suggest it, but it still works. So I came to the conclusion I laid out before. Is this correct or am I wrong. Thanks and may you all have a good time.

;"

Hi Markus,

-

“sentiment-analysis” is the task specifying what you want a large language model to perform on the text. Sentiment analysis practically changes the model’s head to a classifier, which you can see here:

- - -

This pipeline is pre-configured, the settings can be found below in the same file defined here:

-";1 -"ImportError: cannot import name ‘ModelFilter’ from ‘huggingface_hub’";https://discuss.huggingface.co/t/importerror-cannot-import-name-modelfilter-from-huggingface-hub/167632;167632;5;2025-08-22 13:18:09.224000+00:00;"[{'id': 239912, 'name': 'Alex', 'username': 'SuperBowser', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/s/9f8e36/{size}.png', 'created_at': '2025-08-22T13:18:09.284Z', 'cooked': '

I am running this line in Kaggle notebook:

\n
from huggingface_hub import ModelFilter\n
\n

and getting back error:

\n
---------------------------------------------------------------------------\nImportError                               Traceback (most recent call last)\n/tmp/ipykernel_36/1451250264.py in <cell line: 0>()\n----> 1 from huggingface_hub import ModelFilter\n\nImportError: cannot import name \'ModelFilter\' from \'huggingface_hub\' (/usr/local/lib/python3.11/dist-packages/huggingface_hub/__init__.py)\n
\n

My huggingface_hub._version_ is ‘0.33.1’

', 'post_number': 1, 'post_type': 1, 'posts_count': 5, 'updated_at': '2025-08-22T13:18:09.284Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 108, 'reads': 6, 'readers_count': 5, 'score': 481.2, 'yours': False, 'topic_id': 167632, 'topic_slug': 'importerror-cannot-import-name-modelfilter-from-huggingface-hub', 'display_username': 'Alex', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 102016, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/importerror-cannot-import-name-modelfilter-from-huggingface-hub/167632/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 239950, 'name': 'Daniel Kleine', 'username': 'dkleine', 'avatar_template': '/user_avatar/discuss.huggingface.co/dkleine/{size}/33964_2.png', 'created_at': '2025-08-22T15:21:25.382Z', 'cooked': '

ModelFilter is deprecated, please see here: ImportError: cannot import name \'ModelFilter\' from \'huggingface_hub\' · Issue #2478 · huggingface/huggingface_hub · GitHub

', 'post_number': 2, 'post_type': 1, 'posts_count': 5, 'updated_at': '2025-08-22T15:21:25.382Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 15, 'reads': 6, 'readers_count': 5, 'score': 96.2, 'yours': False, 'topic_id': 167632, 'topic_slug': 'importerror-cannot-import-name-modelfilter-from-huggingface-hub', 'display_username': 'Daniel Kleine', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://github.com/huggingface/huggingface_hub/issues/2478', 'internal': False, 'reflection': False, 'title': ""ImportError: cannot import name 'ModelFilter' from 'huggingface_hub' · Issue #2478 · huggingface/huggingface_hub · GitHub"", 'clicks': 16}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 2}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 69473, 'hidden': False, 'trust_level': 2, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/importerror-cannot-import-name-modelfilter-from-huggingface-hub/167632/2', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 2}], 'current_user_reaction': None, 'reaction_users_count': 2, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 239957, 'name': 'Alex', 'username': 'SuperBowser', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/s/9f8e36/{size}.png', 'created_at': '2025-08-22T17:28:31.353Z', 'cooked': '

Thank you so much for your answer. Do you what values I can use in filter field. I am looking for complete list. So far I know only a few values such text-classification

\n

Minor update. Here is my search:

\n

from huggingface_hub import HfApi
\napi = HfApi()
\nmodels = api.list_models(task=“text-classification”,
\nsort=‘downloads’, gated = False, limit = 100)
\nmodels = list(models)
\nprint(len(models))
\nprint(models[1].modelId)

\n

It returns cross-encoder/ms-marco-MiniLM-L6-v2, which is “Text Ranking” and it is different from what I asked “Text Classification” as per tasks page.
\nI got the same result when using “filter” field.

', 'post_number': 3, 'post_type': 1, 'posts_count': 5, 'updated_at': '2025-08-22T17:37:59.882Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 2, 'reads': 6, 'readers_count': 5, 'score': 26.2, 'yours': False, 'topic_id': 167632, 'topic_slug': 'importerror-cannot-import-name-modelfilter-from-huggingface-hub', 'display_username': 'Alex', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 2, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/tasks', 'internal': False, 'reflection': False, 'title': 'Tasks - Hugging Face', 'clicks': 0}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 102016, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/importerror-cannot-import-name-modelfilter-from-huggingface-hub/167632/3', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 239964, 'name': 'Daniel Kleine', 'username': 'dkleine', 'avatar_template': '/user_avatar/discuss.huggingface.co/dkleine/{size}/33964_2.png', 'created_at': '2025-08-22T19:07:25.281Z', 'cooked': '
\n

It returns cross-encoder/ms-marco-MiniLM-L6-v2, which is “Text Ranking” and it is different from what I asked “Text Classification” as per tasks page.
\nI got the same result when using “filter” field.

\n
\n

This is probably because this model is tagged as both as “Text Ranking” as well as “Text Classification”, see tags above:

\n\n', 'post_number': 4, 'post_type': 1, 'posts_count': 5, 'updated_at': '2025-08-22T19:08:35.289Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 5, 'reads': 4, 'readers_count': 3, 'score': 55.8, 'yours': False, 'topic_id': 167632, 'topic_slug': 'importerror-cannot-import-name-modelfilter-from-huggingface-hub', 'display_username': 'Daniel Kleine', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/cross-encoder/ms-marco-MiniLM-L6-v2', 'internal': False, 'reflection': False, 'title': 'cross-encoder/ms-marco-MiniLM-L6-v2 · Hugging Face', 'clicks': 1}, {'url': 'https://huggingface.co/tasks', 'internal': False, 'reflection': False, 'title': 'Tasks - Hugging Face', 'clicks': 0}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 2}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 69473, 'hidden': False, 'trust_level': 2, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/importerror-cannot-import-name-modelfilter-from-huggingface-hub/167632/4', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 2}], 'current_user_reaction': None, 'reaction_users_count': 2, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 239997, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-08-23T07:07:27.219Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 5, 'post_type': 3, 'posts_count': 5, 'updated_at': '2025-08-23T07:07:27.219Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 2, 'readers_count': 1, 'score': 0.4, 'yours': False, 'topic_id': 167632, 'topic_slug': 'importerror-cannot-import-name-modelfilter-from-huggingface-hub', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/importerror-cannot-import-name-modelfilter-from-huggingface-hub/167632/5', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

I am running this line in Kaggle notebook:

-
from huggingface_hub import ModelFilter
-
-

and getting back error:

-
---------------------------------------------------------------------------
-ImportError                               Traceback (most recent call last)
-/tmp/ipykernel_36/1451250264.py in <cell line: 0>()
-----> 1 from huggingface_hub import ModelFilter
-
-ImportError: cannot import name 'ModelFilter' from 'huggingface_hub' (/usr/local/lib/python3.11/dist-packages/huggingface_hub/__init__.py)
-
-

My huggingface_hub._version_ is ‘0.33.1’

";"

ModelFilter is deprecated, please see here: ImportError: cannot import name 'ModelFilter' from 'huggingface_hub' · Issue #2478 · huggingface/huggingface_hub · GitHub

";1 -"Tool/Function calling abilities of LLM’s that are used locally pulled through ollama";https://discuss.huggingface.co/t/tool-function-calling-abilities-of-llms-that-are-used-locally-pulled-through-ollama/165277;165277;13;2025-08-01 11:20:02.837000+00:00;"[{'id': 235956, 'name': 'Aravindha Sivabalan J', 'username': 'cranky-coder08', 'avatar_template': '/user_avatar/discuss.huggingface.co/cranky-coder08/{size}/51972_2.png', 'created_at': '2025-08-01T11:20:02.900Z', 'cooked': '

i was trying to build a small AI agent that would query the DB and get the details of the customers, for which i tried many models that are available in the ollama model library, but every model keeps throwing an “invalid tool”, or keeps using the irrelevant tool or keeps hallucinating and giving back made up answers!!! is this an issue that is common when pulling and running LLM’s locally using OLLAMA, when i use the paid Gemini API from google cloud, it works so well (uses the correct tool’s, and returns the exact correct answer), i need help in understanding what is happening when i use a locally run LLM, and is there anyway to make the Local LLM work like the Gemini API??

\n

Thanks in advance

', 'post_number': 1, 'post_type': 1, 'posts_count': 3, 'updated_at': '2025-08-01T11:20:02.900Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 109, 'reads': 5, 'readers_count': 4, 'score': 536.0, 'yours': False, 'topic_id': 165277, 'topic_slug': 'tool-function-calling-abilities-of-llms-that-are-used-locally-pulled-through-ollama', 'display_username': 'Aravindha Sivabalan J', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 100794, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/tool-function-calling-abilities-of-llms-that-are-used-locally-pulled-through-ollama/165277/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 235983, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-08-01T14:01:03.637Z', 'cooked': '

If you are using Ollama directly without any Agent framework, the models that support tool calling are limited, and there seems to be an issue that is not a bug.

\n

As a workaround, you could use Ollama through external Agent frameworks.

', 'post_number': 2, 'post_type': 1, 'posts_count': 3, 'updated_at': '2025-08-01T14:01:03.637Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 6, 'reads': 5, 'readers_count': 4, 'score': 46.0, 'yours': False, 'topic_id': 165277, 'topic_slug': 'tool-function-calling-abilities-of-llms-that-are-used-locally-pulled-through-ollama', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://discuss.huggingface.co/t/how-to-run-agents-from-smolagents-locally/152874/3', 'internal': True, 'reflection': False, 'title': 'How to run agents from `smolagents` locally?', 'clicks': 12}, {'url': 'https://ollama.com/blog/tool-support', 'internal': False, 'reflection': False, 'title': 'Tool support · Ollama Blog', 'clicks': 9}, {'url': 'https://huggingface.co/posts/prithivMLmods/142876386338407', 'internal': False, 'reflection': False, 'title': '@prithivMLmods on Hugging Face: ""OpenAI, Google, Hugging Face, and Anthropic have released guides and courses…""', 'clicks': 7}, {'url': 'https://github.com/ollama/ollama/issues/11538', 'internal': False, 'reflection': False, 'title': 'Qwen3:14b not using and calling functions with plaintext · Issue #11538 · ollama/ollama · GitHub', 'clicks': 5}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/tool-function-calling-abilities-of-llms-that-are-used-locally-pulled-through-ollama/165277/2', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 239244, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-08-19T09:27:01.360Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 3, 'post_type': 3, 'posts_count': 3, 'updated_at': '2025-08-19T09:27:01.360Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 2, 'readers_count': 1, 'score': 0.4, 'yours': False, 'topic_id': 165277, 'topic_slug': 'tool-function-calling-abilities-of-llms-that-are-used-locally-pulled-through-ollama', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/tool-function-calling-abilities-of-llms-that-are-used-locally-pulled-through-ollama/165277/3', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

i was trying to build a small AI agent that would query the DB and get the details of the customers, for which i tried many models that are available in the ollama model library, but every model keeps throwing an “invalid tool”, or keeps using the irrelevant tool or keeps hallucinating and giving back made up answers!!! is this an issue that is common when pulling and running LLM’s locally using OLLAMA, when i use the paid Gemini API from google cloud, it works so well (uses the correct tool’s, and returns the exact correct answer), i need help in understanding what is happening when i use a locally run LLM, and is there anyway to make the Local LLM work like the Gemini API??

-

Thanks in advance

";"

If you are using Ollama directly without any Agent framework, the models that support tool calling are limited, and there seems to be an issue that is not a bug.

-

As a workaround, you could use Ollama through external Agent frameworks.

";1 -"ModuleNotFoundError: No module named ‘transformers’";https://discuss.huggingface.co/t/modulenotfounderror-no-module-named-transformers/11609;11609;9;2021-11-11 21:05:23.353000+00:00;"[{'id': 24972, 'name': 'ardo tee', 'username': 'mashedpotatotime', 'avatar_template': '/user_avatar/discuss.huggingface.co/mashedpotatotime/{size}/3103_2.png', 'created_at': '2021-11-11T21:05:23.422Z', 'cooked': '

Hi! I’ve been having trouble getting transformers to work in Spaces.

\n

When tested in my environment using python -c ""from transformers import pipeline; print(pipeline(\'sentiment-analysis\')(\'we love you\'))"", the results show it’s been properly installed. When imported in Colab it works fine too, but whenever deployed to Spaces it always returns the same ModuleNotFound error. Full traceback message:

\n

Traceback:

\n
File ""/home/user/.local/lib/python3.8/site-packages/streamlit/script_runner.py"", line 354, in _run_script\n    exec(code, module.__dict__)File ""/home/user/app/app.py"", line 1, in <module>\n    from transformers import pipeline\n
\n

It’s a simple test app using transformers and streamlit, - both of which were reinstalled with pip after creating a new venv and reinstalling tensorflow and pytorch. I also tried cleaning, uninstalling, and reinstalling conda based on advice from another forum. No dice.

\n

Currently using:

\n

Python 3.9.4
\nTensorflow 2.7.0
\nPyTorch 1.10.0
\nTransformers 4.12.3
\nStreamlit 1.2.0

\n

Any help greatly appreciated! Thanks

', 'post_number': 1, 'post_type': 1, 'posts_count': 4, 'updated_at': '2021-11-11T21:08:03.051Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 24187, 'reads': 263, 'readers_count': 262, 'score': 120517.6, 'yours': False, 'topic_id': 11609, 'topic_slug': 'modulenotfounderror-no-module-named-transformers', 'display_username': 'ardo tee', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 4950, 'hidden': False, 'trust_level': 0, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/modulenotfounderror-no-module-named-transformers/11609/1', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 24988, 'name': 'Nikhil', 'username': 'NDugar', 'avatar_template': '/user_avatar/discuss.huggingface.co/ndugar/{size}/40501_2.png', 'created_at': '2021-11-12T06:41:54.938Z', 'cooked': '

it might be due to not having a requirements file. Here is an example of what your spaces app should have - flax-community/image-captioning at main try adding the requirements as they till the environment what packages to load. Hope this helps.

', 'post_number': 2, 'post_type': 1, 'posts_count': 4, 'updated_at': '2021-11-12T06:41:54.938Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 198, 'reads': 221, 'readers_count': 220, 'score': 1114.2, 'yours': False, 'topic_id': 11609, 'topic_slug': 'modulenotfounderror-no-module-named-transformers', 'display_username': 'Nikhil', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://huggingface.co/spaces/flax-community/image-captioning/tree/main', 'internal': False, 'reflection': False, 'title': 'flax-community/image-captioning at main', 'clicks': 2788}], 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 5}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 4732, 'hidden': False, 'trust_level': 2, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/modulenotfounderror-no-module-named-transformers/11609/2', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 5}], 'current_user_reaction': None, 'reaction_users_count': 5, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 26022, 'name': 'ardo tee', 'username': 'mashedpotatotime', 'avatar_template': '/user_avatar/discuss.huggingface.co/mashedpotatotime/{size}/3103_2.png', 'created_at': '2021-11-19T23:23:39.383Z', 'cooked': '

That worked perfectly. Thank you!

', 'post_number': 3, 'post_type': 1, 'posts_count': 4, 'updated_at': '2021-11-19T23:23:39.383Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 137, 'reads': 206, 'readers_count': 205, 'score': 741.2, 'yours': False, 'topic_id': 11609, 'topic_slug': 'modulenotfounderror-no-module-named-transformers', 'display_username': 'ardo tee', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 4732, 'username': 'NDugar', 'name': 'Nikhil', 'avatar_template': '/user_avatar/discuss.huggingface.co/ndugar/{size}/40501_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 4950, 'hidden': False, 'trust_level': 0, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/modulenotfounderror-no-module-named-transformers/11609/3', 'reactions': [{'id': 'heart', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 238096, 'name': 'Yue Zhao', 'username': 'Alwaysboy', 'avatar_template': '/user_avatar/discuss.huggingface.co/alwaysboy/{size}/52486_2.png', 'created_at': '2025-08-12T13:40:25.363Z', 'cooked': '

Same issue and solved by this method, thanks!

', 'post_number': 4, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-08-12T13:40:25.363Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 12, 'reads': 7, 'readers_count': 6, 'score': 71.4, 'yours': False, 'topic_id': 11609, 'topic_slug': 'modulenotfounderror-no-module-named-transformers', 'display_username': 'Yue Zhao', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 101586, 'hidden': False, 'trust_level': 0, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/modulenotfounderror-no-module-named-transformers/11609/4', 'reactions': [{'id': 'confetti_ball', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

Hi! I’ve been having trouble getting transformers to work in Spaces.

-

When tested in my environment using python -c ""from transformers import pipeline; print(pipeline('sentiment-analysis')('we love you'))"", the results show it’s been properly installed. When imported in Colab it works fine too, but whenever deployed to Spaces it always returns the same ModuleNotFound error. Full traceback message:

-

Traceback:

-
File ""/home/user/.local/lib/python3.8/site-packages/streamlit/script_runner.py"", line 354, in _run_script
-    exec(code, module.__dict__)File ""/home/user/app/app.py"", line 1, in <module>
-    from transformers import pipeline
-
-

It’s a simple test app using transformers and streamlit, - both of which were reinstalled with pip after creating a new venv and reinstalling tensorflow and pytorch. I also tried cleaning, uninstalling, and reinstalling conda based on advice from another forum. No dice.

-

Currently using:

-

Python 3.9.4
-Tensorflow 2.7.0
-PyTorch 1.10.0
-Transformers 4.12.3
-Streamlit 1.2.0

-

Any help greatly appreciated! Thanks

";"

it might be due to not having a requirements file. Here is an example of what your spaces app should have - flax-community/image-captioning at main try adding the requirements as they till the environment what packages to load. Hope this helps.

";1 -The Gradio API is not working;https://discuss.huggingface.co/t/the-gradio-api-is-not-working/166407;166407;5;2025-08-11 13:02:56.970000+00:00;"[{'id': 237842, 'name': 'Dany Gold', 'username': 'GoldDany', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/g/bbce88/{size}.png', 'created_at': '2025-08-11T13:02:57.043Z', 'cooked': '

the gradio throws error: Traceback (most recent call last):
\nFile “C:\\Users\\danya\\PycharmProjects\\DiDefBackend\\DiDef\\SentenceTransformer.py”, line 45, in
\nclient = Client(
\nFile “C:\\Users\\danya\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio_client\\client.py”, line 171, in init
\nself._info = self._get_api_info()
\nFile “C:\\Users\\danya\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio_client\\client.py”, line 564, in get_api_info
\ninfo = r.json()
\nFile “C:\\Users\\danya\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\httpx_models.py”, line 764, in json
\nreturn jsonlib.loads(self.content, **kwargs)
\nFile ""C:\\Users\\danya\\AppData\\Local\\Programs\\Python\\Python39\\lib\\json_init
.py"", line 346, in loads
\nreturn _default_decoder.decode(s)
\nFile “C:\\Users\\danya\\AppData\\Local\\Programs\\Python\\Python39\\lib\\json\\decoder.py”, line 337, in decode
\nobj, end = self.raw_decode(s, idx=_w(s, 0).end())
\nFile “C:\\Users\\danya\\AppData\\Local\\Programs\\Python\\Python39\\lib\\json\\decoder.py”, line 355, in raw_decode
\nraise JSONDecodeError(“Expecting value”, s, err.value) from None
\njson.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

\n

why? My code is very simple:

\n

from gradio_client import Client

\n

client = Client(
\nsrc = “GoldDany/DiDefBackend”, #my Space is public
\n)
\nresult = client.predict(
\ntext=“Hello!!”,
\napi_name=“/predict”,
\n)
\nprint(result)

', 'post_number': 1, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-08-11T13:05:34.640Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 17, 'reads': 6, 'readers_count': 5, 'score': 86.2, 'yours': False, 'topic_id': 166407, 'topic_slug': 'the-gradio-api-is-not-working', 'display_username': 'Dany Gold', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 101505, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/the-gradio-api-is-not-working/166407/1', 'reactions': [{'id': 'eyes', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True, 'can_vote': False}, {'id': 237845, 'name': 'John Smith', 'username': 'John6666', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png', 'created_at': '2025-08-11T13:53:44.313Z', 'cooked': '
\n

Python39

\n
\n

I think this is probably the culprit this time.

\n

Gradio 5 only works with Python 3.10 or later on both the server and client, so I think the error is occurring because the versions are different between the client and server.
\nI don’t know if this error can be potentially resolved…

\n

The simplest solution is to use Python 3.10 or later.

\n
# pip install -U gradio_client (in Python 3.9 environment)\nimport subprocess\nsubprocess.run(""pip show gradio_client"", shell=True) # Version: 1.3.0 (Release date: 2024.08.08)\nfrom gradio_client import Client\n\nclient = Client(src=""John6666/apitest1"") # Gradio 4.41.0\nresult = client.predict(text=""Hello!!"", api_name=""/predict"")\nprint(result) # [0.010964062064886093, 0.02713009901344776, -0.024556249380111694, 0.01713254489004612, 0.04088324308395386, -0.005583592690527439, 0.015990763902664185,...\n\nclient = Client(src=""GoldDany/DiDefBackend"") # Gradio 5.42.0\nresult = client.predict(text=""Hello!!"", api_name=""/predict"")\nprint(result) # error\n
', 'post_number': 2, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-08-11T13:54:42.512Z', 'reply_count': 1, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 5, 'readers_count': 4, 'score': 11.0, 'yours': False, 'topic_id': 166407, 'topic_slug': 'the-gradio-api-is-not-working', 'display_username': 'John Smith', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'link_counts': [{'url': 'https://github.com/gradio-app/gradio/issues/9634', 'internal': False, 'reflection': False, 'title': 'Support older versions of python in gradio 5 · Issue #9634 · gradio-app/gradio · GitHub', 'clicks': 1}], 'read': True, 'user_title': 'Regular', 'title_is_group': False, 'bookmarked': False, 'actions_summary': [], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 52272, 'hidden': False, 'trust_level': 3, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/the-gradio-api-is-not-working/166407/2', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': True, 'topic_accepted_answer': True}, {'id': 237851, 'name': 'Dany Gold', 'username': 'GoldDany', 'avatar_template': 'https://avatars.discourse-cdn.com/v4/letter/g/bbce88/{size}.png', 'created_at': '2025-08-11T14:24:40.173Z', 'cooked': '

Thanks) But I may have to use an even lower version python, because integrating it . But downgrading the version of Gradio works))

', 'post_number': 3, 'post_type': 1, 'posts_count': 4, 'updated_at': '2025-08-11T14:24:40.173Z', 'reply_count': 0, 'reply_to_post_number': 2, 'quote_count': 0, 'incoming_link_count': 0, 'reads': 4, 'readers_count': 3, 'score': 15.8, 'yours': False, 'topic_id': 166407, 'topic_slug': 'the-gradio-api-is-not-working', 'display_username': 'Dany Gold', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'reply_to_user': {'id': 52272, 'username': 'John6666', 'name': 'John Smith', 'avatar_template': '/user_avatar/discuss.huggingface.co/john6666/{size}/27664_2.png'}, 'bookmarked': False, 'actions_summary': [{'id': 2, 'count': 1}], 'moderator': False, 'admin': False, 'staff': False, 'user_id': 101505, 'hidden': False, 'trust_level': 1, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'post_url': '/t/the-gradio-api-is-not-working/166407/3', 'reactions': [{'id': '+1', 'type': 'emoji', 'count': 1}], 'current_user_reaction': None, 'reaction_users_count': 1, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}, {'id': 237939, 'name': 'system', 'username': 'system', 'avatar_template': 'https://us1.discourse-cdn.com/hellohellohello/original/2X/d/de4155eb4aa4108ecb32a1389d7cc37ae69f88b7.png', 'created_at': '2025-08-12T02:25:10.323Z', 'cooked': '

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.

', 'post_number': 4, 'post_type': 3, 'posts_count': 4, 'updated_at': '2025-08-12T02:25:10.323Z', 'reply_count': 0, 'reply_to_post_number': None, 'quote_count': 0, 'incoming_link_count': 1, 'reads': 3, 'readers_count': 2, 'score': 5.6, 'yours': False, 'topic_id': 166407, 'topic_slug': 'the-gradio-api-is-not-working', 'display_username': 'system', 'primary_group_name': None, 'flair_name': None, 'flair_url': None, 'flair_bg_color': None, 'flair_color': None, 'flair_group_id': None, 'badges_granted': [], 'version': 1, 'can_edit': False, 'can_delete': False, 'can_recover': False, 'can_see_hidden_post': False, 'can_wiki': False, 'read': True, 'user_title': None, 'bookmarked': False, 'actions_summary': [], 'moderator': True, 'admin': True, 'staff': True, 'user_id': -1, 'hidden': False, 'trust_level': 4, 'deleted_at': None, 'user_deleted': False, 'edit_reason': None, 'can_view_edit_history': True, 'wiki': False, 'action_code': 'autoclosed.enabled', 'post_url': '/t/the-gradio-api-is-not-working/166407/4', 'reactions': [], 'current_user_reaction': None, 'reaction_users_count': 0, 'current_user_used_main_reaction': False, 'can_accept_answer': False, 'can_unaccept_answer': False, 'accepted_answer': False, 'topic_accepted_answer': True}]";"

the gradio throws error: Traceback (most recent call last):
-File “C:\Users\danya\PycharmProjects\DiDefBackend\DiDef\SentenceTransformer.py”, line 45, in
-client = Client(
-File “C:\Users\danya\AppData\Local\Programs\Python\Python39\lib\site-packages\gradio_client\client.py”, line 171, in init
-self._info = self._get_api_info()
-File “C:\Users\danya\AppData\Local\Programs\Python\Python39\lib\site-packages\gradio_client\client.py”, line 564, in get_api_info
-info = r.json()
-File “C:\Users\danya\AppData\Local\Programs\Python\Python39\lib\site-packages\httpx_models.py”, line 764, in json
-return jsonlib.loads(self.content, **kwargs)
-File ""C:\Users\danya\AppData\Local\Programs\Python\Python39\lib\json_init
.py"", line 346, in loads
-return _default_decoder.decode(s)
-File “C:\Users\danya\AppData\Local\Programs\Python\Python39\lib\json\decoder.py”, line 337, in decode
-obj, end = self.raw_decode(s, idx=_w(s, 0).end())
-File “C:\Users\danya\AppData\Local\Programs\Python\Python39\lib\json\decoder.py”, line 355, in raw_decode
-raise JSONDecodeError(“Expecting value”, s, err.value) from None
-json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

-

why? My code is very simple:

-

from gradio_client import Client

-

client = Client(
-src = “GoldDany/DiDefBackend”, #my Space is public
-)
-result = client.predict(
-text=“Hello!!”,
-api_name=“/predict”,
-)
-print(result)

";"
-

Python39

-
-

I think this is probably the culprit this time.

-

Gradio 5 only works with Python 3.10 or later on both the server and client, so I think the error is occurring because the versions are different between the client and server.
-I don’t know if this error can be potentially resolved…

-

The simplest solution is to use Python 3.10 or later.

-
# pip install -U gradio_client (in Python 3.9 environment)
-import subprocess
-subprocess.run(""pip show gradio_client"", shell=True) # Version: 1.3.0 (Release date: 2024.08.08)
-from gradio_client import Client
-
-client = Client(src=""John6666/apitest1"") # Gradio 4.41.0
-result = client.predict(text=""Hello!!"", api_name=""/predict"")
-print(result) # [0.010964062064886093, 0.02713009901344776, -0.024556249380111694, 0.01713254489004612, 0.04088324308395386, -0.005583592690527439, 0.015990763902664185,...
-
-client = Client(src=""GoldDany/DiDefBackend"") # Gradio 5.42.0
-result = client.predict(text=""Hello!!"", api_name=""/predict"")
-print(result) # error
-
";1 \ No newline at end of file