Mirror agentic-intelligence-lab/elephant-rerank-v1-text-small from ModelScope

69eb4b7 verified 7 days ago

814 Bytes

	{
	"model_name": "llm-semantic-router/mmbert-32k-yarn",
	"output_dir": "/workspace/outputs/mmbert-rerank-32k-2d-matryoshka",
	"use_2d_matryoshka": true,
	"layer_indices": "3,6,11,22",
	"dim_indices": "768,512,256,128,64",
	"pooling_strategy": "cls",
	"train_data": "/workspace/data/bge-m3/bge-m3-data",
	"max_length": 32768,
	"max_samples": null,
	"negatives_per_query": 3,
	"use_quora": false,
	"use_fever": false,
	"max_quora_samples": 100000,
	"max_fever_samples": 100000,
	"epochs": 1,
	"batch_size": 16,
	"gradient_accumulation_steps": 2,
	"learning_rate": 2e-05,
	"weight_decay": 0.01,
	"warmup_ratio": 0.1,
	"max_grad_norm": 1.0,
	"use_flash_attn": true,
	"bf16": true,
	"gradient_checkpointing": true,
	"num_workers": 4,
	"logging_steps": 100,
	"save_steps": 5000,
	"seed": 42
	}