llm-semantic-router
/

multi-modal-embed-large

Sentence Similarity

sentence-transformers

text-image-audio

semantic-router

Eval Results (legacy)

Model card Files Files and versions

multi-modal-embed-large / config.json

HuaminChen's picture

Upload multi-modal-embed-large final model

e21cde3 verified 23 days ago

history blame contribute delete

1.19 kB

	{
	"seed": 42,
	"output_dir": "/scratch/hf_st_mm_outputs/server_datacenter_8gpu_tri_encoder",
	"model": {
	"text_encoder_name": "llm-semantic-router/mmbert-embed-32k-2d-matryoshka",
	"image_encoder_name": "google/siglip2-so400m-patch14-384",
	"audio_encoder_name": "openai/whisper-medium",
	"embedding_dim": 768,
	"max_text_length": 32768
	},
	"training": {
	"epochs": 10,
	"batch_size": 12,
	"grad_accum_steps": 8,
	"num_workers": 4,
	"prefetch_factor": 4,
	"shard_prefetch": 2,
	"shard_cache_limit": 4,
	"sequential_shard_loading": true,
	"shuffle": false,
	"modality_homogeneous_batches": false,
	"learning_rate": 1e-05,
	"weight_decay": 0.01,
	"warmup_ratio": 0.1,
	"max_grad_norm": 1.0,
	"mixed_precision": "bf16",
	"log_every": 10,
	"save_every": 2000,
	"hard_negative_ratio": 0.5
	},
	"loss": {
	"type": "cached_mnrl",
	"scale": 20.0
	},
	"data": {
	"cache_dir": "/scratch/2dmse-data/server_full_datacenter_cache/train"
	},
	"validation": {
	"cache_dir": "/scratch/2dmse-data/server_full_datacenter_cache/val",
	"num_workers": 2,
	"shard_prefetch": 1,
	"shard_cache_limit": 2
	}
	}