TeszenAI
/

MTP3.6

Text Generation

Model card Files Files and versions

MTP3.6 / config.yaml

teszenofficial's picture

Upload config.yaml

f078257 verified 4 months ago

history blame contribute delete

2.34 kB

# MTP Mini - Optimized configuration, 20x larger and smarter

	model:
	vocab_size: 8000 # 2x más vocabulario
	d_model: 1024 # 2x dimensión (512 → 1024)
	n_layers: 24 # 3x capas (8 → 24)
	n_heads: 16 # 2x cabezas (8 → 16)
	d_ff: 4096 # 4x d_model
	max_seq_len: 2048 # 4x contexto (512 → 2048)
	dropout: 0.15 # Dropout optimizado
	use_swiglu: true # Mejor activación
	use_flash_attention: true # Atención optimizada
	use_confidence_scoring: true # Anti-alucinación
	min_confidence: 0.3

	training:
	batch_size: 2 # Pequeño para modelo grande
	accumulation_steps: 16 # Effective batch = 32
	epochs: 25 # 25 épocas como pediste
	learning_rate: 0.0002 # LR bajo para estabilidad
	min_lr: 0.000005
	weight_decay: 0.15 # Regularización fuerte
	max_grad_norm: 0.5
	num_threads: 4
	save_every: 5 # Guardar cada 5 épocas

	# Early stopping (para no perder info)
	patience: 10 # Muy paciente (espera 10 épocas sin mejora)
	min_delta: 0.0003 # Mejora mínima aceptable

	# Learning rate
	warmup_steps: 500
	use_lr_scheduler: true

	# Regularización
	label_smoothing: 0.15
	use_eos_loss_weight: true
	eos_weight: 3.0

	# Optimizaciones GPU
	use_gradient_checkpointing: true # Ahorra VRAM
	use_fp16: true # Mixed precision

	data:
	corpus_path: corpus/mtp_mini_corpus.jsonl
	min_text_length: 100
	max_text_length: 4000
	validation_split: 0.2 # 20% para validación

	# Augmentación
	use_augmentation: true
	augmentation_prob: 0.4

	generation:
	default_max_tokens: 300
	default_temperature: 0.65
	default_top_k: 50
	default_top_p: 0.9
	default_repetition_penalty: 1.2
	min_response_length: 30

	# Anti-alucinación
	use_perplexity_filter: true
	max_perplexity: 80.0
	use_entropy_threshold: true
	max_entropy: 4.0

	# Control de calidad
	use_confidence_filter: true
	min_confidence_threshold: 0.3

	stop_sequences:
	- "###"
	- "\n\n\n\n"
	- "Instrucción:"
	- "Usuario:"

	# Optimización de memoria
	memory:
	use_fp16: true
	use_gradient_checkpointing: true
	max_memory_gb: 14