Spaces:

VeuReu
/

engine

Sleeping

App Files Files Community

engine / config.yaml

VeuReu

Update config.yaml

cc083dd verified 4 months ago

raw

history blame contribute delete

4.12 kB

	# ===========================
	# Veureu Engine – config.yaml
	# ===========================

	# Engine-wide settings.
	engine:
	  # Presumably the root folder for generated output — confirm against the engine code.
	  output_root: "results"

	# HTTP API settings.
	api:
	  # NOTE(review): "*" is the wildcard origin; narrow this list if the API is
	  # reachable from untrusted browsers.
	  cors_allow_origins: ["*"]
	  sync_timeout_seconds: 3600

	# Embedding store for face and voice indexes
	# (presumably ChromaDB, given the "chroma_db" directory name — confirm).
	database:
	  enabled: true
	  persist_directory: "chroma_db"
	  enable_face_recognition: true
	  enable_voice_recognition: true
	  face_collection: "index_faces"
	  voice_collection: "index_voices"

	# Asynchronous job queue.
	jobs:
	  # To enable the async queue, set this to true and add JobManager in main_api.py.
	  enabled: false
	  max_workers: 1
	  result_ttl_seconds: 86400

	# Frame-extraction settings for video input.
	video_processing:
	  keyframes:
	    conditional_extraction:
	      enable: true
	      min_scene_length_seconds: 1.5
	      difference_threshold: 28.0

	  # NOTE(review): placed as a sibling of `keyframes` because the `identity`
	  # section names both "keyframes" and "frames_per_second" as frame sources —
	  # confirm against the code that reads this file.
	  frames_per_second:
	    enable: true
	    fps: 1.0  # analysis frame rate

	# OCR settings.
	# NOTE(review): the flattened source does not show this section's parent; it
	# presumably belongs under `video_processing` — confirm and indent accordingly.
	ocr:
	  engine: "tesseract"  # "tesseract" | "easyocr"
	  language_hint: "spa"
	  tesseract_cmd: ""  # if tesseract is not on PATH, put its path here

	# Face detection and embedding settings.
	# NOTE(review): the flattened source does not show this section's parent; it
	# presumably belongs under `video_processing` — confirm and indent accordingly.
	faces:
	  detector_model: "mtcnn"  # adjust to match your vision_tools
	  embedding_model: "Facenet512"  # used by FaceOfImageEmbedding
	  min_face_size: 32
	  detection_confidence: 0.85

	# Clustering of OCR text.
	# NOTE(review): the flattened source does not show this section's parent; it
	# presumably belongs under `video_processing` — confirm and indent accordingly.
	ocr_clustering:
	  method: "sequential_similarity"
	  sentence_transformer: "all-MiniLM-L6-v2"
	  # Original note: higher ⇒ fewer clusters.
	  # NOTE(review): a stricter similarity threshold usually yields MORE clusters;
	  # verify the direction against the clustering implementation.
	  similarity_threshold: 0.60

	# Audio format settings.
	audio_processing:
	  sample_rate: 16000
	  format: "wav"

	# Audio segmentation / diarization.
	# NOTE(review): the flattened source does not show this section's parent; it
	# presumably belongs under `audio_processing` — confirm and indent accordingly.
	diarization:
	  enabled: true
	  force_silence_only: true  # use silence-based segmentation (no pyannote)
	  min_segment_duration: 0.5  # in seconds (short clips)
	  max_segment_duration: 10.0
	  silence_thresh: -40  # dBFS threshold for silence detection
	  min_silence_len: 500  # milliseconds

	# Voice-embedding switches.
	# NOTE(review): the flattened source does not show where these two keys attach
	# (presumably under `audio_processing`, possibly under `diarization`) — confirm.
	enable_voice_embeddings: true  # SpeechBrain ECAPA
	speaker_embedding:
	  enabled: true

	# Speaker identification (clustering + Chroma).
	# NOTE(review): the flattened source does not show whether this section is
	# top-level or nested under `audio_processing` — confirm and indent accordingly.
	voice_processing:
	  speaker_identification:
	    enabled: true
	    find_optimal_clusters: true
	    min_speakers: 1
	    max_speakers: 5
	    distance_threshold: 0.40

	# Speech-recognition options.
	# NOTE(review): the flattened source does not show whether this section is
	# top-level or nested under an audio section — confirm and indent accordingly.
	asr:
	  # Transcribe the complete audio in addition to the clips (useful for global context).
	  enable_full_transcription: true

	# Background description: frame montage plus the model that describes it.
	background_descriptor:
	  montage:
	    enable: true
	    max_frames: 12
	    grid: "auto"

	  # NOTE(review): assumed to be a sibling of `montage` — confirm.
	  description:
	    model: "salamandra-vision"  # or "gpt-4o-mini"
	    max_tokens: 512
	    temperature: 0.2

	# Mapping of recognised identities onto the timeline.
	identity:
	  timeline_mapping:
	    per_second_frames_source: "frames_per_second"
	    attach_faces_to:
	      - "keyframes"
	      - "audio_segments"
	    # NOTE(review): assumed to be a sibling of `attach_faces_to` — confirm.
	    out_key: "persona"

	# Audio-description narration settings.
	narration:
	  model: "salamandra-instruct"  # "salamandra-instruct" | "gpt-4o-mini"
	  une_guidelines_path: "UNE_153010.txt"
	  timing:
	    max_ad_duration_ratio: 0.60
	    min_gap_seconds: 1.20
	    min_ad_seconds: 0.80
	  llm:
	    max_tokens: 1024
	    temperature: 0.2

	# Subtitle layout limits.
	subtitles:
	  max_chars_per_line: 42
	  max_lines_per_cue: 10
	  speaker_display: "brackets"  # "brackets" | "prefix" | "none"

	# Task alias → model name.
	models:
	  instruct: "salamandra-instruct"
	  vision: "salamandra-vision"
	  tools: "salamandra-tools"
	  # Points to the veureu/asr Space (Aina: faster-whisper-large-v3-ca-3catparla).
	  asr: "whisper-catalan"

	# Models listed under use_remote_for.
	# NOTE(review): the flattened source does not show whether this section is
	# top-level or nested under `models` — confirm and indent accordingly.
	routing:
	  use_remote_for:
	    - "salamandra-instruct"
	    - "salamandra-vision"
	    - "salamandra-tools"
	    - "whisper-catalan"

	# Remote Hugging Face Spaces, one endpoint entry per model name.
	remote_spaces:
	  user: "veureu"

	  endpoints:
	    salamandra-instruct:
	      space: "schat"
	      base_url: "https://veureu-schat.hf.space"
	      client: "gradio"
	      predict_route: "/predict"

	    salamandra-vision:
	      space: "svision"
	      base_url: "https://veureu-svision.hf.space"
	      client: "gradio"
	      predict_route: "/predict"

	    salamandra-tools:
	      space: "stools"
	      base_url: "https://veureu-stools.hf.space"
	      client: "gradio"
	      predict_route: "/predict"

	    whisper-catalan:
	      space: "asr"
	      base_url: "https://veureu-asr.hf.space"
	      client: "gradio"
	      predict_route: "/predict"

	# HTTP client timeout and retry settings.
	# NOTE(review): the flattened source does not show this section's parent; it
	# presumably belongs under `remote_spaces` — confirm and indent accordingly.
	http:
	  timeout_seconds: 180
	  retries: 3
	  backoff_seconds: 2.0

	# Authentication and TLS settings.
	security:
	  use_hf_token: true
	  # Name of the environment variable that holds the token (not the token itself).
	  hf_token_env: "HF_TOKEN"
	  allow_insecure_tls: false

	# Logging settings.
	logging:
	  level: "INFO"
	  json: false  # presumably toggles JSON-formatted log output — confirm

	# NOTE(review): purpose not evident from this file. The name matches the "stools"
	# Space used by the salamandra-tools endpoint — confirm what this flag toggles
	# and whether it is meant to be a top-level key.
	stools: false