config.py · Darveht/ZenVision-AI-Subtitle-Generator at main

ZenVision-AI-Subtitle-Generator / config.py

Upload config.py with huggingface_hub

c9d4539 verified 5 months ago

7.35 kB

	"""
	ZenVision AI Subtitle Generator - Configuration
	Advanced configuration for the 3GB+ model
	"""

	import os
	from dataclasses import dataclass
	from typing import Dict, List, Optional

	@dataclass
	class ModelConfig:
	    """AI model configuration.

	    Holds the identifiers of the Whisper, translation, sentiment, emotion,
	    BERT and spaCy models used by the application.
	    """

	    # Whisper configuration
	    whisper_model_size: str = "large-v2"  # tiny, base, small, medium, large, large-v2
	    whisper_device: str = "auto"  # auto, cuda, cpu, mps

	    # Translation models
	    translation_model: str = "Helsinki-NLP/opus-mt-en-mul"
	    use_google_translate: bool = True

	    # Sentiment analysis
	    sentiment_model: str = "cardiffnlp/twitter-roberta-base-sentiment-latest"

	    # Emotion detection
	    emotion_model: str = "j-hartmann/emotion-english-distilroberta-base"

	    # BERT configuration
	    bert_model: str = "bert-base-multilingual-cased"

	    # spaCy models, keyed by language code. ``None`` is only a
	    # construction-time placeholder: __post_init__ replaces it with the
	    # default mapping, so every instance gets its own dict rather than
	    # sharing one mutable default.
	    spacy_models: Optional[Dict[str, str]] = None

	    def __post_init__(self):
	        """Install the default spaCy model mapping when none was supplied."""
	        if self.spacy_models is None:
	            self.spacy_models = {
	                "en": "en_core_web_sm",
	                "es": "es_core_news_sm",
	                "fr": "fr_core_news_sm",
	                "de": "de_core_news_sm",
	                "it": "it_core_news_sm",
	                "pt": "pt_core_news_sm",
	            }

	@dataclass
	class ProcessingConfig:
	    """Processing configuration.

	    Audio and video encoding settings, subtitle layout limits and the list
	    of supported language codes.
	    """

	    # Audio processing
	    sample_rate: int = 16000
	    audio_format: str = "wav"

	    # Video processing
	    video_codec: str = "libx264"
	    audio_codec: str = "aac"

	    # Subtitle configuration
	    max_chars_per_line: int = 42
	    max_lines_per_subtitle: int = 2
	    min_subtitle_duration: float = 1.0
	    max_subtitle_duration: float = 7.0

	    # Language support. ``None`` is only a construction-time placeholder:
	    # __post_init__ replaces it with the default list, so every instance
	    # gets its own list rather than sharing one mutable default.
	    supported_languages: Optional[List[str]] = None

	    def __post_init__(self):
	        """Install the default language list when none was supplied."""
	        if self.supported_languages is None:
	            self.supported_languages = [
	                "es", "en", "fr", "de", "it", "pt",
	                "zh", "ja", "ko", "ru", "ar", "hi",
	            ]

	@dataclass
	class UIConfig:
	    """User interface configuration.

	    Gradio server settings, theme and title, and file upload limits.
	    """

	    # Gradio configuration
	    server_name: str = "0.0.0.0"
	    server_port: int = 7860
	    share: bool = False

	    # Theme and styling
	    theme: str = "soft"
	    title: str = "ZenVision AI Subtitle Generator"

	    # File upload limits
	    max_file_size: int = 500 * 1024 * 1024  # 500MB
	    # ``None`` is only a construction-time placeholder: __post_init__
	    # replaces it with the default extension list, so every instance gets
	    # its own list rather than sharing one mutable default.
	    allowed_video_formats: Optional[List[str]] = None

	    def __post_init__(self):
	        """Install the default video extension list when none was supplied."""
	        if self.allowed_video_formats is None:
	            self.allowed_video_formats = [
	                ".mp4", ".avi", ".mov", ".mkv", ".webm",
	                ".flv", ".wmv", ".m4v", ".3gp",
	            ]

	@dataclass
	class SystemConfig:
	    """System configuration.

	    Storage locations, worker and batch sizing, memory limits and logging
	    settings.
	    """

	    # Cache and storage. The home-relative defaults are expanded once, when
	    # the class body is evaluated.
	    cache_dir: str = os.path.expanduser("~/.zenvision/cache")
	    models_dir: str = os.path.expanduser("~/.zenvision/models")
	    temp_dir: str = "/tmp/zenvision"

	    # Performance
	    max_workers: int = 4
	    batch_size: int = 16

	    # Memory management
	    max_memory_usage: float = 0.8  # 80% of available RAM
	    clear_cache_on_exit: bool = True

	    # Logging
	    log_level: str = "INFO"
	    log_file: Optional[str] = None

	class ZenVisionConfig:
	    """Top-level ZenVision configuration.

	    Groups the model, processing, UI and system settings under the
	    ``model``, ``processing``, ``ui`` and ``system`` attributes.
	    Constructing an instance applies ``ZENVISION_*`` environment overrides
	    and then creates the cache, models and temp directories on disk.
	    """

	    def __init__(self):
	        self.model = ModelConfig()
	        self.processing = ProcessingConfig()
	        self.ui = UIConfig()
	        self.system = SystemConfig()

	        # Overrides are applied before directory creation so that a
	        # ZENVISION_CACHE_DIR override decides which cache directory is made.
	        self._load_from_env()
	        self._create_directories()

	    @staticmethod
	    def _parse_int_env(name: str, raw: str) -> int:
	        """Convert the value of environment variable *name* to ``int``.

	        Args:
	            name: Name of the environment variable, used in the error text.
	            raw: The string value read from the environment.

	        Raises:
	            ValueError: If *raw* is not a valid integer. The message names
	                the offending variable.
	        """
	        try:
	            return int(raw)
	        except ValueError as err:
	            raise ValueError(
	                f"Environment variable {name} must be an integer, got {raw!r}"
	            ) from err

	    def _load_from_env(self):
	        """Apply overrides from ``ZENVISION_*`` environment variables.

	        A variable that is unset or empty leaves the current value untouched.

	        Raises:
	            ValueError: If ``ZENVISION_PORT`` or ``ZENVISION_MAX_WORKERS`` is
	                set to something that is not an integer.
	        """
	        # Model configuration
	        whisper_model = os.getenv("ZENVISION_WHISPER_MODEL")
	        if whisper_model:
	            self.model.whisper_model_size = whisper_model

	        device = os.getenv("ZENVISION_DEVICE")
	        if device:
	            self.model.whisper_device = device

	        # UI configuration
	        port = os.getenv("ZENVISION_PORT")
	        if port:
	            self.ui.server_port = self._parse_int_env("ZENVISION_PORT", port)

	        share = os.getenv("ZENVISION_SHARE")
	        if share:
	            # Only the literal "true" (any letter case) enables sharing.
	            self.ui.share = share.lower() == "true"

	        # System configuration
	        cache_dir = os.getenv("ZENVISION_CACHE_DIR")
	        if cache_dir:
	            self.system.cache_dir = cache_dir

	        max_workers = os.getenv("ZENVISION_MAX_WORKERS")
	        if max_workers:
	            self.system.max_workers = self._parse_int_env(
	                "ZENVISION_MAX_WORKERS", max_workers
	            )

	    def _create_directories(self):
	        """Create the cache, models and temp directories if they are missing."""
	        for directory in (
	            self.system.cache_dir,
	            self.system.models_dir,
	            self.system.temp_dir,
	        ):
	            os.makedirs(directory, exist_ok=True)

	    def get_model_path(self, model_name: str) -> str:
	        """Return the path of *model_name* inside the models directory."""
	        return os.path.join(self.system.models_dir, model_name)

	    def get_cache_path(self, cache_name: str) -> str:
	        """Return the path of *cache_name* inside the cache directory."""
	        return os.path.join(self.system.cache_dir, cache_name)

	    def to_dict(self) -> Dict:
	        """Return the configuration as a nested dictionary.

	        The result has the keys ``"model"``, ``"processing"``, ``"ui"`` and
	        ``"system"``, each mapping attribute names to values. It is an
	        independent snapshot: every section is deep-copied, so mutating the
	        returned dictionary (including nested lists and dicts) does not
	        change the live configuration.
	        """
	        # Imported locally so the block does not depend on a new
	        # module-level import.
	        import copy

	        return {
	            "model": copy.deepcopy(vars(self.model)),
	            "processing": copy.deepcopy(vars(self.processing)),
	            "ui": copy.deepcopy(vars(self.ui)),
	            "system": copy.deepcopy(vars(self.system)),
	        }

	# Global configuration instance, built once when this module is imported.
	# Constructing ZenVisionConfig applies ZENVISION_* environment overrides and
	# creates the cache, models and temp directories.
	config = ZenVisionConfig()

	# Emotion label -> hex color code.
	EMOTION_COLORS = {
	    "joy": "#FFD700",        # gold
	    "sadness": "#4169E1",    # royal blue
	    "anger": "#DC143C",      # crimson
	    "fear": "#8A2BE2",       # blue violet
	    "surprise": "#FF8C00",   # dark orange
	    "disgust": "#32CD32",    # lime green
	    "neutral": "#FFFFFF",    # white
	    "love": "#FF69B4",       # hot pink
	    "optimism": "#00FF7F",   # spring green
	    "pessimism": "#696969",  # dim gray
	}

	# Language code -> language name written in that language.
	LANGUAGE_NAMES = {
	    "es": "Español",
	    "en": "English",
	    "fr": "Français",
	    "de": "Deutsch",
	    "it": "Italiano",
	    "pt": "Português",
	    "zh": "中文",
	    "ja": "日本語",
	    "ko": "한국어",
	    "ru": "Русский",
	    "ar": "العربية",
	    "hi": "हिन्दी",
	}

	# Size of each model as a display string. Whisper is broken down by
	# model variant; the other entries are a single figure each.
	MODEL_SIZES = {
	    "whisper": {
	        "tiny": "39 MB",
	        "base": "74 MB",
	        "small": "244 MB",
	        "medium": "769 MB",
	        "large": "1550 MB",
	        "large-v2": "1550 MB",
	    },
	    "bert-multilingual": "400 MB",
	    "roberta-sentiment": "200 MB",
	    "distilroberta-emotion": "300 MB",
	    "translation-models": "500 MB",
	}

	# Benchmark figures as recorded in this file: accuracy scores per task
	# (broken down by language or language pair where given) and processing
	# speed per hardware target.
	PERFORMANCE_BENCHMARKS = {
	    "accuracy": {
	        "transcription": {
	            "en": 0.972,
	            "es": 0.958,
	            "fr": 0.945,
	            "de": 0.931,
	            "it": 0.948,
	            "pt": 0.952,
	        },
	        "translation": {
	            "en-es": 0.89,
	            "en-fr": 0.87,
	            "en-de": 0.84,
	            "es-en": 0.91,
	            "fr-en": 0.88,
	        },
	        "emotion_detection": 0.85,
	        "sentiment_analysis": 0.94,
	    },
	    "speed": {
	        "cpu_i7": 0.3,       # x real time
	        "gpu_rtx3080": 2.1,  # x real time
	        "gpu_rtx4090": 3.8,  # x real time
	    },
	}