Spaces:
Sleeping
Sleeping
Luis J Camargo committed on
Commit ·
59c3c55
1
Parent(s): 9218955
Map language code to inali_name and update UI label
Browse files
- app.py +16 -2
- languages.json +0 -0
app.py
CHANGED
|
@@ -8,6 +8,7 @@ from transformers import WhisperProcessor, AutoConfig, AutoModel, WhisperConfig,
|
|
| 8 |
from transformers.models.whisper.modeling_whisper import WhisperEncoder
|
| 9 |
import torch.nn as nn
|
| 10 |
import psutil
|
|
|
|
| 11 |
|
| 12 |
# --- CONFIGURATION ---
|
| 13 |
MAX_AUDIO_SECONDS = 30
|
|
@@ -132,6 +133,16 @@ model.eval()
|
|
| 132 |
# Initialize LabelExtractor to build text mappings
|
| 133 |
label_extractor = LabelExtractor(processor.tokenizer)
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
print("Model loaded successfully!")
|
| 136 |
|
| 137 |
def get_mem_usage():
|
|
@@ -195,7 +206,10 @@ def predict_language(audio_path):
|
|
| 195 |
# Strip the "<|" and "|>" tags if present for a cleaner UI
|
| 196 |
fam_text = label_extractor.family_labels[fam_idx].strip("<|>") if fam_idx < len(label_extractor.family_labels) else f"Unknown Fam ({fam_idx})"
|
| 197 |
super_text = label_extractor.super_labels[super_idx].strip("<|>") if super_idx < len(label_extractor.super_labels) else f"Unknown Super ({super_idx})"
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
print(f"[LOG] Final Memory: {get_mem_usage():.2f} MB")
|
| 201 |
print(f"--- [LOG] Request Finished ---\n")
|
|
@@ -236,7 +250,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"))
|
|
| 236 |
gr.Markdown("### π 2. Classification Results")
|
| 237 |
fam_output = gr.Label(num_top_classes=1, label="π Language Family")
|
| 238 |
super_output = gr.Label(num_top_classes=1, label="🗣️ Superlanguage")
|
| 239 |
-
code_output = gr.Label(num_top_classes=1, label="π€ Language
|
| 240 |
|
| 241 |
submit_btn.click(
|
| 242 |
fn=predict_language,
|
|
|
|
| 8 |
from transformers.models.whisper.modeling_whisper import WhisperEncoder
|
| 9 |
import torch.nn as nn
|
| 10 |
import psutil
|
| 11 |
+
import json
|
| 12 |
|
| 13 |
# --- CONFIGURATION ---
|
| 14 |
MAX_AUDIO_SECONDS = 30
|
|
|
|
| 133 |
# Initialize LabelExtractor to build text mappings
|
| 134 |
label_extractor = LabelExtractor(processor.tokenizer)
|
| 135 |
|
| 136 |
+
# Load languages mapping
|
| 137 |
+
print("Loading language mappings...")
|
| 138 |
+
try:
|
| 139 |
+
with open("languages.json", "r", encoding="utf-8") as f:
|
| 140 |
+
languages_data = json.load(f)
|
| 141 |
+
CODE_TO_NAME = {item.get("code"): item.get("inali_name") for item in languages_data if item.get("code") and item.get("inali_name")}
|
| 142 |
+
except Exception as e:
|
| 143 |
+
print(f"Warning: Could not load languages.json: {e}")
|
| 144 |
+
CODE_TO_NAME = {}
|
| 145 |
+
|
| 146 |
print("Model loaded successfully!")
|
| 147 |
|
| 148 |
def get_mem_usage():
|
|
|
|
| 206 |
# Strip the "<|" and "|>" tags if present for a cleaner UI
|
| 207 |
fam_text = label_extractor.family_labels[fam_idx].strip("<|>") if fam_idx < len(label_extractor.family_labels) else f"Unknown Fam ({fam_idx})"
|
| 208 |
super_text = label_extractor.super_labels[super_idx].strip("<|>") if super_idx < len(label_extractor.super_labels) else f"Unknown Super ({super_idx})"
|
| 209 |
+
code_raw = label_extractor.code_labels[code_idx].strip("<|>") if code_idx < len(label_extractor.code_labels) else f"Unknown Code ({code_idx})"
|
| 210 |
+
|
| 211 |
+
# Apply inali_name mapping
|
| 212 |
+
code_text = f"{CODE_TO_NAME[code_raw]} ({code_raw})" if code_raw in CODE_TO_NAME else code_raw
|
| 213 |
|
| 214 |
print(f"[LOG] Final Memory: {get_mem_usage():.2f} MB")
|
| 215 |
print(f"--- [LOG] Request Finished ---\n")
|
|
|
|
| 250 |
gr.Markdown("### π 2. Classification Results")
|
| 251 |
fam_output = gr.Label(num_top_classes=1, label="π Language Family")
|
| 252 |
super_output = gr.Label(num_top_classes=1, label="🗣️ Superlanguage")
|
| 253 |
+
code_output = gr.Label(num_top_classes=1, label="π€ Language")
|
| 254 |
|
| 255 |
submit_btn.click(
|
| 256 |
fn=predict_language,
|
languages.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|