Luis J Camargo committed on
Commit
59c3c55
·
1 Parent(s): 9218955

Map language code to inali_name and update UI label

Browse files
Files changed (2) hide show
  1. app.py +16 -2
  2. languages.json +0 -0
app.py CHANGED
@@ -8,6 +8,7 @@ from transformers import WhisperProcessor, AutoConfig, AutoModel, WhisperConfig,
8
  from transformers.models.whisper.modeling_whisper import WhisperEncoder
9
  import torch.nn as nn
10
  import psutil
 
11
 
12
  # --- CONFIGURATION ---
13
  MAX_AUDIO_SECONDS = 30
@@ -132,6 +133,16 @@ model.eval()
132
  # Initialize LabelExtractor to build text mappings
133
  label_extractor = LabelExtractor(processor.tokenizer)
134
 
 
 
 
 
 
 
 
 
 
 
135
  print("Model loaded successfully!")
136
 
137
  def get_mem_usage():
@@ -195,7 +206,10 @@ def predict_language(audio_path):
195
  # Strip the "<|" and "|>" tags if present for a cleaner UI
196
  fam_text = label_extractor.family_labels[fam_idx].strip("<|>") if fam_idx < len(label_extractor.family_labels) else f"Unknown Fam ({fam_idx})"
197
  super_text = label_extractor.super_labels[super_idx].strip("<|>") if super_idx < len(label_extractor.super_labels) else f"Unknown Super ({super_idx})"
198
- code_text = label_extractor.code_labels[code_idx].strip("<|>") if code_idx < len(label_extractor.code_labels) else f"Unknown Code ({code_idx})"
 
 
 
199
 
200
  print(f"[LOG] Final Memory: {get_mem_usage():.2f} MB")
201
  print(f"--- [LOG] Request Finished ---\n")
@@ -236,7 +250,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"))
236
  gr.Markdown("### 📊 2. Classification Results")
237
  fam_output = gr.Label(num_top_classes=1, label="🌍 Language Family")
238
  super_output = gr.Label(num_top_classes=1, label="🗣️ Superlanguage")
239
- code_output = gr.Label(num_top_classes=1, label="🔤 Language Code")
240
 
241
  submit_btn.click(
242
  fn=predict_language,
 
8
  from transformers.models.whisper.modeling_whisper import WhisperEncoder
9
  import torch.nn as nn
10
  import psutil
11
+ import json
12
 
13
  # --- CONFIGURATION ---
14
  MAX_AUDIO_SECONDS = 30
 
133
  # Initialize LabelExtractor to build text mappings
134
  label_extractor = LabelExtractor(processor.tokenizer)
135
 
136
+ # Load languages mapping
137
+ print("Loading language mappings...")
138
+ try:
139
+ with open("languages.json", "r", encoding="utf-8") as f:
140
+ languages_data = json.load(f)
141
+ CODE_TO_NAME = {item.get("code"): item.get("inali_name") for item in languages_data if item.get("code") and item.get("inali_name")}
142
+ except Exception as e:
143
+ print(f"Warning: Could not load languages.json: {e}")
144
+ CODE_TO_NAME = {}
145
+
146
  print("Model loaded successfully!")
147
 
148
  def get_mem_usage():
 
206
  # Strip the "<|" and "|>" tags if present for a cleaner UI
207
  fam_text = label_extractor.family_labels[fam_idx].strip("<|>") if fam_idx < len(label_extractor.family_labels) else f"Unknown Fam ({fam_idx})"
208
  super_text = label_extractor.super_labels[super_idx].strip("<|>") if super_idx < len(label_extractor.super_labels) else f"Unknown Super ({super_idx})"
209
+ code_raw = label_extractor.code_labels[code_idx].strip("<|>") if code_idx < len(label_extractor.code_labels) else f"Unknown Code ({code_idx})"
210
+
211
+ # Apply inali_name mapping
212
+ code_text = f"{CODE_TO_NAME[code_raw]} ({code_raw})" if code_raw in CODE_TO_NAME else code_raw
213
 
214
  print(f"[LOG] Final Memory: {get_mem_usage():.2f} MB")
215
  print(f"--- [LOG] Request Finished ---\n")
 
250
  gr.Markdown("### 📊 2. Classification Results")
251
  fam_output = gr.Label(num_top_classes=1, label="🌍 Language Family")
252
  super_output = gr.Label(num_top_classes=1, label="🗣️ Superlanguage")
253
+ code_output = gr.Label(num_top_classes=1, label="🔤 Language")
254
 
255
  submit_btn.click(
256
  fn=predict_language,
languages.json ADDED
The diff for this file is too large to render. See raw diff