Luis J Camargo committed on
Commit
314a4a3
·
1 Parent(s): 9d375e9

feat: app update with UI improvements and example texts

Browse files
Files changed (3) hide show
  1. app.py +41 -10
  2. icon.png +0 -0
  3. results.txt +176 -0
app.py CHANGED
@@ -231,12 +231,41 @@ def predict_language(audio_path, fam_k=1, fam_thresh=0.0, super_k=1, super_thres
231
  print(f"Error during inference: {e}")
232
  raise gr.Error(f"Processing failed: {str(e)}")
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  # === UI COMPONENTS ===
235
  with gr.Blocks(theme=gr.themes.Ocean()) as demo:
236
  gr.HTML(
237
- """
238
  <div style="text-align: center; padding: 30px; background: linear-gradient(120deg, rgb(2, 132, 199) 0%, rgb(16, 185, 129) 60%, rgb(5, 150, 105) 100%); color: white; border-radius: 15px; margin-bottom: 25px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);">
239
- <h1 style="color: white; margin: 0; font-size: 2.5em;">🦑 Tachiwin Language Identifier 🦑</h1>
 
240
  <p style="font-size: 1.2em; opacity: 0.9; margin-top: 10px;">Identify any of the 68 languages of Mexico and their 360 variants</p>
241
  </div>
242
  """
@@ -292,16 +321,18 @@ with gr.Blocks(theme=gr.themes.Ocean()) as demo:
292
  gr.Markdown(
293
  """
294
  ---
295
- ### ℹ️ About this Model
296
- Tachiwin Multilingual Language Classifier is a finetune/fork or encoded-only whisper architecture trained to recognize any of the 68 indigenous superlanguages of México and their 360 variants.
297
- **Accuracy Overview:**
298
- - **Language Family**: ~73%
299
- - **Superlanguage**: ~59%
300
- - **Language Code**: ~52%
301
-
302
- *Developed by Tachiwin. May the indigenous languages never be lost.*
303
  """
304
  )
 
 
 
 
 
 
 
 
305
 
306
  if __name__ == "__main__":
307
  demo.launch(ssr_mode=False)
 
231
  print(f"Error during inference: {e}")
232
  raise gr.Error(f"Processing failed: {str(e)}")
233
 
234
+ import base64
235
+ import re
236
+
237
+ # --- Load icon.png as base64 ---
238
+ try:
239
+ with open("icon.png", "rb") as image_file:
240
+ encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
241
+ icon_html = f'<img src="data:image/png;base64,{encoded_string}" style="height: 80px; margin-bottom: 10px;" alt="Tachiwin Icon" />'
242
+ except Exception as e:
243
+ icon_html = ""
244
+
245
+ # --- Load curated examples ---
246
+ curated_examples = []
247
+ try:
248
+ with open("results.txt", "r", encoding="utf-8") as f:
249
+ results_content = f.read()
250
+
251
+ # Split by the sample header
252
+ sample_blocks = re.split(r'📊 Sample (\d+)', results_content)
253
+ for i in range(1, len(sample_blocks), 2):
254
+ sample_num = sample_blocks[i]
255
+ block = sample_blocks[i+1].strip()
256
+ audio_path = f"samples/sample{sample_num}.wav"
257
+ if os.path.exists(audio_path):
258
+ curated_examples.append([audio_path, f"Sample {sample_num}:\n{block}"])
259
+ except Exception as e:
260
+ print(f"Warning: Could not parse results.txt: {e}")
261
+
262
  # === UI COMPONENTS ===
263
  with gr.Blocks(theme=gr.themes.Ocean()) as demo:
264
  gr.HTML(
265
+ f"""
266
  <div style="text-align: center; padding: 30px; background: linear-gradient(120deg, rgb(2, 132, 199) 0%, rgb(16, 185, 129) 60%, rgb(5, 150, 105) 100%); color: white; border-radius: 15px; margin-bottom: 25px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);">
267
+ {icon_html}
268
+ <h1 style="color: white; margin: 0; font-size: 2.5em;">🦑 Tachiwin AudioId 🦑</h1>
269
  <p style="font-size: 1.2em; opacity: 0.9; margin-top: 10px;">Identify any of the 68 languages of Mexico and their 360 variants</p>
270
  </div>
271
  """
 
321
  gr.Markdown(
322
  """
323
  ---
324
+ ### 📋 Curated Samples for Reference
325
+ Select any of the sample audios below to load them for inference. The expected results (Truth) and model predictions from the finetune phase are shown below for reference.
 
 
 
 
 
 
326
  """
327
  )
328
+
329
+ with gr.Row():
330
+ example_text = gr.Textbox(label="Sample Ground Truth & Finetune Prediction", interactive=False, lines=6)
331
+
332
+ gr.Examples(
333
+ examples=curated_examples,
334
+ inputs=[audio_input, example_text],
335
+ )
336
 
337
  if __name__ == "__main__":
338
  demo.launch(ssr_mode=False)
icon.png ADDED
results.txt ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 📊 Sample 0
2
+ Truth: Mayense → Ch'ol → ctu
3
+ Pred: Mayense → Ch'ol → ctu
4
+ Match: 1.00 ✅
5
+ Family: ✅
6
+ Super: ✅
7
+ Code: ✅
8
+
9
+ 📊 Sample 60
10
+ Truth: Mayense → Tzeltal → tzh
11
+ Pred: Mayense → Maya → yua
12
+ Match: 0.33 ✅
13
+ Family: ✅
14
+ Super: ❌
15
+ Code: ❌
16
+
17
+
18
+ 📊 Sample 200
19
+ Truth: Yuto-Nahua → Mayo → mfy
20
+ Pred: Yuto-Nahua → Mayo → mfy
21
+ Match: 1.00 ✅
22
+ Family: ✅
23
+ Super: ✅
24
+ Code: ✅
25
+
26
+ 📊 Sample 220
27
+ Truth: Yuto-Nahua → Náhuatl → nlv
28
+ Pred: Yuto-Nahua → Náhuatl → ngu
29
+ Match: 0.67 ✅
30
+ Family: ✅
31
+ Super: ✅
32
+ Code: ❌
33
+
34
+ 📊 Sample 240
35
+ Truth: Mayense → Maya → yua
36
+ Pred: Mayense → Maya → yua
37
+ Match: 1.00 ✅
38
+ Family: ✅
39
+ Super: ✅
40
+ Code: ✅
41
+
42
+ 📊 Sample 260
43
+ Truth: Yuto-Nahua → Yaqui → yaq
44
+ Pred: Yuto-Nahua → Yaqui → yaq
45
+ Match: 1.00 ✅
46
+ Family: ✅
47
+ Super: ✅
48
+ Code: ✅
49
+
50
+ 📊 Sample 280
51
+ Truth: Otomangue → Chatino → ctp
52
+ Pred: Otomangue → Chatino → ctp
53
+ Match: 1.00 ✅
54
+ Family: ✅
55
+ Super: ✅
56
+ Code: ✅
57
+
58
+ 📊 Sample 400
59
+ Truth: Otomangue → Zapoteco → zaa
60
+ Pred: Otomangue → Zapoteco → zaa
61
+ Match: 1.00 ✅
62
+ Family: ✅
63
+ Super: ✅
64
+ Code: ✅
65
+
66
+ 📊 Sample 420
67
+ Truth: Otomangue → Mazahua → mmc
68
+ Pred: Otomangue → Mazahua → mmc
69
+ Match: 1.00 ✅
70
+ Family: ✅
71
+ Super: ✅
72
+ Code: ✅
73
+
74
+ 📊 Sample 440
75
+ Truth: Yuto-Nahua → Cora → crn
76
+ Pred: Yuto-Nahua → Cora → crn
77
+ Match: 1.00 ✅
78
+ Family: ✅
79
+ Super: ✅
80
+ Code: ✅
81
+
82
+ 📊 Sample 480
83
+ Truth: Otomangue → Mixteco → mix
84
+ Pred: Otomangue → Mixteco → mix
85
+ Match: 1.00 ✅
86
+ Family: ✅
87
+ Super: ✅
88
+ Code: ✅
89
+
90
+ 📊 Sample 600
91
+ Truth: Yuto-Nahua → Guarijío → var
92
+ Pred: Yuto-Nahua → Guarijío → var
93
+ Match: 1.00 ✅
94
+ Family: ✅
95
+ Super: ✅
96
+ Code: ✅
97
+
98
+ 📊 Sample 660
99
+ Truth: Yuto-Nahua → Mayo → mfy
100
+ Pred: Yuto-Nahua → Mayo → mfy
101
+ Match: 1.00 ✅
102
+ Family: ✅
103
+ Super: ✅
104
+ Code: ✅
105
+
106
+ 📊 Sample 820
107
+ Truth: Otomangue → Chatino → ctp
108
+ Pred: Otomangue → Chatino → ctp
109
+ Match: 1.00 ✅
110
+ Family: ✅
111
+ Super: ✅
112
+ Code: ✅
113
+
114
+ 📊 Sample 1020
115
+ Truth: Mayense → Tzeltal → tzh
116
+ Pred: Mayense → Tzeltal → tzh
117
+ Match: 1.00 ✅
118
+ Family: ✅
119
+ Super: ✅
120
+ Code: ✅
121
+
122
+ 📊 Sample 1080
123
+ Truth: Yuto-Nahua → Huichol → hch
124
+ Pred: Yuto-Nahua → Huichol → hch
125
+ Match: 1.00 ✅
126
+ Family: ✅
127
+ Super: ✅
128
+ Code: ✅
129
+
130
+ 📊 Sample 1100
131
+ Truth: Otomangue → Triqui → trs
132
+ Pred: Otomangue → Triqui → trs
133
+ Match: 1.00 ✅
134
+ Family: ✅
135
+ Super: ✅
136
+ Code: ✅
137
+
138
+ 📊 Sample 1180
139
+ Truth: Otomangue → Otomí → ott
140
+ Pred: Otomangue → Otomí → ott
141
+ Match: 1.00 ✅
142
+ Family: ✅
143
+ Super: ✅
144
+ Code: ✅
145
+
146
+ 📊 Sample 1200
147
+ Truth: Otomangue → Zapoteco → zaa
148
+ Pred: Otomangue → Zapoteco → zaa
149
+ Match: 1.00 ✅
150
+ Family: ✅
151
+ Super: ✅
152
+ Code: ✅
153
+
154
+ 📊 Sample 1480
155
+ Truth: Mayense → Tojolabal → toj
156
+ Pred: Mayense → Tojolabal → tzh
157
+ Match: 0.67 ✅
158
+ Family: ✅
159
+ Super: ✅
160
+ Code: ❌
161
+
162
+ 📊 Sample 1580
163
+ Truth: Purépecha → Purépecha → tsz
164
+ Pred: Purépecha → Purépecha → tsz
165
+ Match: 1.00 ✅
166
+ Family: ✅
167
+ Super: ✅
168
+ Code: ✅
169
+
170
+ 📊 Sample 1880
171
+ Truth: Yuto-Nahua → Tepehuano → ntp
172
+ Pred: Yuto-Nahua → Tepehuano → ntp
173
+ Match: 1.00 ✅
174
+ Family: ✅
175
+ Super: ✅
176
+ Code: βœ