Yilin0601 committed (verified)
Commit 9018a55 · 1 Parent(s): f4e0fed

Update app.py

Files changed (1): app.py (+30 -22)
app.py CHANGED
@@ -1,12 +1,14 @@
 # app.py
+
 import gradio as gr
 import torch
 import numpy as np
 import librosa
 from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification
 
-# 1. Load your model & feature extractor
-model_name = "path_or_hub_id_of_your_finetuned_model"
+# 1. Load your fine-tuned model & feature extractor from the Hugging Face Hub or local path
+# Replace "YourUsername/YourModelRepo" with the actual repo ID where your fine-tuned model is hosted
+model_name = "YourUsername/YourModelRepo"
 model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
 feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
 
@@ -14,22 +16,25 @@ model.eval()
 
 def classify_accuracy(audio):
     """
-    audio: This will be a tuple (sample_rate, audio_data) when using Gradio's microphone or file upload
-    We need to convert it to the correct format for the model.
+    audio: Gradio provides a tuple (sample_rate, data) when type='numpy'.
+    We'll convert to the correct format, run inference, and return the predicted level.
     """
+    if audio is None:
+        return "No audio provided."
+
     sample_rate, data = audio
 
-    # Convert audio data to float32 numpy array
+    # Ensure the audio is a NumPy array
     if not isinstance(data, np.ndarray):
         data = np.array(data)
 
-    # If sample_rate != 16000, resample (optional)
-    # For small demos, you can do it with librosa
-    if sample_rate != 16000:
-        data = librosa.resample(data, orig_sr=sample_rate, target_sr=16000)
-        sample_rate = 16000
+    # Resample if needed (model expects 16kHz)
+    target_sr = 16000
+    if sample_rate != target_sr:
+        data = librosa.resample(data, orig_sr=sample_rate, target_sr=target_sr)
+        sample_rate = target_sr
 
-    # Extract features
+    # Convert to batch of size 1
     inputs = feature_extractor(
         data,
         sampling_rate=sample_rate,
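
Note: with type='numpy', Gradio delivers the recording as raw int16 PCM, and stereo input arrives with shape (samples, channels); librosa expects floating-point mono audio, so the np.array conversion above is not quite enough on its own. A minimal sketch of a fuller conversion step (the helper name to_mono_float32 is illustrative, not part of this commit):

    import numpy as np

    def to_mono_float32(data: np.ndarray) -> np.ndarray:
        # Collapse stereo (samples, channels) to mono by averaging channels
        if data.ndim > 1:
            data = data.mean(axis=1)
        # Scale integer PCM (e.g. int16) into [-1.0, 1.0] before resampling
        if np.issubdtype(data.dtype, np.integer):
            data = data / np.iinfo(data.dtype).max
        return data.astype(np.float32)

Calling to_mono_float32(data) right after the isinstance check would make the resampling branch safe for both uploaded files and microphone recordings.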
@@ -42,25 +47,28 @@ def classify_accuracy(audio):
     logits = outputs.logits
     predicted_id = torch.argmax(logits, dim=-1).item()
 
-    # Convert to final accuracy level
-    accuracy_level = predicted_id + 3  # or however you map 0..7 → 3..10
-    return f"Accuracy Level: {accuracy_level}"
+    # Map model output (0..7) back to your desired scale (3..10) if needed
+    accuracy_level = predicted_id + 3
 
-# 2. Build Gradio interface
+    return f"Predicted Accuracy Level: {accuracy_level}"
+
+# 2. Build Gradio Interface
 title = "Speech Accuracy Classifier"
-description = "Upload an audio file (or record) to see the predicted accuracy level."
+description = (
+    "Upload an audio file (or record audio) on the left. "
+    "The model will classify the audio's accuracy level on the right."
+)
 
-# We use "microphone=True" in gr.Audio if you want an optional mic input
-# By default, "type='numpy'" returns (sample_rate, data)
+# Gradio Interface:
 demo = gr.Interface(
     fn=classify_accuracy,
-    inputs=gr.Audio(source="upload", type="numpy"),
-    outputs="text",
+    inputs=gr.Audio(source="upload", type="numpy"),  # left side: audio upload
+    outputs="text",                                  # right side: classification result
     title=title,
     description=description,
-    allow_flagging="never"  # optional
+    allow_flagging="never"  # disable user flagging if you prefer
 )
 
-# 3. Launch the Gradio app
+# 3. Launch Gradio App
 if __name__ == "__main__":
     demo.launch()
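
Note: the accuracy_level = predicted_id + 3 line assumes the checkpoint was fine-tuned with exactly eight labels ordered 0..7. That is easy to confirm from the model config before trusting the mapping; a quick check, reusing the placeholder repo ID from this commit:

    from transformers import Wav2Vec2ForSequenceClassification

    model = Wav2Vec2ForSequenceClassification.from_pretrained("YourUsername/YourModelRepo")
    print(model.config.num_labels)  # expect 8 for a 0..7 -> 3..10 mapping
    print(model.config.id2label)    # e.g. {0: "LABEL_0", ...} unless names were set during fine-tuning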
 
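Note: gr.Audio(source="upload", ...) is the Gradio 3.x signature; Gradio 4.x renamed the parameter to sources and made it a list, so this line raises a TypeError on a Space that pins gradio>=4. A sketch of the 4.x equivalent, assuming the upload-plus-microphone intent from the removed comments:

    demo = gr.Interface(
        fn=classify_accuracy,
        inputs=gr.Audio(sources=["upload", "microphone"], type="numpy"),
        outputs="text",
        title=title,
        description=description,
        allow_flagging="never",
    )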
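Note: because classify_accuracy takes a plain (sample_rate, data) tuple, the pipeline can be smoke-tested without launching the interface. A minimal sketch using a synthetic tone (the actual level printed depends on the checkpoint):

    import numpy as np

    sr = 44100  # deliberately not 16 kHz, to exercise the resampling branch
    t = np.linspace(0, 1.0, sr, endpoint=False)
    tone = (0.5 * np.sin(2 * np.pi * 440 * t)).astype(np.float32)
    print(classify_accuracy((sr, tone)))  # prints e.g. "Predicted Accuracy Level: <level>"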