Update app.py
Browse files
app.py
CHANGED
|
@@ -1,17 +1,3 @@
|
|
| 1 |
-
# Copyright 2024 Christopher Woodyard
|
| 2 |
-
#
|
| 3 |
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
-
# you may not use this file except in compliance with the License.
|
| 5 |
-
# You may obtain a copy of the License at
|
| 6 |
-
#
|
| 7 |
-
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
-
#
|
| 9 |
-
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
-
# See the License for the specific language governing permissions and
|
| 13 |
-
# limitations under the License.
|
| 14 |
-
|
| 15 |
import gradio as gr
|
| 16 |
from groq import Groq
|
| 17 |
import os
|
|
@@ -39,19 +25,34 @@ def transcribe_audio(audio):
|
|
| 39 |
if audio is None:
|
| 40 |
return ""
|
| 41 |
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
with open(audio_path, "rb") as audio_file:
|
| 45 |
-
audio_data = audio_file.read()
|
| 46 |
-
|
| 47 |
-
# Transcribe the audio using Distil-Whisper
|
| 48 |
-
transcription = client.audio.transcriptions.create(
|
| 49 |
-
file=(os.path.basename(audio_path), audio_data),
|
| 50 |
-
model="distil-whisper-large-v3-en",
|
| 51 |
-
response_format="verbose_json",
|
| 52 |
-
)
|
| 53 |
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
except Exception as e:
|
| 56 |
logging.error(f"Error in transcription: {str(e)}")
|
| 57 |
return f"Error in transcription: {str(e)}"
|
|
@@ -117,6 +118,7 @@ def generate_tutor_output(subject: str, difficulty: str, student_input: str) ->
|
|
| 117 |
"""
|
| 118 |
|
| 119 |
try:
|
|
|
|
| 120 |
completion = client.chat.completions.create(
|
| 121 |
messages=[
|
| 122 |
{
|
|
@@ -131,14 +133,33 @@ def generate_tutor_output(subject: str, difficulty: str, student_input: str) ->
|
|
| 131 |
model="llama3-groq-70b-8192-tool-use-preview",
|
| 132 |
max_tokens=2000,
|
| 133 |
)
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
except Exception as e:
|
| 137 |
logging.error(f"Error generating tutor output: {str(e)}")
|
| 138 |
return {"error": f"Failed to generate tutor output: {str(e)}"}
|
| 139 |
|
| 140 |
def process_output(output: Dict[str, Any]) -> Tuple[str, str, str, str]:
|
| 141 |
try:
|
|
|
|
| 142 |
lesson = markdown2.markdown(str(output.get("lesson", "No lesson available")))
|
| 143 |
example = markdown2.markdown(str(output.get("example", "No example available")))
|
| 144 |
real_world = markdown2.markdown(str(output.get("real_world_problem", "No real-world problem available")))
|
|
@@ -146,7 +167,7 @@ def process_output(output: Dict[str, Any]) -> Tuple[str, str, str, str]:
|
|
| 146 |
return lesson, example, real_world, quiz
|
| 147 |
except Exception as e:
|
| 148 |
logging.error(f"Error processing output: {str(e)}")
|
| 149 |
-
return str(e), "", "", ""
|
| 150 |
|
| 151 |
def create_interface() -> gr.Blocks:
|
| 152 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
@@ -157,19 +178,28 @@ def create_interface() -> gr.Blocks:
|
|
| 157 |
subject = gr.Dropdown(
|
| 158 |
["Art History", "Computer Science", "Literature", "Math", "Music", "Science", "Social Science"],
|
| 159 |
label="Subject",
|
| 160 |
-
info="Choose the subject of your lesson"
|
|
|
|
| 161 |
)
|
| 162 |
difficulty = gr.Radio(
|
| 163 |
["Primary", "Secondary", "Higher Education"],
|
| 164 |
label="Difficulty Level",
|
| 165 |
-
info="Select your proficiency level"
|
|
|
|
| 166 |
)
|
| 167 |
student_input = gr.Textbox(
|
| 168 |
placeholder="Type your topic or question here...",
|
| 169 |
label="Type Your Question",
|
| 170 |
info="Enter the topic you want to explore"
|
| 171 |
)
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
with gr.Row():
|
| 174 |
submit_button = gr.Button("📚 Teach Me", variant="primary")
|
| 175 |
clear_button = gr.Button("🧹 Clear", variant="secondary")
|
|
@@ -204,21 +234,28 @@ def create_interface() -> gr.Blocks:
|
|
| 204 |
|
| 205 |
def process_input(subject, difficulty, text_input, audio_input):
|
| 206 |
try:
|
| 207 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
transcribed_text = transcribe_audio(audio_input)
|
| 209 |
student_input = transcribed_text
|
| 210 |
else:
|
| 211 |
-
|
| 212 |
-
transcribed_text = ""
|
| 213 |
|
| 214 |
logging.info(f"Processing input: subject={subject}, difficulty={difficulty}, student_input={student_input}")
|
| 215 |
|
|
|
|
|
|
|
|
|
|
| 216 |
tutor_output = generate_tutor_output(subject, difficulty, student_input)
|
| 217 |
lesson, example, real_world, quiz = process_output(tutor_output)
|
| 218 |
return transcribed_text, lesson, example, real_world, quiz
|
| 219 |
except Exception as e:
|
| 220 |
logging.error(f"Error in process_input: {str(e)}")
|
| 221 |
-
|
|
|
|
| 222 |
|
| 223 |
def clear_outputs():
|
| 224 |
return [""] * 5 # Clear all five output fields
|
|
@@ -232,7 +269,7 @@ def create_interface() -> gr.Blocks:
|
|
| 232 |
clear_button.click(
|
| 233 |
fn=clear_outputs,
|
| 234 |
inputs=[],
|
| 235 |
-
outputs=[transcription_output, lesson_output, example_output, real_world_output, quiz_output]
|
| 236 |
)
|
| 237 |
|
| 238 |
return demo
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from groq import Groq
|
| 3 |
import os
|
|
|
|
| 25 |
if audio is None:
|
| 26 |
return ""
|
| 27 |
|
| 28 |
+
# Check if audio is a path string or a file object
|
| 29 |
+
audio_path = audio if isinstance(audio, str) else audio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
+
# Handle audio file properly based on gradio's audio component output
|
| 32 |
+
if os.path.exists(audio_path):
|
| 33 |
+
with open(audio_path, "rb") as audio_file:
|
| 34 |
+
audio_data = audio_file.read()
|
| 35 |
+
|
| 36 |
+
# Use the correct API endpoint and parameters for Groq's audio transcription
|
| 37 |
+
# Note: Groq may have updated their API structure for audio transcriptions
|
| 38 |
+
try:
|
| 39 |
+
transcription = client.audio.transcriptions.create(
|
| 40 |
+
file=("audio.wav", audio_data), # Use generic filename with proper content
|
| 41 |
+
model="distil-whisper-large-v3-en",
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
# Check the structure of the response
|
| 45 |
+
if hasattr(transcription, 'text'):
|
| 46 |
+
return transcription.text
|
| 47 |
+
else:
|
| 48 |
+
# Handle different response structure
|
| 49 |
+
return transcription.get('text', "Transcription succeeded but returned no text")
|
| 50 |
+
except AttributeError:
|
| 51 |
+
# Fallback if the API structure has changed
|
| 52 |
+
logging.warning("Audio transcription API structure may have changed, using fallback method")
|
| 53 |
+
return "Audio transcription failed. Please type your question instead."
|
| 54 |
+
else:
|
| 55 |
+
return "Audio file not found. Please try recording again."
|
| 56 |
except Exception as e:
|
| 57 |
logging.error(f"Error in transcription: {str(e)}")
|
| 58 |
return f"Error in transcription: {str(e)}"
|
|
|
|
| 118 |
"""
|
| 119 |
|
| 120 |
try:
|
| 121 |
+
# Make sure we're using the correct model and parameters
|
| 122 |
completion = client.chat.completions.create(
|
| 123 |
messages=[
|
| 124 |
{
|
|
|
|
| 133 |
model="llama3-groq-70b-8192-tool-use-preview",
|
| 134 |
max_tokens=2000,
|
| 135 |
)
|
| 136 |
+
|
| 137 |
+
# Handle the response parsing more robustly
|
| 138 |
+
response_content = completion.choices[0].message.content
|
| 139 |
+
|
| 140 |
+
# Ensure we're getting valid JSON
|
| 141 |
+
try:
|
| 142 |
+
return json.loads(response_content)
|
| 143 |
+
except json.JSONDecodeError:
|
| 144 |
+
# If the response isn't valid JSON, try to extract and format it manually
|
| 145 |
+
logging.warning("Failed to parse JSON response, attempting to format manually")
|
| 146 |
+
|
| 147 |
+
# Create a fallback response structure
|
| 148 |
+
fallback_response = {
|
| 149 |
+
"lesson": "The AI generated a response that couldn't be properly formatted. Here's the raw output:\n\n" + response_content,
|
| 150 |
+
"example": "Could not parse example from response.",
|
| 151 |
+
"real_world_problem": "Could not parse real-world application from response.",
|
| 152 |
+
"quiz": "Could not parse quiz from response."
|
| 153 |
+
}
|
| 154 |
+
return fallback_response
|
| 155 |
+
|
| 156 |
except Exception as e:
|
| 157 |
logging.error(f"Error generating tutor output: {str(e)}")
|
| 158 |
return {"error": f"Failed to generate tutor output: {str(e)}"}
|
| 159 |
|
| 160 |
def process_output(output: Dict[str, Any]) -> Tuple[str, str, str, str]:
|
| 161 |
try:
|
| 162 |
+
# Use markdown2 to convert markdown to HTML, with fallbacks for missing content
|
| 163 |
lesson = markdown2.markdown(str(output.get("lesson", "No lesson available")))
|
| 164 |
example = markdown2.markdown(str(output.get("example", "No example available")))
|
| 165 |
real_world = markdown2.markdown(str(output.get("real_world_problem", "No real-world problem available")))
|
|
|
|
| 167 |
return lesson, example, real_world, quiz
|
| 168 |
except Exception as e:
|
| 169 |
logging.error(f"Error processing output: {str(e)}")
|
| 170 |
+
return f"Error processing output: {str(e)}", "", "", ""
|
| 171 |
|
| 172 |
def create_interface() -> gr.Blocks:
|
| 173 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
|
|
| 178 |
subject = gr.Dropdown(
|
| 179 |
["Art History", "Computer Science", "Literature", "Math", "Music", "Science", "Social Science"],
|
| 180 |
label="Subject",
|
| 181 |
+
info="Choose the subject of your lesson",
|
| 182 |
+
value="Math" # Set a default value
|
| 183 |
)
|
| 184 |
difficulty = gr.Radio(
|
| 185 |
["Primary", "Secondary", "Higher Education"],
|
| 186 |
label="Difficulty Level",
|
| 187 |
+
info="Select your proficiency level",
|
| 188 |
+
value="Secondary" # Set a default value
|
| 189 |
)
|
| 190 |
student_input = gr.Textbox(
|
| 191 |
placeholder="Type your topic or question here...",
|
| 192 |
label="Type Your Question",
|
| 193 |
info="Enter the topic you want to explore"
|
| 194 |
)
|
| 195 |
+
# Updated audio input configuration
|
| 196 |
+
audio_input = gr.Audio(
|
| 197 |
+
type="filepath",
|
| 198 |
+
label="Speak Your Question",
|
| 199 |
+
sources=["microphone"],
|
| 200 |
+
format="wav" # Explicitly specify format
|
| 201 |
+
)
|
| 202 |
+
|
| 203 |
with gr.Row():
|
| 204 |
submit_button = gr.Button("📚 Teach Me", variant="primary")
|
| 205 |
clear_button = gr.Button("🧹 Clear", variant="secondary")
|
|
|
|
| 234 |
|
| 235 |
def process_input(subject, difficulty, text_input, audio_input):
    """Turn the learner's typed or spoken question into the five UI output values.

    Typed text takes priority when both inputs are supplied; the audio is
    transcribed only when no non-blank text was entered.

    Args:
        subject: Subject selected in the UI.
        difficulty: Difficulty level selected in the UI.
        text_input: Contents of the question textbox (may be None or blank).
        audio_input: Value of the audio component (falsy when nothing was recorded).

    Returns:
        A 5-tuple ``(transcription, lesson, example, real_world, quiz)``.
        When there is no usable input, or when something fails, the leading
        items carry a user-facing message and the remaining items are "".
    """
    try:
        # Prioritize text input if both are provided.
        if text_input and text_input.strip():
            student_input = text_input
            transcribed_text = "Using text input instead of audio."
        elif audio_input:
            transcribed_text = transcribe_audio(audio_input)
            student_input = transcribed_text
        else:
            return (
                "No input provided. Please type a question or record audio.",
                "Please provide a question to begin.",
                "",
                "",
                "",
            )

        # Validate before logging or calling the model: an empty transcription
        # must not be sent on as a question.
        if not student_input or not student_input.strip():
            return (
                "Input was empty or could not be processed.",
                "Please provide a valid question.",
                "",
                "",
                "",
            )

        logging.info(
            "Processing input: subject=%s, difficulty=%s, student_input=%s",
            subject,
            difficulty,
            student_input,
        )

        tutor_output = generate_tutor_output(subject, difficulty, student_input)

        # generate_tutor_output reports failures as {"error": "..."}; without
        # this check the message would be lost and the UI would only show the
        # "No lesson available" style placeholders from process_output.
        if isinstance(tutor_output, dict) and tutor_output.get("error"):
            return transcribed_text, str(tutor_output["error"]), "", "", ""

        lesson, example, real_world, quiz = process_output(tutor_output)
        return transcribed_text, lesson, example, real_world, quiz
    except Exception as e:
        # UI boundary: log with traceback and show the message instead of crashing the event.
        logging.exception("Error in process_input: %s", e)
        error_message = f"Error processing your request: {str(e)}"
        return error_message, error_message, "", "", ""
|
| 259 |
|
| 260 |
def clear_outputs():
    """Return one empty string per component wired to the Clear button.

    The Clear button's ``outputs`` list holds six components (transcription,
    the question textbox, lesson, example, real-world problem and quiz), so
    six values must be returned; returning five leaves the handler one value
    short of what Gradio expects.
    """
    return [""] * 6  # Clear all six output fields
|
|
|
|
| 269 |
clear_button.click(
|
| 270 |
fn=clear_outputs,
|
| 271 |
inputs=[],
|
| 272 |
+
outputs=[transcription_output, student_input, lesson_output, example_output, real_world_output, quiz_output]
|
| 273 |
)
|
| 274 |
|
| 275 |
return demo
|