# SimpleViva — app.py
# Gradio migration (commit 2464a55, by gladguy)
import io
import time
from typing import Dict, List, Optional, Tuple

import gradio as gr
import numpy as np
import torch
from scipy.io.wavfile import write
from transformers import pipeline
# --- TTS Engine ---
class FreeVoiceTTS:
    """Text-to-speech engine backed by the Silero TTS model, loaded lazily via torch.hub."""

    def __init__(self):
        # Populated by load_silero_tts() on first use; None until then.
        self.model = None
        self.device = "cpu"
        # Sample rate passed to Silero's apply_tts and returned to Gradio.
        self.sample_rate = 24000

    def load_silero_tts(self) -> bool:
        """Load Silero TTS - lightweight and reliable. Returns True on success."""
        try:
            # Cap CPU threads so inference stays responsive without oversubscription.
            torch.set_num_threads(4)
            model, _example_text = torch.hub.load(
                repo_or_dir='snakers4/silero-models',
                model='silero_tts',
                language='en',
                speaker='v3_en'
            )
            self.model = model
            # Backward-compatible alias: earlier revisions checked `silero_model`.
            self.silero_model = model
            return True
        except Exception as e:
            # Best-effort: the app degrades to text-only if TTS cannot load.
            print(f"Silero TTS loading failed: {e}")
            return False

    def text_to_speech(self, text: str) -> Optional[Tuple[int, np.ndarray]]:
        """Convert text to speech.

        Returns (sample_rate, audio_numpy) suitable for a Gradio Audio
        output, or None when loading or synthesis fails.
        """
        try:
            # Lazy-load on first call; bail out (None) if the model is unavailable.
            if self.model is None and not self.load_silero_tts():
                return None
            audio = self.model.apply_tts(
                text=text,
                speaker='en_0',  # English female voice
                sample_rate=self.sample_rate
            )
            # Silero returns a torch tensor; Gradio expects a numpy array.
            return (self.sample_rate, audio.numpy())
        except Exception as e:
            print(f"Silero TTS failed: {e}")
            return None
# --- STT Engine ---
class SpeechToText:
    """Speech-to-text engine backed by a Whisper ASR pipeline, loaded lazily."""

    def __init__(self):
        # Populated by load_model() on first use; None until then.
        self.transcriber = None

    def load_model(self) -> bool:
        """Load the whisper-tiny ASR pipeline. Returns True on success."""
        try:
            self.transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
            return True
        except Exception as e:
            print(f"STT loading failed: {e}")
            return False

    def transcribe(self, audio_path: str) -> str:
        """Transcribe the audio file at `audio_path`; returns "" on any failure."""
        # Check the cheap precondition first — don't pay for a model load
        # when there is nothing to transcribe.
        if not audio_path:
            return ""
        # If loading fails, return "" explicitly instead of calling None
        # and relying on the broad except below to swallow the TypeError.
        if self.transcriber is None and not self.load_model():
            return ""
        try:
            result = self.transcriber(audio_path)
            return result["text"]
        except Exception as e:
            print(f"Transcription failed: {e}")
            return ""
# --- Application Logic ---
# Initialize Engines (module-level singletons shared by all handlers)
tts_engine = FreeVoiceTTS()
stt_engine = SpeechToText()
# Pre-load models at import time so the first question isn't delayed by
# model downloads (cold start can take a while; failures are logged and
# the app degrades gracefully inside the engines).
print("Loading AI Models...")
tts_engine.load_silero_tts()
stt_engine.load_model()
print("Models Loaded.")
# Static viva question bank, keyed by topic id (ids match the UI topic buttons).
# Each entry has:
#   "question"   - asked aloud (TTS) and shown in the chat
#   "key_points" - keyword phrases used for scoring in evaluate_answer
#   "follow_up"  - appended to the feedback message
#   "difficulty" - informational only; not currently read by the app logic
QUESTION_BANK = {
    "upper_limb": [
        {
            "question": "Describe the course and distribution of the median nerve from its origin to the hand.",
            "key_points": ["brachial plexus roots C5-T1", "medial and lateral cords", "carpal tunnel", "LOAF muscles"],
            "follow_up": "What clinical condition results from median nerve compression at the wrist?",
            "difficulty": "medium"
        },
        {
            "question": "Explain the brachial plexus in detail, including its major branches.",
            "key_points": ["roots, trunks, divisions, cords, branches", "mnemonic: Real Texans Drink Cold Beer", "musculocutaneous, axillary, radial, median, ulnar nerves"],
            "follow_up": "Which cord of the brachial plexus is most vulnerable in shoulder dislocations?",
            "difficulty": "hard"
        },
        {
            "question": "What are the muscles of the rotator cuff and their functions?",
            "key_points": ["supraspinatus", "infraspinatus", "teres minor", "subscapularis", "SITS mnemonic"],
            "follow_up": "Which rotator cuff muscle is most commonly injured?",
            "difficulty": "medium"
        }
    ],
    "lower_limb": [
        {
            "question": "Trace the course of the sciatic nerve from its origin to its terminal branches.",
            "key_points": ["L4-S3 roots", "passes through greater sciatic foramen", "divides into tibial and common fibular nerves", "innervates hamstrings"],
            "follow_up": "What are the clinical manifestations of sciatic nerve injury?",
            "difficulty": "medium"
        },
        {
            "question": "Describe the boundaries and contents of the femoral triangle.",
            "key_points": ["inguinal ligament", "sartorius", "adductor longus", "femoral nerve, artery, vein", "NAVY arrangement"],
            "follow_up": "Why is the femoral triangle important clinically?",
            "difficulty": "medium"
        }
    ],
    "cardiology": [
        {
            "question": "Describe the blood supply to the heart and the coronary circulation.",
            "key_points": ["left and right coronary arteries", "circumflex artery", "left anterior descending", "coronary sinus"],
            "follow_up": "Which coronary artery is most commonly involved in myocardial infarction?",
            "difficulty": "medium"
        },
        {
            "question": "Explain the conduction system of the heart.",
            "key_points": ["SA node", "AV node", "bundle of His", "bundle branches", "Purkinje fibers"],
            "follow_up": "What is the clinical significance of the AV node?",
            "difficulty": "hard"
        }
    ],
    "neuroanatomy": [
        {
            "question": "Describe the blood supply of the brain.",
            "key_points": ["internal carotid arteries", "vertebral arteries", "circle of Willis", "anterior, middle, posterior cerebral arteries"],
            "follow_up": "What is the clinical consequence of middle cerebral artery occlusion?",
            "difficulty": "hard"
        },
        {
            "question": "Name the twelve cranial nerves and their basic functions.",
            "key_points": ["olfactory, optic, oculomotor, trochlear, trigeminal, abducens, facial, vestibulocochlear, glossopharyngeal, vagus, accessory, hypoglossal"],
            "follow_up": "Which cranial nerve has the longest intracranial course?",
            "difficulty": "medium"
        }
    ]
}
def start_session(topic):
    """Begin a viva session for `topic`.

    Returns a 6-tuple matching the click outputs:
    (session_state, chat_history, info_markdown, session_view update,
     topic_view update, professor_audio).
    """
    if not topic:
        # BUG FIX: this branch previously returned only 5 values while the
        # wired outputs (and the success path) expect 6, which makes Gradio
        # raise a return-value mismatch. Keep the arity identical.
        return (
            None,
            [],
            "Please select a topic first.",
            gr.update(visible=False),
            gr.update(visible=True),
            None
        )
    session_state = {
        "topic": topic,
        "question_index": 0,
        "score": 0,
        "history": [],
        "current_question_data": QUESTION_BANK[topic][0]
    }
    first_question = session_state["current_question_data"]["question"]
    # Speak the first question (None if TTS failed; the Audio output tolerates None).
    audio = tts_engine.text_to_speech(first_question)
    return (
        session_state,
        [(None, first_question)],  # chatbot history as (user, bot) tuples
        f"Topic: {topic.replace('_', ' ').title()}",
        gr.update(visible=True),   # show session view
        gr.update(visible=False),  # hide topic selection
        audio                      # auto-played by professor_audio
    )
def process_response(audio_input, text_input, session_state, history):
    """Handle one submitted answer (voice and/or text).

    Returns a 5-tuple matching the submit outputs:
    (session_state, chat_history, text-box value, audio-input value,
     professor_audio). Mutates `history` in place.
    """
    if not session_state:
        # No active session (e.g. a submit arriving after the session ended).
        return session_state, history, "Error: No active session", None, None

    # Voice answer takes precedence. BUG FIX: previously `elif text_input`
    # discarded the typed answer whenever a recording existed but its
    # transcription failed or came back empty; now text is the fallback.
    user_answer = ""
    if audio_input:
        user_answer = stt_engine.transcribe(audio_input)
    if not user_answer and text_input:
        user_answer = text_input
    if not user_answer:
        return session_state, history, "", None, None  # nothing to grade

    # Grade the answer against the current question's key points.
    question_data = session_state["current_question_data"]
    score, feedback = evaluate_answer(user_answer, question_data)

    # Record the exchange in the session transcript.
    session_state["score"] += score
    session_state["history"].append({
        "question": question_data["question"],
        "answer": user_answer,
        "feedback": feedback,
        "score": score
    })
    history.append((user_answer, feedback))

    # Advance to the next question, or wrap up the session.
    session_state["question_index"] += 1
    topic_questions = QUESTION_BANK[session_state["topic"]]
    next_audio = None
    if session_state["question_index"] < len(topic_questions):
        next_question_data = topic_questions[session_state["question_index"]]
        session_state["current_question_data"] = next_question_data
        next_q_text = next_question_data["question"]
        history.append((None, next_q_text))
        # Speak the next question.
        next_audio = tts_engine.text_to_speech(next_q_text)
    else:
        # Session complete: each question is worth up to 10 points.
        final_score = session_state["score"]
        count = len(topic_questions)
        avg = final_score / count if count > 0 else 0
        end_msg = f"Session Complete! Final Score: {final_score:.1f}/{count*10} (Avg: {avg:.1f})"
        history.append((None, end_msg))
        next_audio = tts_engine.text_to_speech(end_msg)
        session_state = None  # reset so a new topic can be chosen

    return (
        session_state,
        history,
        "",    # clear the text input
        None,  # clear the recorded audio
        next_audio
    )
def evaluate_answer(answer: str, question_data: Dict) -> Tuple[float, str]:
    """Score `answer` against the question's key points by keyword matching.

    A key point counts as covered when any of its meaningful words occurs
    as a substring of the (lowercased) answer. BUG FIX: stopwords such as
    "and"/"of" are now ignored, so an answer containing only filler words
    can no longer match multi-word key points and inflate the score.
    Returns (score out of 10, feedback ending with the follow-up, if any).
    """
    stopwords = {"a", "an", "and", "in", "of", "or", "the", "to"}
    answer_lower = answer.lower()
    key_points = question_data["key_points"]
    follow_up = question_data.get('follow_up', '')
    if not key_points:
        # Degenerate question with nothing to match: avoid division by zero.
        return 0.0, follow_up

    def _covered(point: str) -> bool:
        # A point is covered if any non-stopword of it appears in the answer.
        words = [w for w in point.lower().split() if w not in stopwords]
        return any(w in answer_lower for w in words)

    covered_points = sum(1 for point in key_points if _covered(point))
    score = min(10, (covered_points / len(key_points)) * 10)
    if score >= 8:
        feedback = f"Excellent! {follow_up}"
    elif score >= 5:
        feedback = f"Good. You missed some details. {follow_up}"
    else:
        missed = [p for p in key_points if not _covered(p)]
        feedback = f"Key points missed: {', '.join(missed[:2])}. {follow_up}"
    return score, feedback
# --- Gradio UI ---
with gr.Blocks(title="Anatomy Viva Voce", theme=gr.themes.Soft()) as demo:
    # Per-browser-session viva state dict (None when no session is active).
    state = gr.State(None)
    gr.Markdown("# 🧠 Anatomy Viva Voce Simulator")
    gr.Markdown("Practice medical anatomy with an AI Professor. Speak or type your answers!")
    # Topic Selection View (shown first; hidden once a session starts)
    with gr.Group(visible=True) as topic_view:
        gr.Markdown("### Select a Topic to Begin")
        with gr.Row():
            btn_upper = gr.Button("Upper Limb", variant="primary")
            btn_lower = gr.Button("Lower Limb", variant="primary")
            btn_cardio = gr.Button("Cardiology", variant="primary")
            btn_neuro = gr.Button("Neuroanatomy", variant="primary")
    # Session View (hidden until a topic is chosen)
    with gr.Group(visible=False) as session_view:
        session_info = gr.Markdown("Topic: ...")
        chatbot = gr.Chatbot(label="Viva Session", height=400)
        # Professor audio output: hidden player; auto-plays whatever audio
        # the handlers return for this component (question/feedback TTS).
        professor_audio = gr.Audio(label="Professor's Voice", autoplay=True, visible=False)
        with gr.Row():
            with gr.Column(scale=4):
                txt_input = gr.Textbox(
                    show_label=False,
                    placeholder="Type your answer here...",
                    lines=2
                )
            with gr.Column(scale=1):
                audio_input = gr.Audio(
                    source="microphone",
                    type="filepath",  # handlers receive a temp-file path for STT
                    label="Voice Answer",
                    show_label=False
                )
        with gr.Row():
            submit_btn = gr.Button("Submit Answer", variant="primary")
            end_btn = gr.Button("End Session", variant="stop")
    # Event Handlers
    topic_buttons = [btn_upper, btn_lower, btn_cardio, btn_neuro]
    topics = ["upper_limb", "lower_limb", "cardiology", "neuroanatomy"]
    for btn, topic in zip(topic_buttons, topics):
        # gr.State(topic) bakes each button's topic id into its click payload.
        btn.click(
            fn=start_session,
            inputs=[gr.State(topic)],
            outputs=[state, chatbot, session_info, session_view, topic_view, professor_audio]
        )
    # Submit via Text or Audio
    submit_inputs = [audio_input, txt_input, state, chatbot]
    submit_outputs = [state, chatbot, txt_input, audio_input, professor_audio]
    submit_btn.click(fn=process_response, inputs=submit_inputs, outputs=submit_outputs)
    txt_input.submit(fn=process_response, inputs=submit_inputs, outputs=submit_outputs)
    # NOTE(review): `change` fires when a recording finishes, so this line
    # auto-submits voice answers without a button press (no chance to
    # re-record). Re-entry looks harmless — the handler clears audio_input
    # and empty submissions return early — but confirm this is the intended
    # UX versus requiring the Submit button for voice as well.
    audio_input.change(fn=process_response, inputs=submit_inputs, outputs=submit_outputs)
    def reset_ui():
        # End the session: clear state and chat, swap back to topic selection.
        return None, [], gr.update(visible=False), gr.update(visible=True)
    end_btn.click(
        fn=reset_ui,
        inputs=None,
        outputs=[state, chatbot, session_view, topic_view]
    )
if __name__ == "__main__":
    # Bind to all interfaces on 7860 (standard for Hugging Face Spaces / Docker).
    demo.launch(server_name="0.0.0.0", server_port=7860)