# Source: infy/app.py (Hugging Face Space) — author: shourya
# Last commit: "Use explicit Spaces GPU decorators" (d1105a7)
"""
HuggingFace Enabling Sessions - Gradio Interactive Demo App
Hosted on HuggingFace Spaces
"""
import gradio as gr
import config
import utils
import pandas as pd
try:
    import spaces  # real module is available on Spaces GPU hardware
except Exception:
    # Local/dev fallback: expose a no-op `spaces.GPU` so functions decorated
    # with @spaces.GPU below keep working when the `spaces` package is absent.
    class _SpacesFallback:
        @staticmethod
        def GPU(func=None, *args, **kwargs):
            # Support both bare `@spaces.GPU` and called `@spaces.GPU(...)`.
            if func is not None:
                return func
            return lambda inner_func: inner_func
    spaces = _SpacesFallback()
# ===================== UTILITIES =====================
def load_sample_texts():
    """Load sample texts from the CSV configured in ``config.SAMPLE_DATA_CSV``.

    Returns:
        pandas.DataFrame with the sample rows, or ``None`` if the file is
        missing, unreadable, or malformed.
    """
    try:
        return pd.read_csv(config.SAMPLE_DATA_CSV)
    except (OSError, ValueError):
        # Narrowed from a bare `except:`: FileNotFoundError is an OSError and
        # pandas parser errors (ParserError/EmptyDataError) subclass ValueError.
        # Callers treat None as "no samples available".
        return None
def get_sentiment_examples():
    """Get example texts for sentiment analysis.

    Returns:
        list[str]: one example sentence per line of the samples file, or a
        single configured fallback example if the file is unavailable.
    """
    try:
        with open(f"{config.DEMO_SAMPLES_DIR}/sentiment.txt", encoding="utf-8") as f:
            return f.read().strip().split("\n")
    except (OSError, KeyError, UnicodeDecodeError):
        # Bug fix: the old fallback used .split(), which broke the example
        # sentence into individual words, so callers using [0] got a single
        # word. Return the whole sentence in a list, matching the other
        # get_*_examples helpers.
        return [config.TASKS["sentiment"]["example"]]
def get_ner_examples():
    """Get example texts for NER.

    Returns:
        list[str]: one example per line of the samples file, or the single
        configured fallback example if the file is unavailable.
    """
    try:
        with open(f"{config.DEMO_SAMPLES_DIR}/ner.txt", encoding="utf-8") as f:
            return f.read().strip().split("\n")
    except (OSError, KeyError, UnicodeDecodeError):
        # Narrowed from a bare `except:`; missing file or config entry falls
        # back to the built-in example.
        return [config.TASKS["ner"]["example"]]
def get_qa_examples():
    """Get example context passages for QA.

    Returns:
        list[str]: one context per blank-line-separated paragraph of the
        samples file, or the single configured fallback context.
    """
    try:
        with open(f"{config.DEMO_SAMPLES_DIR}/qa.txt", encoding="utf-8") as f:
            # Contexts are separated by blank lines, not newlines.
            return f.read().strip().split("\n\n")
    except (OSError, KeyError, UnicodeDecodeError):
        # Narrowed from a bare `except:`; degrade to the built-in context.
        return [config.TASKS["qa"]["example_context"]]
def get_summarization_examples():
    """Get example texts for summarization.

    Returns:
        list[str]: one example per line of the samples file, or the single
        configured fallback example if the file is unavailable.
    """
    try:
        with open(f"{config.DEMO_SAMPLES_DIR}/summarization.txt", encoding="utf-8") as f:
            return f.read().strip().split("\n")
    except (OSError, KeyError, UnicodeDecodeError):
        # Narrowed from a bare `except:`; degrade to the built-in example.
        return [config.TASKS["summarization"]["example"]]
def get_embeddings_examples():
    """Get example text pairs for semantic similarity.

    Returns:
        list[str]: one example per line of the samples file, or the two
        configured fallback examples if the file is unavailable.
    """
    try:
        with open(f"{config.DEMO_SAMPLES_DIR}/embeddings.txt", encoding="utf-8") as f:
            return f.read().strip().split("\n")
    except (OSError, KeyError, UnicodeDecodeError):
        # Narrowed from a bare `except:`; the similarity demo needs two texts,
        # so the fallback provides a pair.
        return [config.TASKS["similarity"]["example1"], config.TASKS["similarity"]["example2"]]
# ===================== SENTIMENT ANALYSIS =====================
@spaces.GPU
def demo_sentiment(text):
    """Run sentiment analysis on *text*.

    Returns a (markdown, raw-result) pair for the Markdown and JSON widgets.
    """
    if not text.strip():
        return "Please enter some text", "No input"
    prediction = utils.run_sentiment_analysis(text)
    summary = f"**Label:** {prediction['label']}\n\n**Confidence:** {prediction['score']:.4f}"
    return summary, prediction
# ===================== NER =====================
@spaces.GPU
def demo_ner(text):
    """Run named-entity recognition on *text*.

    Returns a (markdown, raw-entities) pair for the Markdown and JSON widgets.
    """
    if not text.strip():
        return "Please enter some text", "No entities"
    entities = utils.run_ner(text)
    # utils.run_ner signals failure as a list whose first element is
    # {"error": ...} — surface that message instead of formatting entities.
    first = entities[0] if entities and isinstance(entities, list) else None
    if isinstance(first, dict) and "error" in first:
        message = first["error"]
        return f"Error: {message}", {"error": message}
    return utils.format_ner_output(entities), entities
# ===================== QUESTION ANSWERING =====================
@spaces.GPU
def demo_qa(context, question):
    """Answer *question* from *context* using the QA pipeline.

    Returns a (markdown, raw-result) pair; the JSON payload is empty on error.
    """
    if not context.strip() or not question.strip():
        return "Please enter both context and question", {}
    answer = utils.run_qa(context, question)
    if "error" in answer:
        return f"Error: {answer['error']}", {}
    body = f"**Answer:** {answer['answer']}\n\n**Confidence:** {answer['score']:.4f}"
    return body, answer
# ===================== SUMMARIZATION =====================
@spaces.GPU
def demo_summarization(text):
    """Summarize *text*; requires at least 20 whitespace-separated words.

    Returns a (markdown, detail-dict) pair; the dict carries either the
    summary or an error message.
    """
    if not text.strip():
        return "Please enter some text", {"error": "Please enter some text"}
    too_short = "Text too short for summarization. Please provide at least 20 words."
    if len(text.split()) < 20:
        return too_short, {"error": too_short}
    summary = utils.run_summarization(text)
    # utils.run_summarization reports failures as an "Error:"-prefixed string.
    if summary.startswith("Error:"):
        return summary, {"error": summary}
    return summary, {"summary": summary}
# ===================== SEMANTIC SIMILARITY =====================
@spaces.GPU
def demo_similarity(text1, text2):
    """Compute semantic similarity between two texts.

    Returns a (markdown, score) pair; score is 0 on empty input or error.
    """
    if not text1.strip() or not text2.strip():
        return "Please enter both texts", 0
    score = utils.compute_similarity(text1, text2)
    # utils.compute_similarity returns an error message string on failure.
    if isinstance(score, str):
        return score, 0
    body = f"**Similarity Score:** {score:.4f}\n\n(Score ranges from -1 to 1, where 1 means identical semantically)"
    return body, score
# ===================== TOKENIZATION =====================
@spaces.GPU
def demo_tokenization(text):
    """Tokenize *text* and format the tokens for display.

    Returns a (markdown, detail-dict) pair; the second element is "" on error
    or empty input.
    """
    if not text.strip():
        return "Please enter some text", ""
    details = utils.tokenize_text(text)
    if "error" in details:
        return f"Error: {details['error']}", ""
    return utils.format_tokenizer_output(details), details
# ===================== GRADIO INTERFACE =====================
def create_interface():
    """Create the Gradio interface with 3 tabs.

    Builds the whole Blocks layout — Session 1 (intro demos), Session 2
    (tokenizer explorer + inference playground), and a Resources tab — and
    wires each demo button to its handler above. Returns the un-launched
    gr.Blocks app; the caller is responsible for .launch().
    """
    with gr.Blocks(
        title="HuggingFace Enabling Sessions",
    ) as app:
        # Page header shown above all tabs.
        gr.Markdown(
            """
            # 🤗 HuggingFace Enabling Sessions
            **Interactive Demo for Transformers, Hub APIs, and Pipeline Abstractions**
            **Duration:** Session 1: 45 min | Session 2: 90 min
            """
        )
        with gr.Tabs():
            # ===================== TAB 1: SESSION 1 - INTRODUCTION =====================
            with gr.Tab("Session 1: Introduction (45 min)", id="session1"):
                gr.Markdown(
                    """
                    ## 🎯 Introduction to Hugging Face Ecosystem
                    ### What We'll Cover:
                    1. **HuggingFace Platform Overview**
                    - The Hub: Central repository for models, datasets, and spaces
                    - Transformers Library: Core Python library for NLP
                    - Model Cards: Documentation and metadata for transparency
                    2. **Core Abstractions**
                    - **Pipelines:** High-level API for common tasks (sentiment, NER, QA, etc.)
                    - **Models & Tokenizers:** Lower-level building blocks
                    - **Datasets:** Standardized data loading and processing
                    3. **Architecture Patterns**
                    - **Encoders:** BERT, RoBERTa, DistilBERT → Classification, feature extraction
                    - **Decoders:** GPT-2, GPT-3 → Text generation
                    - **Encoder-Decoders:** T5, BART → Seq2seq (translation, summarization, QA)
                    4. **Enterprise NLP Landscape**
                    - Open-source vs. Commercial models
                    - Licensing considerations (MIT, Apache, OpenRAIL, etc.)
                    - Fine-tuning for domain-specific tasks
                    ---
                    ### Live Demo: Explore the Power of Pipelines
                    Try the demos below to see how easy it is to use pre-trained models! 👇
                    """
                )
                # Demo 1: sentiment — markdown shows the result, hidden JSON
                # keeps the raw pipeline output for debugging.
                with gr.Group():
                    gr.Markdown("### 📊 Demo 1: Sentiment Analysis")
                    demo1_input = gr.Textbox(
                        label="Enter text to analyze sentiment",
                        value="I absolutely love this product!",
                        lines=2,
                    )
                    demo1_btn = gr.Button("Analyze Sentiment", variant="primary")
                    demo1_output = gr.Markdown(label="Result")
                    demo1_json = gr.JSON(label="Raw Output", visible=False)
                    demo1_btn.click(
                        demo_sentiment,
                        inputs=[demo1_input],
                        outputs=[demo1_output, demo1_json],
                    )
                # Demo 2: NER — same output pattern as Demo 1.
                with gr.Group():
                    gr.Markdown("### 🏷️ Demo 2: Named Entity Recognition (NER)")
                    demo2_input = gr.Textbox(
                        label="Enter text for entity recognition",
                        value="Apple Inc. was founded by Steve Jobs in Cupertino, California.",
                        lines=2,
                    )
                    demo2_btn = gr.Button("Extract Entities", variant="primary")
                    demo2_output = gr.Markdown(label="Entities Found")
                    demo2_json = gr.JSON(label="Raw Output", visible=False)
                    demo2_btn.click(
                        demo_ner,
                        inputs=[demo2_input],
                        outputs=[demo2_output, demo2_json],
                    )
                gr.Markdown(
                    """
                    ---
                    ### 💡 Key Takeaways
                    - Pre-trained models save time and resources
                    - HuggingFace Pipelines abstract away complexity
                    - Models are available for dozens of NLP tasks
                    - Easy to fine-tune for specialized use cases
                    **Next:** Head to Session 2 for hands-on development with Tokenizers and Advanced Inference! 🚀
                    """
                )
            # ===================== TAB 2: SESSION 2 - HANDS-ON DEVELOPER =====================
            with gr.Tab("Session 2: Hands-On Developer (90 min)", id="session2"):
                gr.Markdown(
                    """
                    ## 👨‍💻 Building End-to-End NLP Workflows with Hugging Face
                    ### Agenda:
                    1. **Tokenization Deep Dive** (15 min)
                    - Understanding tokenization, token IDs, and attention masks
                    - How models process text internally
                    2. **Inference Playground** (45 min)
                    - Interactive demos across multiple NLP tasks
                    - Learn how to use different model architectures
                    - See real outputs and understand model confidence
                    3. **Exercise Checkpoints** (20 min)
                    - Try your own text inputs
                    - Experiment with different examples
                    - Q&A and troubleshooting
                    4. **Next Steps & Resources** (10 min)
                    - Publishing models to the Hub
                    - Fine-tuning workflow overview
                    - Post-session project ideas
                    ---
                    ### 🔤 Part 1: Tokenization Explorer
                    """
                )
                # Part 1: tokenizer explorer widgets.
                with gr.Group():
                    gr.Markdown(
                        """
                        #### How Tokenization Works
                        - Text is split into tokens (words/subwords)
                        - Each token gets a unique ID
                        - Attention masks indicate which tokens are real vs. padding
                        - This is how transformers \"understand\" text!
                        """
                    )
                    tok_input = gr.Textbox(
                        label="Enter text to tokenize",
                        value="Hello, how are you?",
                        lines=2,
                    )
                    tok_btn = gr.Button("Tokenize", variant="primary")
                    tok_output = gr.Markdown(label="Tokens")
                    tok_json = gr.JSON(label="Tokenization Details", visible=False)
                    tok_btn.click(
                        demo_tokenization,
                        inputs=[tok_input],
                        outputs=[tok_output, tok_json],
                    )
                gr.Markdown(
                    """
                    ---
                    ### 🎯 Part 2: Inference Playground (Choose a Task)
                    """
                )
                # Part 2: nested tabs, one per NLP task. Default textbox values
                # come from the sample files, falling back to hard-coded text.
                with gr.Tabs():
                    # Task 1: Sentiment
                    with gr.Tab("Sentiment Analysis"):
                        gr.Markdown(
                            """
                            **Classify text as positive, negative, or neutral**
                            Model: DistilBERT fine-tuned on SST-2 dataset
                            """
                        )
                        sent_input = gr.Textbox(
                            label="Enter text",
                            value=get_sentiment_examples()[0] if get_sentiment_examples() else "I love this!",
                            lines=3,
                        )
                        sent_btn = gr.Button("Analyze", variant="primary")
                        sent_output = gr.Markdown(label="Result")
                        sent_json = gr.JSON(label="Details", visible=False)
                        sent_btn.click(
                            demo_sentiment,
                            inputs=[sent_input],
                            outputs=[sent_output, sent_json],
                        )
                    # Task 2: NER
                    with gr.Tab("Named Entity Recognition"):
                        gr.Markdown(
                            """
                            **Identify people, organizations, locations, and more**
                            Model: BERT fine-tuned on CoNLL-2003 NER dataset
                            """
                        )
                        ner_input = gr.Textbox(
                            label="Enter text",
                            value=get_ner_examples()[0] if get_ner_examples() else "Apple Inc. was founded by Steve Jobs",
                            lines=3,
                        )
                        ner_btn = gr.Button("Extract Entities", variant="primary")
                        ner_output = gr.Markdown(label="Entities")
                        ner_json = gr.JSON(label="Details", visible=False)
                        ner_btn.click(
                            demo_ner,
                            inputs=[ner_input],
                            outputs=[ner_output, ner_json],
                        )
                    # Task 3: QA
                    with gr.Tab("Question Answering"):
                        gr.Markdown(
                            """
                            **Answer questions based on provided context**
                            Model: RoBERTa fine-tuned on SQuAD 2.0
                            """
                        )
                        qa_examples = get_qa_examples()
                        qa_context = gr.Textbox(
                            label="Context/Passage",
                            value=qa_examples[0] if qa_examples else config.TASKS["qa"]["example_context"],
                            lines=4,
                        )
                        qa_question = gr.Textbox(
                            label="Question",
                            value="What is the Hugging Face Hub?",
                            lines=2,
                        )
                        qa_btn = gr.Button("Get Answer", variant="primary")
                        qa_output = gr.Markdown(label="Answer")
                        qa_json = gr.JSON(label="Details", visible=False)
                        qa_btn.click(
                            demo_qa,
                            inputs=[qa_context, qa_question],
                            outputs=[qa_output, qa_json],
                        )
                    # Task 4: Summarization
                    with gr.Tab("Text Summarization"):
                        gr.Markdown(
                            """
                            **Generate concise summaries of longer texts**
                            Model: BART large fine-tuned on CNN/DailyMail
                            """
                        )
                        sum_examples = get_summarization_examples()
                        sum_input = gr.Textbox(
                            label="Text to summarize (min 20 words)",
                            value=sum_examples[0] if sum_examples else config.TASKS["summarization"]["example"],
                            lines=5,
                        )
                        sum_btn = gr.Button("Summarize", variant="primary")
                        sum_output = gr.Markdown(label="Summary")
                        sum_json = gr.JSON(label="Details", visible=False)
                        sum_btn.click(
                            demo_summarization,
                            inputs=[sum_input],
                            outputs=[sum_output, sum_json],
                        )
                    # Task 5: Semantic Similarity
                    with gr.Tab("Semantic Similarity"):
                        gr.Markdown(
                            """
                            **Compare semantic similarity between texts**
                            Model: Sentence-BERT (all-MiniLM-L6-v2)
                            """
                        )
                        emb_examples = get_embeddings_examples()
                        emb_text1 = gr.Textbox(
                            label="First text",
                            value=emb_examples[0] if len(emb_examples) > 0 else "The cat is sleeping",
                            lines=2,
                        )
                        emb_text2 = gr.Textbox(
                            label="Second text",
                            value=emb_examples[1] if len(emb_examples) > 1 else "A feline is resting",
                            lines=2,
                        )
                        emb_btn = gr.Button("Compare", variant="primary")
                        emb_output = gr.Markdown(label="Similarity")
                        emb_json = gr.JSON(label="Details", visible=False)
                        emb_btn.click(
                            demo_similarity,
                            inputs=[emb_text1, emb_text2],
                            outputs=[emb_output, emb_json],
                        )
                gr.Markdown(
                    """
                    ---
                    ### 🚀 Part 3: Key Concepts Recap
                    ✅ **Transformers Architecture:**
                    - Self-attention mechanisms allow models to focus on relevant parts of text
                    - Pre-training on large corpora + fine-tuning = transfer learning
                    ✅ **Using HuggingFace:**
                    - Pipelines for quick demos
                    - Fine-tuning for custom tasks
                    - Model Hub for sharing and collaboration
                    ✅ **Production Considerations:**
                    - Model size vs. accuracy tradeoff
                    - Quantization and distillation for faster inference
                    - Licensing and compliance for models
                    """
                )
            # ===================== TAB 3: RESOURCES =====================
            with gr.Tab("Resources & Next Steps", id="resources"):
                gr.Markdown(
                    """
                    ## 📚 Learning Resources
                    ### Official Documentation
                    - [Hugging Face Transformers Documentation](https://huggingface.co/docs/transformers/)
                    - [Hugging Face Datasets Documentation](https://huggingface.co/docs/datasets/)
                    - [Hugging Face Hub Documentation](https://huggingface.co/docs/hub/)
                    ### Tutorials & Courses
                    - [Hugging Face Course (Free)](https://huggingface.co/course/)
                    - [Transformers from Scratch](https://huggingface.co/docs/transformers/training)
                    - [Fine-tuning Guide](https://huggingface.co/docs/transformers/training)
                    ---
                    ## 🛠️ Popular Models to Explore
                    ### Text Classification
                    - `distilbert-base-uncased-finetuned-sst-2-english` - Sentiment Analysis
                    - `roberta-base` - General purpose classifier
                    - `bert-base-multilingual-cased` - Multilingual support
                    ### Named Entity Recognition
                    - `dslim/bert-base-NER` - English NER
                    - `xlm-roberta-base` - Multilingual NER
                    ### Question Answering
                    - `deepset/roberta-base-squad2` - SQuAD 2.0 fine-tuned
                    - `bert-large-uncased-whole-word-masking-finetuned-squad` - BERT Large
                    ### Text Generation
                    - `gpt2` - Lightweight generation
                    - `facebook/bart-large` - Sequence-to-sequence
                    - `google/t5-base` - T5 for various tasks
                    ### Embeddings & Similarity
                    - `sentence-transformers/all-MiniLM-L6-v2` - Fast & efficient
                    - `sentence-transformers/all-mpnet-base-v2` - High quality
                    ---
                    ## 💾 Popular Datasets
                    - `glue` - General Language Understanding Evaluation
                    - `wikitext` - Large language model benchmark
                    - `squad` - Question answering dataset
                    - `conll2003` - Named entity recognition
                    - `imdb` - Sentiment analysis
                    ---
                    ## 🎯 Next Steps After the Sessions
                    ### Beginner Path
                    1. Explore models on the Hub
                    2. Try different models on your own data
                    3. Learn about fine-tuning concepts
                    ### Intermediate Path
                    1. Fine-tune a pre-trained model on your dataset
                    2. Deploy a model to Spaces (like this demo!)
                    3. Publish your model to the Hub
                    ### Advanced Path
                    1. Build multi-stage pipelines
                    2. Implement custom training loops
                    3. Contribute to open-source projects
                    ---
                    ## 🔗 Community & Support
                    - [Hugging Face Forums](https://discuss.huggingface.co/)
                    - [GitHub Issues](https://github.com/huggingface/transformers/issues)
                    - [Twitter/X @huggingface](https://twitter.com/huggingface)
                    - Company Slack/Teams Channels
                    ---
                    ## 📝 Session Information
                    **Session 1: Introduction to Hugging Face** (45 minutes)
                    - Overview of the ecosystem
                    - Core abstractions (Pipelines, Models, Tokenizers)
                    - Architecture patterns
                    - Enterprise considerations
                    **Session 2: Hands-On Developer Workshop** (90 minutes)
                    - Tokenization deep dive
                    - Interactive inference playground (5+ NLP tasks)
                    - Live coding and experimentation
                    - Best practices and next steps
                    ---
                    ### Questions?
                    Feel free to reach out via Slack or email during the sessions! 💬
                    """
                )
    return app
# Build the UI at import time so Hugging Face Spaces can discover `app`
# even when this module is imported rather than run directly.
app = create_interface()
# ===================== MAIN =====================
if __name__ == "__main__":
    app.launch(
        server_name="0.0.0.0",  # bind all interfaces (required inside the Spaces container)
        server_port=7860,       # the port Spaces expects the app to serve on
        ssr_mode=False,
    )