# Source: infy/app.py (Hugging Face Space) — author: shourya
# Last commit: "Use explicit Spaces GPU decorators" (d1105a7)
"""
HuggingFace Enabling Sessions - Gradio Interactive Demo App
Hosted on HuggingFace Spaces
"""
import gradio as gr
import config
import utils
import pandas as pd
try:
    import spaces  # real module is available on Spaces GPU hardware
except Exception:
    # Local/dev fallback: expose a no-op `spaces.GPU` so functions decorated
    # with @spaces.GPU below keep working when the `spaces` package is absent.
    class _SpacesFallback:
        @staticmethod
        def GPU(func=None, *args, **kwargs):
            # Support both bare `@spaces.GPU` and called `@spaces.GPU(...)`.
            if func is not None:
                return func
            return lambda inner_func: inner_func
    spaces = _SpacesFallback()
# ===================== UTILITIES =====================
def load_sample_texts():
    """Load sample texts from the CSV configured in ``config.SAMPLE_DATA_CSV``.

    Returns:
        pandas.DataFrame with the sample rows, or ``None`` if the file is
        missing, unreadable, or malformed.
    """
    try:
        return pd.read_csv(config.SAMPLE_DATA_CSV)
    except (OSError, ValueError):
        # Narrowed from a bare `except:`: FileNotFoundError is an OSError and
        # pandas parser errors (ParserError/EmptyDataError) subclass ValueError.
        # Callers treat None as "no samples available".
        return None
def get_sentiment_examples():
    """Get example texts for sentiment analysis.

    Returns:
        list[str]: one example sentence per line of the samples file, or a
        single configured fallback example if the file is unavailable.
    """
    try:
        with open(f"{config.DEMO_SAMPLES_DIR}/sentiment.txt", encoding="utf-8") as f:
            return f.read().strip().split("\n")
    except (OSError, KeyError, UnicodeDecodeError):
        # Bug fix: the old fallback used .split(), which broke the example
        # sentence into individual words, so callers using [0] got a single
        # word. Return the whole sentence in a list, matching the other
        # get_*_examples helpers.
        return [config.TASKS["sentiment"]["example"]]
def get_ner_examples():
    """Get example texts for NER.

    Returns:
        list[str]: one example per line of the samples file, or the single
        configured fallback example if the file is unavailable.
    """
    try:
        with open(f"{config.DEMO_SAMPLES_DIR}/ner.txt", encoding="utf-8") as f:
            return f.read().strip().split("\n")
    except (OSError, KeyError, UnicodeDecodeError):
        # Narrowed from a bare `except:`; missing file or config entry falls
        # back to the built-in example.
        return [config.TASKS["ner"]["example"]]
def get_qa_examples():
    """Get example context passages for QA.

    Returns:
        list[str]: one context per blank-line-separated paragraph of the
        samples file, or the single configured fallback context.
    """
    try:
        with open(f"{config.DEMO_SAMPLES_DIR}/qa.txt", encoding="utf-8") as f:
            # Contexts are separated by blank lines, not newlines.
            return f.read().strip().split("\n\n")
    except (OSError, KeyError, UnicodeDecodeError):
        # Narrowed from a bare `except:`; degrade to the built-in context.
        return [config.TASKS["qa"]["example_context"]]
def get_summarization_examples():
    """Get example texts for summarization.

    Returns:
        list[str]: one example per line of the samples file, or the single
        configured fallback example if the file is unavailable.
    """
    try:
        with open(f"{config.DEMO_SAMPLES_DIR}/summarization.txt", encoding="utf-8") as f:
            return f.read().strip().split("\n")
    except (OSError, KeyError, UnicodeDecodeError):
        # Narrowed from a bare `except:`; degrade to the built-in example.
        return [config.TASKS["summarization"]["example"]]
def get_embeddings_examples():
    """Get example text pairs for semantic similarity.

    Returns:
        list[str]: one example per line of the samples file, or the two
        configured fallback examples if the file is unavailable.
    """
    try:
        with open(f"{config.DEMO_SAMPLES_DIR}/embeddings.txt", encoding="utf-8") as f:
            return f.read().strip().split("\n")
    except (OSError, KeyError, UnicodeDecodeError):
        # Narrowed from a bare `except:`; the similarity demo needs two texts,
        # so the fallback provides a pair.
        return [config.TASKS["similarity"]["example1"], config.TASKS["similarity"]["example2"]]
# ===================== SENTIMENT ANALYSIS =====================
@spaces.GPU
def demo_sentiment(text):
    """Run sentiment analysis on *text*.

    Returns a (markdown, raw-result) pair for the Markdown and JSON widgets.
    """
    if not text.strip():
        return "Please enter some text", "No input"
    prediction = utils.run_sentiment_analysis(text)
    summary = f"**Label:** {prediction['label']}\n\n**Confidence:** {prediction['score']:.4f}"
    return summary, prediction
# ===================== NER =====================
@spaces.GPU
def demo_ner(text):
    """Run named-entity recognition on *text*.

    Returns a (markdown, raw-entities) pair for the Markdown and JSON widgets.
    """
    if not text.strip():
        return "Please enter some text", "No entities"
    entities = utils.run_ner(text)
    # utils.run_ner signals failure as a list whose first element is
    # {"error": ...} — surface that message instead of formatting entities.
    first = entities[0] if entities and isinstance(entities, list) else None
    if isinstance(first, dict) and "error" in first:
        message = first["error"]
        return f"Error: {message}", {"error": message}
    return utils.format_ner_output(entities), entities
# ===================== QUESTION ANSWERING =====================
@spaces.GPU
def demo_qa(context, question):
    """Answer *question* from *context* using the QA pipeline.

    Returns a (markdown, raw-result) pair; the JSON payload is empty on error.
    """
    if not context.strip() or not question.strip():
        return "Please enter both context and question", {}
    answer = utils.run_qa(context, question)
    if "error" in answer:
        return f"Error: {answer['error']}", {}
    body = f"**Answer:** {answer['answer']}\n\n**Confidence:** {answer['score']:.4f}"
    return body, answer
# ===================== SUMMARIZATION =====================
@spaces.GPU
def demo_summarization(text):
    """Summarize *text*; requires at least 20 whitespace-separated words.

    Returns a (markdown, detail-dict) pair; the dict carries either the
    summary or an error message.
    """
    if not text.strip():
        return "Please enter some text", {"error": "Please enter some text"}
    too_short = "Text too short for summarization. Please provide at least 20 words."
    if len(text.split()) < 20:
        return too_short, {"error": too_short}
    summary = utils.run_summarization(text)
    # utils.run_summarization reports failures as an "Error:"-prefixed string.
    if summary.startswith("Error:"):
        return summary, {"error": summary}
    return summary, {"summary": summary}
# ===================== SEMANTIC SIMILARITY =====================
@spaces.GPU
def demo_similarity(text1, text2):
    """Compute semantic similarity between two texts.

    Returns a (markdown, score) pair; score is 0 on empty input or error.
    """
    if not text1.strip() or not text2.strip():
        return "Please enter both texts", 0
    score = utils.compute_similarity(text1, text2)
    # utils.compute_similarity returns an error message string on failure.
    if isinstance(score, str):
        return score, 0
    body = f"**Similarity Score:** {score:.4f}\n\n(Score ranges from -1 to 1, where 1 means identical semantically)"
    return body, score
# ===================== TOKENIZATION =====================
@spaces.GPU
def demo_tokenization(text):
    """Tokenize *text* and format the tokens for display.

    Returns a (markdown, detail-dict) pair; the second element is "" on error
    or empty input.
    """
    if not text.strip():
        return "Please enter some text", ""
    details = utils.tokenize_text(text)
    if "error" in details:
        return f"Error: {details['error']}", ""
    return utils.format_tokenizer_output(details), details
# ===================== GRADIO INTERFACE =====================
def create_interface():
    """Create the Gradio interface with 3 tabs.

    Builds the whole Blocks layout — Session 1 (intro demos), Session 2
    (tokenizer explorer + inference playground), and a Resources tab — and
    wires each demo button to its handler above. Returns the un-launched
    gr.Blocks app; the caller is responsible for .launch().
    """
    with gr.Blocks(
        title="HuggingFace Enabling Sessions",
    ) as app:
        # Page header shown above all tabs.
        gr.Markdown(
            """
            # 🤗 HuggingFace Enabling Sessions
            **Interactive Demo for Transformers, Hub APIs, and Pipeline Abstractions**
            **Duration:** Session 1: 45 min | Session 2: 90 min
            """
        )
        with gr.Tabs():
            # ===================== TAB 1: SESSION 1 - INTRODUCTION =====================
            with gr.Tab("Session 1: Introduction (45 min)", id="session1"):
                gr.Markdown(
                    """
                    ## 🎯 Introduction to Hugging Face Ecosystem
                    ### What We'll Cover:
                    1. **HuggingFace Platform Overview**
                    - The Hub: Central repository for models, datasets, and spaces
                    - Transformers Library: Core Python library for NLP
                    - Model Cards: Documentation and metadata for transparency
                    2. **Core Abstractions**
                    - **Pipelines:** High-level API for common tasks (sentiment, NER, QA, etc.)
                    - **Models & Tokenizers:** Lower-level building blocks
                    - **Datasets:** Standardized data loading and processing
                    3. **Architecture Patterns**
                    - **Encoders:** BERT, RoBERTa, DistilBERT → Classification, feature extraction
                    - **Decoders:** GPT-2, GPT-3 → Text generation
                    - **Encoder-Decoders:** T5, BART → Seq2seq (translation, summarization, QA)
                    4. **Enterprise NLP Landscape**
                    - Open-source vs. Commercial models
                    - Licensing considerations (MIT, Apache, OpenRAIL, etc.)
                    - Fine-tuning for domain-specific tasks
                    ---
                    ### Live Demo: Explore the Power of Pipelines
                    Try the demos below to see how easy it is to use pre-trained models! 👇
                    """
                )
                # Demo 1: sentiment — markdown shows the result, hidden JSON
                # keeps the raw pipeline output for debugging.
                with gr.Group():
                    gr.Markdown("### 📊 Demo 1: Sentiment Analysis")
                    demo1_input = gr.Textbox(
                        label="Enter text to analyze sentiment",
                        value="I absolutely love this product!",
                        lines=2,
                    )
                    demo1_btn = gr.Button("Analyze Sentiment", variant="primary")
                    demo1_output = gr.Markdown(label="Result")
                    demo1_json = gr.JSON(label="Raw Output", visible=False)
                    demo1_btn.click(
                        demo_sentiment,
                        inputs=[demo1_input],
                        outputs=[demo1_output, demo1_json],
                    )
                # Demo 2: NER — same output pattern as Demo 1.
                with gr.Group():
                    gr.Markdown("### 🏷️ Demo 2: Named Entity Recognition (NER)")
                    demo2_input = gr.Textbox(
                        label="Enter text for entity recognition",
                        value="Apple Inc. was founded by Steve Jobs in Cupertino, California.",
                        lines=2,
                    )
                    demo2_btn = gr.Button("Extract Entities", variant="primary")
                    demo2_output = gr.Markdown(label="Entities Found")
                    demo2_json = gr.JSON(label="Raw Output", visible=False)
                    demo2_btn.click(
                        demo_ner,
                        inputs=[demo2_input],
                        outputs=[demo2_output, demo2_json],
                    )
                gr.Markdown(
                    """
                    ---
                    ### 💡 Key Takeaways
                    - Pre-trained models save time and resources
                    - HuggingFace Pipelines abstract away complexity
                    - Models are available for dozens of NLP tasks
                    - Easy to fine-tune for specialized use cases
                    **Next:** Head to Session 2 for hands-on development with Tokenizers and Advanced Inference! 🚀
                    """
                )
            # ===================== TAB 2: SESSION 2 - HANDS-ON DEVELOPER =====================
            with gr.Tab("Session 2: Hands-On Developer (90 min)", id="session2"):
                gr.Markdown(
                    """
                    ## 👨‍💻 Building End-to-End NLP Workflows with Hugging Face
                    ### Agenda:
                    1. **Tokenization Deep Dive** (15 min)
                    - Understanding tokenization, token IDs, and attention masks
                    - How models process text internally
                    2. **Inference Playground** (45 min)
                    - Interactive demos across multiple NLP tasks
                    - Learn how to use different model architectures
                    - See real outputs and understand model confidence
                    3. **Exercise Checkpoints** (20 min)
                    - Try your own text inputs
                    - Experiment with different examples
                    - Q&A and troubleshooting
                    4. **Next Steps & Resources** (10 min)
                    - Publishing models to the Hub
                    - Fine-tuning workflow overview
                    - Post-session project ideas
                    ---
                    ### 🔤 Part 1: Tokenization Explorer
                    """
                )
                # Part 1: tokenizer explorer widgets.
                with gr.Group():
                    gr.Markdown(
                        """
                        #### How Tokenization Works
                        - Text is split into tokens (words/subwords)
                        - Each token gets a unique ID
                        - Attention masks indicate which tokens are real vs. padding
                        - This is how transformers \"understand\" text!
                        """
                    )
                    tok_input = gr.Textbox(
                        label="Enter text to tokenize",
                        value="Hello, how are you?",
                        lines=2,
                    )
                    tok_btn = gr.Button("Tokenize", variant="primary")
                    tok_output = gr.Markdown(label="Tokens")
                    tok_json = gr.JSON(label="Tokenization Details", visible=False)
                    tok_btn.click(
                        demo_tokenization,
                        inputs=[tok_input],
                        outputs=[tok_output, tok_json],
                    )
                gr.Markdown(
                    """
                    ---
                    ### 🎯 Part 2: Inference Playground (Choose a Task)
                    """
                )
                # Part 2: nested tabs, one per NLP task. Default textbox values
                # come from the sample files, falling back to hard-coded text.
                with gr.Tabs():
                    # Task 1: Sentiment
                    with gr.Tab("Sentiment Analysis"):
                        gr.Markdown(
                            """
                            **Classify text as positive, negative, or neutral**
                            Model: DistilBERT fine-tuned on SST-2 dataset
                            """
                        )
                        sent_input = gr.Textbox(
                            label="Enter text",
                            value=get_sentiment_examples()[0] if get_sentiment_examples() else "I love this!",
                            lines=3,
                        )
                        sent_btn = gr.Button("Analyze", variant="primary")
                        sent_output = gr.Markdown(label="Result")
                        sent_json = gr.JSON(label="Details", visible=False)
                        sent_btn.click(
                            demo_sentiment,
                            inputs=[sent_input],
                            outputs=[sent_output, sent_json],
                        )
                    # Task 2: NER
                    with gr.Tab("Named Entity Recognition"):
                        gr.Markdown(
                            """
                            **Identify people, organizations, locations, and more**
                            Model: BERT fine-tuned on CoNLL-2003 NER dataset
                            """
                        )
                        ner_input = gr.Textbox(
                            label="Enter text",
                            value=get_ner_examples()[0] if get_ner_examples() else "Apple Inc. was founded by Steve Jobs",
                            lines=3,
                        )
                        ner_btn = gr.Button("Extract Entities", variant="primary")
                        ner_output = gr.Markdown(label="Entities")
                        ner_json = gr.JSON(label="Details", visible=False)
                        ner_btn.click(
                            demo_ner,
                            inputs=[ner_input],
                            outputs=[ner_output, ner_json],
                        )
                    # Task 3: QA
                    with gr.Tab("Question Answering"):
                        gr.Markdown(
                            """
                            **Answer questions based on provided context**
                            Model: RoBERTa fine-tuned on SQuAD 2.0
                            """
                        )
                        qa_examples = get_qa_examples()
                        qa_context = gr.Textbox(
                            label="Context/Passage",
                            value=qa_examples[0] if qa_examples else config.TASKS["qa"]["example_context"],
                            lines=4,
                        )
                        qa_question = gr.Textbox(
                            label="Question",
                            value="What is the Hugging Face Hub?",
                            lines=2,
                        )
                        qa_btn = gr.Button("Get Answer", variant="primary")
                        qa_output = gr.Markdown(label="Answer")
                        qa_json = gr.JSON(label="Details", visible=False)
                        qa_btn.click(
                            demo_qa,
                            inputs=[qa_context, qa_question],
                            outputs=[qa_output, qa_json],
                        )
                    # Task 4: Summarization
                    with gr.Tab("Text Summarization"):
                        gr.Markdown(
                            """
                            **Generate concise summaries of longer texts**
                            Model: BART large fine-tuned on CNN/DailyMail
                            """
                        )
                        sum_examples = get_summarization_examples()
                        sum_input = gr.Textbox(
                            label="Text to summarize (min 20 words)",
                            value=sum_examples[0] if sum_examples else config.TASKS["summarization"]["example"],
                            lines=5,
                        )
                        sum_btn = gr.Button("Summarize", variant="primary")
                        sum_output = gr.Markdown(label="Summary")
                        sum_json = gr.JSON(label="Details", visible=False)
                        sum_btn.click(
                            demo_summarization,
                            inputs=[sum_input],
                            outputs=[sum_output, sum_json],
                        )
                    # Task 5: Semantic Similarity
                    with gr.Tab("Semantic Similarity"):
                        gr.Markdown(
                            """
                            **Compare semantic similarity between texts**
                            Model: Sentence-BERT (all-MiniLM-L6-v2)
                            """
                        )
                        emb_examples = get_embeddings_examples()
                        emb_text1 = gr.Textbox(
                            label="First text",
                            value=emb_examples[0] if len(emb_examples) > 0 else "The cat is sleeping",
                            lines=2,
                        )
                        emb_text2 = gr.Textbox(
                            label="Second text",
                            value=emb_examples[1] if len(emb_examples) > 1 else "A feline is resting",
                            lines=2,
                        )
                        emb_btn = gr.Button("Compare", variant="primary")
                        emb_output = gr.Markdown(label="Similarity")
                        emb_json = gr.JSON(label="Details", visible=False)
                        emb_btn.click(
                            demo_similarity,
                            inputs=[emb_text1, emb_text2],
                            outputs=[emb_output, emb_json],
                        )
                gr.Markdown(
                    """
                    ---
                    ### 🚀 Part 3: Key Concepts Recap
                    ✅ **Transformers Architecture:**
                    - Self-attention mechanisms allow models to focus on relevant parts of text
                    - Pre-training on large corpora + fine-tuning = transfer learning
                    ✅ **Using HuggingFace:**
                    - Pipelines for quick demos
                    - Fine-tuning for custom tasks
                    - Model Hub for sharing and collaboration
                    ✅ **Production Considerations:**
                    - Model size vs. accuracy tradeoff
                    - Quantization and distillation for faster inference
                    - Licensing and compliance for models
                    """
                )
            # ===================== TAB 3: RESOURCES =====================
            with gr.Tab("Resources & Next Steps", id="resources"):
                gr.Markdown(
                    """
                    ## 📚 Learning Resources
                    ### Official Documentation
                    - [Hugging Face Transformers Documentation](https://huggingface.co/docs/transformers/)
                    - [Hugging Face Datasets Documentation](https://huggingface.co/docs/datasets/)
                    - [Hugging Face Hub Documentation](https://huggingface.co/docs/hub/)
                    ### Tutorials & Courses
                    - [Hugging Face Course (Free)](https://huggingface.co/course/)
                    - [Transformers from Scratch](https://huggingface.co/docs/transformers/training)
                    - [Fine-tuning Guide](https://huggingface.co/docs/transformers/training)
                    ---
                    ## 🛠️ Popular Models to Explore
                    ### Text Classification
                    - `distilbert-base-uncased-finetuned-sst-2-english` - Sentiment Analysis
                    - `roberta-base` - General purpose classifier
                    - `bert-base-multilingual-cased` - Multilingual support
                    ### Named Entity Recognition
                    - `dslim/bert-base-NER` - English NER
                    - `xlm-roberta-base` - Multilingual NER
                    ### Question Answering
                    - `deepset/roberta-base-squad2` - SQuAD 2.0 fine-tuned
                    - `bert-large-uncased-whole-word-masking-finetuned-squad` - BERT Large
                    ### Text Generation
                    - `gpt2` - Lightweight generation
                    - `facebook/bart-large` - Sequence-to-sequence
                    - `google/t5-base` - T5 for various tasks
                    ### Embeddings & Similarity
                    - `sentence-transformers/all-MiniLM-L6-v2` - Fast & efficient
                    - `sentence-transformers/all-mpnet-base-v2` - High quality
                    ---
                    ## 💾 Popular Datasets
                    - `glue` - General Language Understanding Evaluation
                    - `wikitext` - Large language model benchmark
                    - `squad` - Question answering dataset
                    - `conll2003` - Named entity recognition
                    - `imdb` - Sentiment analysis
                    ---
                    ## 🎯 Next Steps After the Sessions
                    ### Beginner Path
                    1. Explore models on the Hub
                    2. Try different models on your own data
                    3. Learn about fine-tuning concepts
                    ### Intermediate Path
                    1. Fine-tune a pre-trained model on your dataset
                    2. Deploy a model to Spaces (like this demo!)
                    3. Publish your model to the Hub
                    ### Advanced Path
                    1. Build multi-stage pipelines
                    2. Implement custom training loops
                    3. Contribute to open-source projects
                    ---
                    ## 🔗 Community & Support
                    - [Hugging Face Forums](https://discuss.huggingface.co/)
                    - [GitHub Issues](https://github.com/huggingface/transformers/issues)
                    - [Twitter/X @huggingface](https://twitter.com/huggingface)
                    - Company Slack/Teams Channels
                    ---
                    ## 📝 Session Information
                    **Session 1: Introduction to Hugging Face** (45 minutes)
                    - Overview of the ecosystem
                    - Core abstractions (Pipelines, Models, Tokenizers)
                    - Architecture patterns
                    - Enterprise considerations
                    **Session 2: Hands-On Developer Workshop** (90 minutes)
                    - Tokenization deep dive
                    - Interactive inference playground (5+ NLP tasks)
                    - Live coding and experimentation
                    - Best practices and next steps
                    ---
                    ### Questions?
                    Feel free to reach out via Slack or email during the sessions! 💬
                    """
                )
    return app
# Build the UI at import time so Hugging Face Spaces can discover `app`
# even when this module is imported rather than run directly.
app = create_interface()
# ===================== MAIN =====================
if __name__ == "__main__":
    app.launch(
        server_name="0.0.0.0",  # bind all interfaces (required inside the Spaces container)
        server_port=7860,       # the port Spaces expects the app to serve on
        ssr_mode=False,
    )