Spaces:
Running
Running
| """ | |
| HuggingFace Enabling Sessions - Gradio Interactive Demo App | |
| Hosted on HuggingFace Spaces | |
| """ | |
| import gradio as gr | |
| import config | |
| import utils | |
| import pandas as pd | |
try:
    import spaces
except Exception:
    # The `spaces` package only exists on HuggingFace Spaces hardware.
    # Provide a no-op stand-in so `@spaces.GPU` / `@spaces.GPU(duration=...)`
    # decorated code still runs locally.
    class _SpacesFallback:
        @staticmethod  # BUG FIX: without this, `spaces.GPU(fn)` bound `func`
        # to the instance (self) and returned the instance instead of `fn`.
        def GPU(func=None, *args, **kwargs):
            """No-op replacement for spaces.GPU.

            Supports both the bare form (``@spaces.GPU``) and the
            parameterized form (``@spaces.GPU(duration=60)``); in either
            case the decorated function is returned unchanged.
            """
            if func is None:
                # Parameterized usage: return a pass-through decorator.
                def decorator(inner_func):
                    return inner_func
                return decorator
            return func

    spaces = _SpacesFallback()
| # ===================== UTILITIES ===================== | |
def load_sample_texts():
    """Load the demo sample texts CSV into a DataFrame.

    Returns:
        pandas.DataFrame of samples, or None when the file is missing or
        unreadable (callers treat None as "no samples available").
    """
    try:
        return pd.read_csv(config.SAMPLE_DATA_CSV)
    except Exception:
        # Was a bare `except:`; Exception keeps the best-effort behavior
        # without swallowing SystemExit/KeyboardInterrupt.
        return None
def get_sentiment_examples():
    """Return example texts for sentiment analysis, one per line of sentiment.txt.

    Falls back to a one-element list containing the configured example.
    (The previous fallback returned ``example.split()`` -- individual words --
    so callers taking ``[0]`` displayed only the first word; wrapping the full
    string in a list matches the file-based path and the other getters.)
    """
    try:
        with open(f"{config.DEMO_SAMPLES_DIR}/sentiment.txt", encoding="utf-8") as f:
            return f.read().strip().split("\n")
    except Exception:  # was a bare except
        return [config.TASKS["sentiment"]["example"]]
def get_ner_examples():
    """Return example texts for NER, one per line of ner.txt.

    Falls back to a one-element list with the configured example when the
    samples file is missing or unreadable.
    """
    try:
        with open(f"{config.DEMO_SAMPLES_DIR}/ner.txt", encoding="utf-8") as f:
            return f.read().strip().split("\n")
    except Exception:  # was a bare except
        return [config.TASKS["ner"]["example"]]
def get_qa_examples():
    """Return example QA contexts, one per blank-line-separated paragraph of qa.txt.

    Falls back to a one-element list with the configured example context.
    """
    try:
        with open(f"{config.DEMO_SAMPLES_DIR}/qa.txt", encoding="utf-8") as f:
            return f.read().strip().split("\n\n")
    except Exception:  # was a bare except
        return [config.TASKS["qa"]["example_context"]]
def get_summarization_examples():
    """Return example texts for summarization, one per line of summarization.txt.

    Falls back to a one-element list with the configured example.
    """
    try:
        with open(f"{config.DEMO_SAMPLES_DIR}/summarization.txt", encoding="utf-8") as f:
            return f.read().strip().split("\n")
    except Exception:  # was a bare except
        return [config.TASKS["summarization"]["example"]]
def get_embeddings_examples():
    """Return example text pairs for semantic similarity, one per line of embeddings.txt.

    Falls back to the two configured similarity examples (callers read
    indices 0 and 1).
    """
    try:
        with open(f"{config.DEMO_SAMPLES_DIR}/embeddings.txt", encoding="utf-8") as f:
            return f.read().strip().split("\n")
    except Exception:  # was a bare except
        return [config.TASKS["similarity"]["example1"], config.TASKS["similarity"]["example2"]]
| # ===================== SENTIMENT ANALYSIS ===================== | |
def demo_sentiment(text):
    """Run sentiment analysis on *text*; return (markdown summary, raw result)."""
    # Guard clause: nothing to classify.
    if not text.strip():
        return "Please enter some text", "No input"
    prediction = utils.run_sentiment_analysis(text)
    markdown = (
        f"**Label:** {prediction['label']}\n\n"
        f"**Confidence:** {prediction['score']:.4f}"
    )
    return markdown, prediction
| # ===================== NER ===================== | |
def demo_ner(text):
    """Run named entity recognition on *text*; return (markdown, raw entities)."""
    if not text.strip():
        return "Please enter some text", "No entities"
    entities = utils.run_ner(text)
    # utils signals failure as a list whose first element is {"error": ...}.
    failed = (
        bool(entities)
        and isinstance(entities, list)
        and isinstance(entities[0], dict)
        and "error" in entities[0]
    )
    if failed:
        message = entities[0]["error"]
        return f"Error: {message}", {"error": message}
    return utils.format_ner_output(entities), entities
| # ===================== QUESTION ANSWERING ===================== | |
def demo_qa(context, question):
    """Answer *question* from *context*; return (markdown answer, raw result)."""
    # Both fields are required (De Morgan form of the original check).
    if not (context.strip() and question.strip()):
        return "Please enter both context and question", {}
    result = utils.run_qa(context, question)
    if "error" in result:
        return f"Error: {result['error']}", {}
    answer_md = (
        f"**Answer:** {result['answer']}\n\n"
        f"**Confidence:** {result['score']:.4f}"
    )
    return answer_md, result
| # ===================== SUMMARIZATION ===================== | |
def demo_summarization(text):
    """Summarize *text* (requires at least 20 words); return (markdown, details)."""
    if not text.strip():
        empty_msg = "Please enter some text"
        return empty_msg, {"error": empty_msg}
    # Very short inputs produce degenerate summaries, so refuse them.
    if len(text.split()) < 20:
        short_msg = "Text too short for summarization. Please provide at least 20 words."
        return short_msg, {"error": short_msg}
    summary = utils.run_summarization(text)
    # utils reports failures as an "Error:"-prefixed string.
    if summary.startswith("Error:"):
        return summary, {"error": summary}
    return summary, {"summary": summary}
| # ===================== SEMANTIC SIMILARITY ===================== | |
def demo_similarity(text1, text2):
    """Compare two texts semantically; return (markdown, similarity score)."""
    # Both texts are required (De Morgan form of the original check).
    if not (text1.strip() and text2.strip()):
        return "Please enter both texts", 0
    score = utils.compute_similarity(text1, text2)
    # utils reports failures as a plain string instead of a number.
    if isinstance(score, str):
        return score, 0
    markdown = (
        f"**Similarity Score:** {score:.4f}\n\n"
        "(Score ranges from -1 to 1, where 1 means identical semantically)"
    )
    return markdown, score
| # ===================== TOKENIZATION ===================== | |
def demo_tokenization(text):
    """Tokenize *text*; return (markdown table, raw tokenizer details)."""
    if not text.strip():
        return "Please enter some text", ""
    details = utils.tokenize_text(text)
    if "error" in details:
        return f"Error: {details['error']}", ""
    return utils.format_tokenizer_output(details), details
| # ===================== GRADIO INTERFACE ===================== | |
def create_interface():
    """Create the Gradio interface with 3 tabs.

    Layout: Session 1 (intro slides + two quick demos), Session 2 (tokenizer
    explorer + a five-task inference playground in nested tabs), and a static
    Resources tab. Returns the gr.Blocks app; the caller launches it.
    """
    with gr.Blocks(
        title="HuggingFace Enabling Sessions",
    ) as app:
        # Page header shown above the tab strip.
        gr.Markdown(
            """
            # π€ HuggingFace Enabling Sessions
            **Interactive Demo for Transformers, Hub APIs, and Pipeline Abstractions**
            **Duration:** Session 1: 45 min | Session 2: 90 min
            """
        )
        with gr.Tabs():
            # ===================== TAB 1: SESSION 1 - INTRODUCTION =====================
            with gr.Tab("Session 1: Introduction (45 min)", id="session1"):
                gr.Markdown(
                    """
                    ## π― Introduction to Hugging Face Ecosystem
                    ### What We'll Cover:
                    1. **HuggingFace Platform Overview**
                    - The Hub: Central repository for models, datasets, and spaces
                    - Transformers Library: Core Python library for NLP
                    - Model Cards: Documentation and metadata for transparency
                    2. **Core Abstractions**
                    - **Pipelines:** High-level API for common tasks (sentiment, NER, QA, etc.)
                    - **Models & Tokenizers:** Lower-level building blocks
                    - **Datasets:** Standardized data loading and processing
                    3. **Architecture Patterns**
                    - **Encoders:** BERT, RoBERTa, DistilBERT β Classification, feature extraction
                    - **Decoders:** GPT-2, GPT-3 β Text generation
                    - **Encoder-Decoders:** T5, BART β Seq2seq (translation, summarization, QA)
                    4. **Enterprise NLP Landscape**
                    - Open-source vs. Commercial models
                    - Licensing considerations (MIT, Apache, OpenRAIL, etc.)
                    - Fine-tuning for domain-specific tasks
                    ---
                    ### Live Demo: Explore the Power of Pipelines
                    Try the demos below to see how easy it is to use pre-trained models! π
                    """
                )
                # Demo 1: sentiment analysis. The hidden JSON panel captures the
                # raw pipeline output alongside the rendered markdown.
                with gr.Group():
                    gr.Markdown("### π Demo 1: Sentiment Analysis")
                    demo1_input = gr.Textbox(
                        label="Enter text to analyze sentiment",
                        value="I absolutely love this product!",
                        lines=2,
                    )
                    demo1_btn = gr.Button("Analyze Sentiment", variant="primary")
                    demo1_output = gr.Markdown(label="Result")
                    demo1_json = gr.JSON(label="Raw Output", visible=False)
                    demo1_btn.click(
                        demo_sentiment,
                        inputs=[demo1_input],
                        outputs=[demo1_output, demo1_json],
                    )
                # Demo 2: named entity recognition.
                with gr.Group():
                    gr.Markdown("### π·οΈ Demo 2: Named Entity Recognition (NER)")
                    demo2_input = gr.Textbox(
                        label="Enter text for entity recognition",
                        value="Apple Inc. was founded by Steve Jobs in Cupertino, California.",
                        lines=2,
                    )
                    demo2_btn = gr.Button("Extract Entities", variant="primary")
                    demo2_output = gr.Markdown(label="Entities Found")
                    demo2_json = gr.JSON(label="Raw Output", visible=False)
                    demo2_btn.click(
                        demo_ner,
                        inputs=[demo2_input],
                        outputs=[demo2_output, demo2_json],
                    )
                gr.Markdown(
                    """
                    ---
                    ### π‘ Key Takeaways
                    - Pre-trained models save time and resources
                    - HuggingFace Pipelines abstract away complexity
                    - Models are available for dozens of NLP tasks
                    - Easy to fine-tune for specialized use cases
                    **Next:** Head to Session 2 for hands-on development with Tokenizers and Advanced Inference! π
                    """
                )
            # ===================== TAB 2: SESSION 2 - HANDS-ON DEVELOPER =====================
            with gr.Tab("Session 2: Hands-On Developer (90 min)", id="session2"):
                gr.Markdown(
                    """
                    ## π¨βπ» Building End-to-End NLP Workflows with Hugging Face
                    ### Agenda:
                    1. **Tokenization Deep Dive** (15 min)
                    - Understanding tokenization, token IDs, and attention masks
                    - How models process text internally
                    2. **Inference Playground** (45 min)
                    - Interactive demos across multiple NLP tasks
                    - Learn how to use different model architectures
                    - See real outputs and understand model confidence
                    3. **Exercise Checkpoints** (20 min)
                    - Try your own text inputs
                    - Experiment with different examples
                    - Q&A and troubleshooting
                    4. **Next Steps & Resources** (10 min)
                    - Publishing models to the Hub
                    - Fine-tuning workflow overview
                    - Post-session project ideas
                    ---
                    ### π€ Part 1: Tokenization Explorer
                    """
                )
                # Part 1: interactive tokenizer explorer.
                with gr.Group():
                    gr.Markdown(
                        """
                        #### How Tokenization Works
                        - Text is split into tokens (words/subwords)
                        - Each token gets a unique ID
                        - Attention masks indicate which tokens are real vs. padding
                        - This is how transformers \"understand\" text!
                        """
                    )
                    tok_input = gr.Textbox(
                        label="Enter text to tokenize",
                        value="Hello, how are you?",
                        lines=2,
                    )
                    tok_btn = gr.Button("Tokenize", variant="primary")
                    tok_output = gr.Markdown(label="Tokens")
                    tok_json = gr.JSON(label="Tokenization Details", visible=False)
                    tok_btn.click(
                        demo_tokenization,
                        inputs=[tok_input],
                        outputs=[tok_output, tok_json],
                    )
                gr.Markdown(
                    """
                    ---
                    ### π― Part 2: Inference Playground (Choose a Task)
                    """
                )
                # Part 2: nested tab strip -- one tab per NLP task. Example
                # default values are loaded from the demo_samples files at
                # build time via the get_*_examples() helpers.
                with gr.Tabs():
                    # Task 1: Sentiment
                    with gr.Tab("Sentiment Analysis"):
                        gr.Markdown(
                            """
                            **Classify text as positive, negative, or neutral**
                            Model: DistilBERT fine-tuned on SST-2 dataset
                            """
                        )
                        # NOTE(review): get_sentiment_examples() is called twice
                        # (truthiness check + value); cache it if sample loading is slow.
                        sent_input = gr.Textbox(
                            label="Enter text",
                            value=get_sentiment_examples()[0] if get_sentiment_examples() else "I love this!",
                            lines=3,
                        )
                        sent_btn = gr.Button("Analyze", variant="primary")
                        sent_output = gr.Markdown(label="Result")
                        sent_json = gr.JSON(label="Details", visible=False)
                        sent_btn.click(
                            demo_sentiment,
                            inputs=[sent_input],
                            outputs=[sent_output, sent_json],
                        )
                    # Task 2: NER
                    with gr.Tab("Named Entity Recognition"):
                        gr.Markdown(
                            """
                            **Identify people, organizations, locations, and more**
                            Model: BERT fine-tuned on CoNLL-2003 NER dataset
                            """
                        )
                        ner_input = gr.Textbox(
                            label="Enter text",
                            value=get_ner_examples()[0] if get_ner_examples() else "Apple Inc. was founded by Steve Jobs",
                            lines=3,
                        )
                        ner_btn = gr.Button("Extract Entities", variant="primary")
                        ner_output = gr.Markdown(label="Entities")
                        ner_json = gr.JSON(label="Details", visible=False)
                        ner_btn.click(
                            demo_ner,
                            inputs=[ner_input],
                            outputs=[ner_output, ner_json],
                        )
                    # Task 3: QA
                    with gr.Tab("Question Answering"):
                        gr.Markdown(
                            """
                            **Answer questions based on provided context**
                            Model: RoBERTa fine-tuned on SQuAD 2.0
                            """
                        )
                        qa_examples = get_qa_examples()
                        qa_context = gr.Textbox(
                            label="Context/Passage",
                            value=qa_examples[0] if qa_examples else config.TASKS["qa"]["example_context"],
                            lines=4,
                        )
                        qa_question = gr.Textbox(
                            label="Question",
                            value="What is the Hugging Face Hub?",
                            lines=2,
                        )
                        qa_btn = gr.Button("Get Answer", variant="primary")
                        qa_output = gr.Markdown(label="Answer")
                        qa_json = gr.JSON(label="Details", visible=False)
                        qa_btn.click(
                            demo_qa,
                            inputs=[qa_context, qa_question],
                            outputs=[qa_output, qa_json],
                        )
                    # Task 4: Summarization
                    with gr.Tab("Text Summarization"):
                        gr.Markdown(
                            """
                            **Generate concise summaries of longer texts**
                            Model: BART large fine-tuned on CNN/DailyMail
                            """
                        )
                        sum_examples = get_summarization_examples()
                        sum_input = gr.Textbox(
                            label="Text to summarize (min 20 words)",
                            value=sum_examples[0] if sum_examples else config.TASKS["summarization"]["example"],
                            lines=5,
                        )
                        sum_btn = gr.Button("Summarize", variant="primary")
                        sum_output = gr.Markdown(label="Summary")
                        sum_json = gr.JSON(label="Details", visible=False)
                        sum_btn.click(
                            demo_summarization,
                            inputs=[sum_input],
                            outputs=[sum_output, sum_json],
                        )
                    # Task 5: Semantic Similarity
                    with gr.Tab("Semantic Similarity"):
                        gr.Markdown(
                            """
                            **Compare semantic similarity between texts**
                            Model: Sentence-BERT (all-MiniLM-L6-v2)
                            """
                        )
                        # First two sample lines seed the comparison fields.
                        emb_examples = get_embeddings_examples()
                        emb_text1 = gr.Textbox(
                            label="First text",
                            value=emb_examples[0] if len(emb_examples) > 0 else "The cat is sleeping",
                            lines=2,
                        )
                        emb_text2 = gr.Textbox(
                            label="Second text",
                            value=emb_examples[1] if len(emb_examples) > 1 else "A feline is resting",
                            lines=2,
                        )
                        emb_btn = gr.Button("Compare", variant="primary")
                        emb_output = gr.Markdown(label="Similarity")
                        emb_json = gr.JSON(label="Details", visible=False)
                        emb_btn.click(
                            demo_similarity,
                            inputs=[emb_text1, emb_text2],
                            outputs=[emb_output, emb_json],
                        )
                gr.Markdown(
                    """
                    ---
                    ### π Part 3: Key Concepts Recap
                    β **Transformers Architecture:**
                    - Self-attention mechanisms allow models to focus on relevant parts of text
                    - Pre-training on large corpora + fine-tuning = transfer learning
                    β **Using HuggingFace:**
                    - Pipelines for quick demos
                    - Fine-tuning for custom tasks
                    - Model Hub for sharing and collaboration
                    β **Production Considerations:**
                    - Model size vs. accuracy tradeoff
                    - Quantization and distillation for faster inference
                    - Licensing and compliance for models
                    """
                )
            # ===================== TAB 3: RESOURCES =====================
            # Static content only -- no components or event handlers.
            with gr.Tab("Resources & Next Steps", id="resources"):
                gr.Markdown(
                    """
                    ## π Learning Resources
                    ### Official Documentation
                    - [Hugging Face Transformers Documentation](https://huggingface.co/docs/transformers/)
                    - [Hugging Face Datasets Documentation](https://huggingface.co/docs/datasets/)
                    - [Hugging Face Hub Documentation](https://huggingface.co/docs/hub/)
                    ### Tutorials & Courses
                    - [Hugging Face Course (Free)](https://huggingface.co/course/)
                    - [Transformers from Scratch](https://huggingface.co/docs/transformers/training)
                    - [Fine-tuning Guide](https://huggingface.co/docs/transformers/training)
                    ---
                    ## π οΈ Popular Models to Explore
                    ### Text Classification
                    - `distilbert-base-uncased-finetuned-sst-2-english` - Sentiment Analysis
                    - `roberta-base` - General purpose classifier
                    - `bert-base-multilingual-cased` - Multilingual support
                    ### Named Entity Recognition
                    - `dslim/bert-base-NER` - English NER
                    - `xlm-roberta-base` - Multilingual NER
                    ### Question Answering
                    - `deepset/roberta-base-squad2` - SQuAD 2.0 fine-tuned
                    - `bert-large-uncased-whole-word-masking-finetuned-squad` - BERT Large
                    ### Text Generation
                    - `gpt2` - Lightweight generation
                    - `facebook/bart-large` - Sequence-to-sequence
                    - `google/t5-base` - T5 for various tasks
                    ### Embeddings & Similarity
                    - `sentence-transformers/all-MiniLM-L6-v2` - Fast & efficient
                    - `sentence-transformers/all-mpnet-base-v2` - High quality
                    ---
                    ## πΎ Popular Datasets
                    - `glue` - General Language Understanding Evaluation
                    - `wikitext` - Large language model benchmark
                    - `squad` - Question answering dataset
                    - `conll2003` - Named entity recognition
                    - `imdb` - Sentiment analysis
                    ---
                    ## π― Next Steps After the Sessions
                    ### Beginner Path
                    1. Explore models on the Hub
                    2. Try different models on your own data
                    3. Learn about fine-tuning concepts
                    ### Intermediate Path
                    1. Fine-tune a pre-trained model on your dataset
                    2. Deploy a model to Spaces (like this demo!)
                    3. Publish your model to the Hub
                    ### Advanced Path
                    1. Build multi-stage pipelines
                    2. Implement custom training loops
                    3. Contribute to open-source projects
                    ---
                    ## π Community & Support
                    - [Hugging Face Forums](https://discuss.huggingface.co/)
                    - [GitHub Issues](https://github.com/huggingface/transformers/issues)
                    - [Twitter/X @huggingface](https://twitter.com/huggingface)
                    - Company Slack/Teams Channels
                    ---
                    ## π Session Information
                    **Session 1: Introduction to Hugging Face** (45 minutes)
                    - Overview of the ecosystem
                    - Core abstractions (Pipelines, Models, Tokenizers)
                    - Architecture patterns
                    - Enterprise considerations
                    **Session 2: Hands-On Developer Workshop** (90 minutes)
                    - Tokenization deep dive
                    - Interactive inference playground (5+ NLP tasks)
                    - Live coding and experimentation
                    - Best practices and next steps
                    ---
                    ### Questions?
                    Feel free to reach out via Slack or email during the sessions! π¬
                    """
                )
    return app
# Build at import time: HF Spaces imports this module and serves `app` directly.
app = create_interface()
# ===================== MAIN =====================
if __name__ == "__main__":
    # Local/direct execution path (Spaces uses the module-level `app` above).
    app.launch(
        server_name="0.0.0.0",  # bind all interfaces so the Spaces container can route traffic
        server_port=7860,  # the port HuggingFace Spaces expects Gradio apps on
        ssr_mode=False,  # NOTE(review): disables Gradio server-side rendering -- presumably to avoid Spaces SSR issues; confirm
    )