infy / scripts /config_local.py
shourya
Fix NER model ID and add resilient fallback loading
d153152
#!/usr/bin/env python3
"""
Configuration that loads models from local repo storage when they are present.
Falls back to Hugging Face Hub model IDs if the local models are not available.
"""
import os
from pathlib import Path
# Models directory in repo (sibling of the directory that contains this file)
MODELS_DIR = Path(__file__).parent.parent / "models"


def local_sentiment_model_available(models_dir):
    """Return True when the local sentiment model and tokenizer both exist.

    Checking only that ``models_dir`` itself exists is not enough: an empty
    or partially populated directory would select local paths that cannot
    be loaded instead of falling back to the Hub.

    Args:
        models_dir: Directory expected to contain ``sentiment/model`` and
            ``sentiment/tokenizer``.

    Returns:
        bool: True only if both paths exist.
    """
    sentiment_dir = Path(models_dir) / "sentiment"
    return (sentiment_dir / "model").exists() and (sentiment_dir / "tokenizer").exists()


# Try to use local models first, fall back to HF Hub IDs
LOCAL_MODELS_AVAILABLE = local_sentiment_model_available(MODELS_DIR)

if LOCAL_MODELS_AVAILABLE:
    print(f"📁 Loading models from local repo: {MODELS_DIR}")
    # Use local paths
    SENTIMENT_MODEL = str(MODELS_DIR / "sentiment" / "model")
    SENTIMENT_TOKENIZER = str(MODELS_DIR / "sentiment" / "tokenizer")
else:
    print("🌐 Local models not found, using HF Hub (will download on first use)")
    # Fall back to HF Hub
    SENTIMENT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"
    # Load the tokenizer from the same checkpoint as the model so the two
    # always match; a tokenizer from a different architecture can produce
    # inputs the model does not accept.
    SENTIMENT_TOKENIZER = SENTIMENT_MODEL

# The remaining models are referenced by Hub ID in both modes; only the
# sentiment model is stored in the repo.
NER_MODEL = "dslim/bert-base-NER"  # Can add locally if needed
QA_MODEL = "deepset/roberta-base-squad2"  # Can add locally if needed
SUMMARIZATION_MODEL = "facebook/bart-large-cnn"  # Too large for repo
EMBEDDINGS_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # Can add locally if needed
# Task registry: maps a task key to its display metadata, the model
# identifier selected above, and example input(s) for that task.
# Entries are added in a fixed order so iteration order stays stable.
TASKS = {}

TASKS["sentiment"] = {
    "name": "Sentiment Analysis",
    "description": "Classify text sentiment (positive/negative/neutral)",
    "model": SENTIMENT_MODEL,
    "example": "I absolutely love this product! It's amazing and works perfectly.",
}

TASKS["ner"] = {
    "name": "Named Entity Recognition",
    "description": "Identify and classify named entities (Person, Location, Organization)",
    "model": NER_MODEL,
    "example": "Apple Inc. was founded by Steve Jobs in Cupertino, California.",
}

# Question answering carries two example fields: a context and a question.
TASKS["qa"] = {
    "name": "Question Answering",
    "description": "Answer questions based on provided context",
    "model": QA_MODEL,
    "example_context": "The Hugging Face Hub is a platform for sharing machine learning models, datasets, and demos.",
    "example_question": "What is the Hugging Face Hub?",
}

TASKS["summarization"] = {
    "name": "Text Summarization",
    "description": "Generate concise summaries of longer texts",
    "model": SUMMARIZATION_MODEL,
    "example": "The Hugging Face transformers library provides state-of-the-art pre-trained models for natural language processing tasks. It supports PyTorch and TensorFlow, making it easy to use with either framework.",
}

# Similarity carries a pair of example sentences to compare.
TASKS["similarity"] = {
    "name": "Semantic Similarity",
    "description": "Compare semantic similarity between two sentences",
    "model": EMBEDDINGS_MODEL,
    "example1": "The cat is sleeping on the mat",
    "example2": "A feline is resting on the rug",
}
# Locations of the bundled sample inputs, given as relative paths.
SAMPLE_DATA_CSV: str = "data/sample_texts.csv"
DEMO_SAMPLES_DIR: str = "data/demo_samples"

# Planned length of each session, expressed in minutes.
SESSION1_DURATION: int = 45
SESSION2_DURATION: int = 90