callidus
/

good

Text Generation

question-answering

Model card Files Files and versions

good / faq_system.py

callidus's picture

Upload faq_system.py with huggingface_hub

ba29aa6 verified 5 months ago

history blame contribute delete

4.63 kB

	# CodeBasics FAQ System
	# Smart FAQ retrieval using TF-IDF and cosine similarity

	import pandas as pd
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity
	import numpy as np

	class CodeBasicsFAQ:
	    """FAQ lookup backed by TF-IDF vectors and cosine similarity.

	    Questions are read from the ``prompt`` column and answers from the
	    ``response`` column of a CSV file. The questions are vectorized once at
	    construction time; queries are then matched against those vectors.
	    """

	    def __init__(self, csv_path='codebasics_faqs.csv'):
	        """Load the FAQ CSV and build the TF-IDF index over its questions.

	        Args:
	            csv_path: Path of a CSV file with ``prompt`` and ``response``
	                columns.

	        Raises:
	            Exception: If the file could not be decoded with any of the
	                candidate encodings.
	            KeyError: If the ``prompt`` or ``response`` column is missing.

	        Errors raised by ``pd.read_csv`` other than ``UnicodeDecodeError``
	        are not retried and propagate to the caller unchanged.
	        """
	        # Candidate encodings, tried in order until one decodes the file.
	        # NOTE(review): 'latin-1' accepts any byte sequence, so the entries
	        # after it are presumably never reached -- confirm the intended order.
	        encodings = ['utf-8', 'latin-1', 'iso-8859-1', 'cp1252']
	        df = None
	        decode_error = None

	        for encoding in encodings:
	            # Only a decoding failure justifies trying the next encoding;
	            # retrying on any other error would just hide its real cause.
	            try:
	                df = pd.read_csv(csv_path, encoding=encoding)
	            except UnicodeDecodeError as err:
	                decode_error = err
	            else:
	                break

	        if df is None:
	            raise Exception("Could not load FAQ CSV") from decode_error

	        missing = [col for col in ('prompt', 'response') if col not in df.columns]
	        if missing:
	            raise KeyError(f"FAQ CSV is missing required column(s): {missing}")

	        # Rows with an empty question or answer cannot be vectorized or
	        # searched (they load as float NaN), so they are dropped up front.
	        df = df.dropna(subset=['prompt', 'response'])
	        print(f"✅ Loaded {len(df)} FAQs")

	        # astype(str) guards against cells that pandas parsed as numbers.
	        self.questions = df['prompt'].astype(str).tolist()
	        self.answers = df['response'].astype(str).tolist()

	        # Create TF-IDF vectorizer (unigrams and bigrams, English stop words)
	        self.vectorizer = TfidfVectorizer(
	            lowercase=True,
	            stop_words='english',
	            ngram_range=(1, 2)
	        )

	        # Fit on all questions
	        self.question_vectors = self.vectorizer.fit_transform(self.questions)
	        print("✅ FAQ System ready!")

	    def find_best_match(self, query, threshold=0.2):
	        """Find the FAQ whose question is most similar to ``query``.

	        Args:
	            query: Free-text question to look up.
	            threshold: Minimum cosine similarity required to accept the
	                best-scoring FAQ.

	        Returns:
	            A dict with ``question``, ``answer`` and ``confidence`` (the
	            similarity score) for the best match, or ``None`` when the best
	            score is below ``threshold``.
	        """
	        query_vector = self.vectorizer.transform([query])
	        # cosine_similarity returns one row per query; only one was passed.
	        similarities = cosine_similarity(query_vector, self.question_vectors)[0]

	        best_idx = np.argmax(similarities)
	        best_score = similarities[best_idx]

	        if best_score < threshold:
	            return None

	        return {
	            'question': self.questions[best_idx],
	            'answer': self.answers[best_idx],
	            'confidence': best_score
	        }

	    def answer(self, query):
	        """Answer ``query`` with the best matching FAQ, if there is one.

	        Args:
	            query: Free-text question to look up.

	        Returns:
	            On a match, a dict with ``status`` set to ``'success'``, the
	            ``confidence`` formatted as a percentage string, the
	            ``matched_question`` and its ``answer``. Otherwise a dict with
	            ``status`` set to ``'no_match'`` and an explanatory ``message``.
	        """
	        result = self.find_best_match(query)

	        if result:
	            return {
	                'status': 'success',
	                'confidence': f"{result['confidence']*100:.1f}%",
	                'matched_question': result['question'],
	                'answer': result['answer']
	            }

	        return {
	            'status': 'no_match',
	            'message': 'No matching FAQ found. Try rephrasing your question.'
	        }

	    def search_keyword(self, keyword):
	        """Search FAQs by case-insensitive substring match.

	        Args:
	            keyword: Text to look for in each question and answer. An empty
	                string is a substring of everything, so it matches every FAQ.

	        Returns:
	            A list of ``{'question': ..., 'answer': ...}`` dicts, in FAQ
	            order, for every entry whose question or answer contains
	            ``keyword``.
	        """
	        keyword_lower = keyword.lower()

	        return [
	            {'question': question, 'answer': answer}
	            for question, answer in zip(self.questions, self.answers)
	            if keyword_lower in question.lower() or keyword_lower in answer.lower()
	        ]

	    def list_all_questions(self):
	        """Return all FAQ questions as a new list.

	        A copy is returned so that callers cannot change ``self.questions``,
	        which has to stay aligned with ``self.answers`` and the fitted
	        question vectors.
	        """
	        return list(self.questions)


	# ============================================================================
	# USAGE EXAMPLE
	# ============================================================================

	if __name__ == "__main__":
	    # Demo script: run a few canned questions through the FAQ system, then
	    # drop into an interactive question/answer loop on the console.

	    # Initialize
	    faq = CodeBasicsFAQ('codebasics_faqs.csv')

	    # Example questions
	    test_questions = [
	        "Can I take this bootcamp without programming experience?",
	        "Why should I trust Codebasics?",
	        "What are the prerequisites?",
	        "Do I need a laptop?"
	    ]

	    print("\n" + "="*70)
	    print("TESTING FAQ SYSTEM")
	    print("="*70 + "\n")

	    for question in test_questions:
	        print(f"❓ {question}")
	        result = faq.answer(question)

	        if result['status'] == 'success':
	            print(f"✅ Match: {result['confidence']}")
	            print(f"📝 Q: {result['matched_question']}")
	            # Only the first 100 characters of the answer are previewed.
	            print(f"💡 A: {result['answer'][:100]}...\n")
	        else:
	            print(f"❌ {result['message']}\n")

	    # Interactive mode
	    print("\n" + "="*70)
	    print("INTERACTIVE MODE")
	    print("="*70)
	    print("Type 'quit' to exit\n")

	    while True:
	        # A closed/piped stdin (EOFError) or Ctrl-C (KeyboardInterrupt) is
	        # treated like typing 'quit' instead of ending with a traceback.
	        try:
	            user_q = input("❓ Your question: ").strip()
	        except (EOFError, KeyboardInterrupt):
	            print("\n👋 Goodbye!")
	            break

	        if user_q.lower() in ['quit', 'exit', 'q']:
	            print("👋 Goodbye!")
	            break

	        # Ignore blank input and prompt again.
	        if not user_q:
	            continue

	        result = faq.answer(user_q)

	        if result['status'] == 'success':
	            print(f"\n[Confidence: {result['confidence']}]")
	            print(f"\n📌 {result['matched_question']}")
	            print(f"\n✨ {result['answer']}\n")
	        else:
	            print(f"\n❌ {result['message']}\n")