| |
| |
|
|
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
|
|
class CodeBasicsFAQ:
    """FAQ lookup that matches free-text queries against stored questions.

    Questions and answers are read from the ``prompt`` and ``response``
    columns of a CSV file. Questions are indexed with a TF-IDF vectorizer
    (unigrams and bigrams, English stop words removed) and queries are
    ranked by cosine similarity against that index.
    """

    def __init__(self, csv_path='codebasics_faqs.csv'):
        """Load the FAQ CSV and fit the TF-IDF index over its questions.

        Args:
            csv_path: Path to a CSV file with ``prompt`` and ``response``
                columns.

        Raises:
            FileNotFoundError: If ``csv_path`` does not exist.
            KeyError: If a required column is missing.
            ValueError: If the file cannot be decoded with any supported
                encoding, or holds no complete prompt/response row.
        """
        df = self._read_csv(csv_path)

        missing = [col for col in ('prompt', 'response') if col not in df.columns]
        if missing:
            raise KeyError(f"FAQ CSV is missing required column(s): {missing}")

        # Blank cells load as NaN floats, which would break both the
        # vectorizer and the ``.lower()`` calls in ``search_keyword``.
        df = df.dropna(subset=['prompt', 'response'])
        if df.empty:
            raise ValueError("FAQ CSV contains no usable prompt/response rows")

        self.questions = df['prompt'].astype(str).tolist()
        self.answers = df['response'].astype(str).tolist()
        print(f"✅ Loaded {len(self.questions)} FAQs")

        self.vectorizer = TfidfVectorizer(
            lowercase=True,
            stop_words='english',
            ngram_range=(1, 2),
        )
        self.question_vectors = self.vectorizer.fit_transform(self.questions)
        print("✅ FAQ System ready!")

    @staticmethod
    def _read_csv(csv_path):
        """Read ``csv_path`` trying UTF-8 first, then legacy encodings."""
        # cp1252 must come before latin-1: latin-1 accepts every byte
        # sequence, so anything listed after it can never be reached.
        last_error = None
        for encoding in ('utf-8', 'cp1252', 'latin-1'):
            try:
                return pd.read_csv(csv_path, encoding=encoding)
            except UnicodeDecodeError as err:
                # Only a decoding failure justifies retrying; a missing
                # file or malformed CSV should surface unchanged.
                last_error = err
        raise ValueError("Could not load FAQ CSV") from last_error

    def find_best_match(self, query, threshold=0.2):
        """Return the FAQ entry most similar to ``query``.

        Args:
            query: Free-text question to look up.
            threshold: Minimum cosine similarity required for a match.

        Returns:
            A dict with ``question``, ``answer`` and ``confidence`` (a
            float cosine similarity), or ``None`` when the query is blank,
            is not a string, or no question reaches ``threshold``.
        """
        if not isinstance(query, str) or not query.strip():
            return None

        query_vector = self.vectorizer.transform([query])
        similarities = cosine_similarity(query_vector, self.question_vectors)[0]

        # Convert NumPy scalars to builtins so the result is plain data.
        best_idx = int(np.argmax(similarities))
        best_score = float(similarities[best_idx])

        if best_score < threshold:
            return None
        return {
            'question': self.questions[best_idx],
            'answer': self.answers[best_idx],
            'confidence': best_score,
        }

    def answer(self, query):
        """Answer ``query`` with a status-tagged response dict.

        Returns:
            On a match: ``status='success'`` plus ``confidence`` (formatted
            as a percentage string), ``matched_question`` and ``answer``.
            Otherwise: ``status='no_match'`` plus a ``message``.
        """
        result = self.find_best_match(query)

        if result is None:
            return {
                'status': 'no_match',
                'message': 'No matching FAQ found. Try rephrasing your question.',
            }
        return {
            'status': 'success',
            'confidence': f"{result['confidence']*100:.1f}%",
            'matched_question': result['question'],
            'answer': result['answer'],
        }

    def search_keyword(self, keyword):
        """Return every FAQ whose question or answer contains ``keyword``.

        The comparison is a case-insensitive substring test. Each match is
        a dict with ``question`` and ``answer`` keys, in file order.
        """
        needle = keyword.lower()
        return [
            {'question': question, 'answer': reply}
            for question, reply in zip(self.questions, self.answers)
            if needle in question.lower() or needle in reply.lower()
        ]

    def list_all_questions(self):
        """Return a list of all FAQ questions."""
        # Hand out a copy so callers cannot desynchronise the stored
        # questions from the fitted TF-IDF matrix.
        return list(self.questions)
|
|
|
|
| |
| |
| |
|
|
def main():
    """Run a scripted smoke test, then an interactive question loop."""
    faq = CodeBasicsFAQ('codebasics_faqs.csv')

    test_questions = [
        "Can I take this bootcamp without programming experience?",
        "Why should I trust Codebasics?",
        "What are the prerequisites?",
        "Do I need a laptop?",
    ]

    print("\n" + "="*70)
    print("TESTING FAQ SYSTEM")
    print("="*70 + "\n")

    for question in test_questions:
        print(f"❓ {question}")
        result = faq.answer(question)

        if result['status'] == 'success':
            print(f"✅ Match: {result['confidence']}")
            print(f"📌 Q: {result['matched_question']}")
            # Only the first 100 characters are shown to keep output short.
            print(f"💡 A: {result['answer'][:100]}...\n")
        else:
            print(f"❌ {result['message']}\n")

    print("\n" + "="*70)
    print("INTERACTIVE MODE")
    print("="*70)
    print("Type 'quit' to exit\n")

    while True:
        try:
            user_q = input("❓ Your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C ends the session like 'quit' does,
            # instead of dumping a traceback.
            print("\n👋 Goodbye!")
            break

        if user_q.lower() in ('quit', 'exit', 'q'):
            print("👋 Goodbye!")
            break

        if not user_q:
            continue

        result = faq.answer(user_q)

        if result['status'] == 'success':
            print(f"\n[Confidence: {result['confidence']}]")
            print(f"\n📌 {result['matched_question']}")
            print(f"\n✨ {result['answer']}\n")
        else:
            print(f"\n❌ {result['message']}\n")


if __name__ == "__main__":
    main()
|
|