| import os |
| import json |
| import numpy as np |
| import faiss |
| import gradio as gr |
| from PyPDF2 import PdfReader |
| import re |
| from sentence_transformers import SentenceTransformer |
| from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
| |
# --- Knowledge base -------------------------------------------------------
# Load the lab-reference knowledge base; entries carry at least
# "Component", "Range" and "Advice" keys (see kb_texts below).
# NOTE(review): consider open(..., encoding="utf-8") — the default encoding
# here is platform-dependent.
with open("knowledge_base.json", "r") as file:
    kb = json.load(file)

# Interactive Hugging Face CLI login at import time.
# NOTE(review): this shells out and blocks waiting for user input, and the
# API token is also read from the environment below — possibly redundant;
# confirm whether both auth paths are needed.
os.system("huggingface-cli login")

# --- Retrieval index ------------------------------------------------------
# Embed every KB entry once and build an exact L2 FAISS index over the
# embeddings for nearest-neighbour lookup at query time.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
kb_texts = [f"{item['Component']} {item['Range']} {item['Advice']}" for item in kb]
kb_embeddings = embedding_model.encode(kb_texts)
kb_embeddings = np.array(kb_embeddings, dtype="float32")  # faiss requires float32

index = faiss.IndexFlatL2(kb_embeddings.shape[1])
index.add(kb_embeddings)

# --- LLM ------------------------------------------------------------------
llama_model_name = "meta-llama/Llama-3.2-3B-Instruct"
API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # may be None if unset

tokenizer = AutoTokenizer.from_pretrained(llama_model_name, token=API_TOKEN)
llm = AutoModelForCausalLM.from_pretrained(llama_model_name, token=API_TOKEN)
|
|
| |
def generate_advice(extracted_data):
    """Generate per-component medical advice for extracted lab results.

    For each result dict (keys: 'Component', 'Value', 'Units', 'Status'),
    retrieve the nearest knowledge-base entry via the FAISS index and ask
    the LLM for recommendations grounded in that entry's advice text.

    Args:
        extracted_data: list of dicts as produced by ``pdf_to_text``.

    Returns:
        list of {"Component": ..., "Advice": ...} dicts on success, or a
        single-element list containing an {"error": ...} dict on failure.
    """
    try:
        recommendations = []

        for item in extracted_data:
            query = f"{item['Component']} {item['Status']}"
            print(f"Processing Query: {query}")

            # Embed the query and fetch the single closest KB entry.
            query_embedding = embedding_model.encode([query])
            query_embedding = np.array(query_embedding, dtype="float32").reshape(1, -1)
            _, idx = index.search(query_embedding, 1)
            best_match = kb[idx[0][0]]

            role = "Medical expert providing advice based on lab results."
            prompt = f"""
Lab Test: {item['Component']}
Value: {item['Value']} {item['Units']}
Status: {item['Status']}

Medical Guidelines: {best_match['Advice']}

Provide additional insights or recommendations.
"""

            messages = [
                {"role": "system", "content": role},
                {"role": "user", "content": prompt},
            ]

            # BUG FIX: with tokenize=True (and no return_dict=True),
            # apply_chat_template returns a token tensor, not a dict, so the
            # original ``input_text["input_ids"]`` raised TypeError. Keep the
            # tensor and pass it to generate() directly.
            input_ids = tokenizer.apply_chat_template(
                messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
            )

            # BUG FIX: max_length counts the prompt tokens too, so a prompt
            # longer than 150 tokens left no room to generate anything;
            # max_new_tokens bounds only the generated continuation.
            output = llm.generate(
                input_ids=input_ids,
                max_new_tokens=150,
                num_return_sequences=1,
            )

            # BUG FIX: decode only the newly generated tokens — decoding
            # output[0] wholesale echoed the entire prompt back as "advice".
            advice = tokenizer.decode(
                output[0][input_ids.shape[-1]:], skip_special_tokens=True
            ).strip()
            recommendations.append({"Component": item["Component"], "Advice": advice})

        return recommendations

    except Exception as e:
        # Boundary handler: surface the failure to the Gradio UI as JSON
        # instead of crashing the callback.
        print(f"Error: {e}")
        return [{"error": f"Exception occurred: {str(e)}"}]
|
|
|
|
| |
def pdf_to_text(pdf_file):
    """Extract structured lab-result rows from an uploaded PDF.

    Args:
        pdf_file: a filesystem path string (what ``gr.File(type="filepath")``
            passes) or a file-like object exposing a ``.name`` attribute.

    Returns:
        On success, a list of dicts with keys Component/Value/Min/Max/Units/
        Status; otherwise a human-readable message string.
    """
    try:
        # BUG FIX: gr.File(type="filepath") hands the callback a plain path
        # string, which has no ``.name`` attribute — accept both forms.
        path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
        reader = PdfReader(path)

        # BUG FIX: extract_text() can return None (e.g. image-only pages);
        # coalesce to "" so concatenation never raises TypeError. join()
        # also avoids the quadratic ``text +=`` loop.
        text = "".join(page.extract_text() or "" for page in reader.pages)

        # One row per result line: name, value, min, max, units, status.
        pattern = r"(\w+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\w/%]+)\s+(\w+)"
        matches = re.findall(pattern, text)

        if not matches:
            return "No structured data found in the PDF."
        return [
            {
                "Component": m[0],
                "Value": float(m[1]),
                "Min": float(m[2]),
                "Max": float(m[3]),
                "Units": m[4],
                "Status": m[5],
            }
            for m in matches
        ]

    except Exception as e:
        # Best-effort boundary: report the problem as a display string.
        return f"Error: {e}"
|
|
| |
def main():
    """Assemble the Gradio interface and start serving it."""
    with gr.Blocks() as demo:
        gr.Markdown("## Medical Test Interpreter with RAG and LLM")

        # Layout: uploader plus two JSON panes side by side, buttons below.
        with gr.Row():
            pdf_input = gr.File(label="Upload PDF", type="filepath")
            structured_data = gr.JSON(label="Extracted Structured Data")
            advice_output = gr.JSON(label="Generated Advice")
        btn_extract = gr.Button("Extract Data")
        btn_advise = gr.Button("Get Advice")

        # Wire the two-step flow: PDF -> structured rows -> LLM advice.
        btn_extract.click(pdf_to_text, inputs=pdf_input, outputs=structured_data)
        btn_advise.click(generate_advice, inputs=structured_data, outputs=advice_output)

    demo.launch()
|
|
| |
# Launch the Gradio app only when executed as a script (not on import).
if __name__ == "__main__":
    main()