from datasets import load_dataset
import gradio as gr
from llama_index.core import Document, Settings, VectorStoreIndex
from llama_index.core.indices.query.query_transform.base import (
    HyDEQueryTransform,
)
from llama_index.core.query_engine import TransformQueryEngine
from llama_index.embeddings.openai import OpenAIEmbedding

# Embed with OpenAI's small embedding model for both indexing and querying.
# NOTE(review): requires OPENAI_API_KEY in the environment — confirm deployment.
Settings.embed_model = OpenAIEmbedding(model_name="text-embedding-3-small")

# Load the merged Megillah sugyot dataset (train split) and wrap each record
# as a LlamaIndex Document, carrying the per-item metadata through so it is
# available on retrieved nodes.
dataset = load_dataset("davidr70/megilla_sugyot_merged", split="train")
documents = [
    Document(text=item["content"], metadata=item["metadata"]) for item in dataset
]

# Build an in-memory vector index and a retriever that returns the 7 most
# similar nodes using the default (pure vector) query mode.
index = VectorStoreIndex.from_documents(documents)
retriever = index.as_retriever(
    similarity_top_k=7,
    vector_store_query_mode="default",
)
|
|
|
|
def ask(question):
    """Retrieve the nodes most similar to *question* and format them as text.

    Args:
        question: Free-text query string typed by the user.

    Returns:
        A single string with one section per retrieved node showing its
        similarity score, metadata, and text, each section separated by
        blank lines. Empty string when nothing is retrieved.
    """
    nodes = retriever.retrieve(question)
    # Join once instead of repeated += in a loop (avoids quadratic
    # string concatenation); output is byte-identical to the original.
    return "".join(
        f"score: {node.score}\nmetadata: {node.metadata}\ntext: {node.text}\n\n\n"
        for node in nodes
    )
|
|
|
|
# --- Gradio UI: a single search box over the Megillah vector index. ---
with gr.Blocks(title="Megillah Search") as demo:
    gr.Markdown("# Megillah Search")
    gr.Markdown("Search through the Megillah dataset")

    question = gr.Textbox(
        label="Question", placeholder="Ask a question about Megillah..."
    )
    submit_btn = gr.Button("Search")
    answer = gr.Textbox(label="Sources", lines=20)

    # Both clicking the button and pressing Enter in the textbox run the
    # same retrieval and write the formatted sources into `answer`.
    submit_btn.click(fn=ask, inputs=question, outputs=answer)
    question.submit(fn=ask, inputs=question, outputs=answer)

# share=True additionally exposes a temporary public Gradio link
# alongside the local server.
demo.launch(share=True)
|
|