Demosthene-OR committed on
Commit
a2110a1
·
1 Parent(s): 5f00917
app.py CHANGED
@@ -1,66 +1,67 @@
1
- # Import necessary modules
2
  import streamlit as st
3
- import streamlit.components.v1 as components # For embedding custom HTML
4
- from generate_knowledge_graph import generate_knowledge_graph
5
 
6
- # Set up Streamlit page configuration
7
  st.set_page_config(
8
- page_icon=None,
9
- layout="wide", # Use wide layout for better graph display
10
- initial_sidebar_state="auto",
11
  menu_items=None
12
  )
13
 
14
- # Set the title of the app
15
  st.title("Knowledge Graph From Text")
16
 
17
- # Sidebar section for user input method
18
  st.sidebar.title("Input document")
19
  input_method = st.sidebar.radio(
20
  "Choose an input method:",
21
- ["Upload txt", "Input text"], # Options for uploading a file or manually inputting text
22
  )
23
 
24
- # Case 1: User chooses to upload a .txt file
25
- if input_method == "Upload txt":
26
- # File uploader widget in the sidebar
27
- uploaded_file = st.sidebar.file_uploader(label="Upload file", type=["txt"])
28
-
29
  if uploaded_file is not None:
30
- # Read the uploaded file content and decode it as UTF-8 text
31
  text = uploaded_file.read().decode("utf-8")
32
-
33
- # Button to generate the knowledge graph
34
- if st.sidebar.button("Generate Knowledge Graph"):
35
- with st.spinner("Generating knowledge graph..."):
36
- # Call the function to generate the graph from the text
37
- net = generate_knowledge_graph(text)
38
- st.success("Knowledge graph generated successfully!")
39
-
40
- # Save the graph to an HTML file
41
- output_file = "knowledge_graph.html"
42
- net.save_graph(output_file)
43
-
44
- # Open the HTML file and display it within the Streamlit app
45
- HtmlFile = open(output_file, 'r', encoding='utf-8')
46
- components.html(HtmlFile.read(), height=1000)
47
-
48
- # Case 2: User chooses to directly input text
49
  else:
50
- # Text area for manual input
51
  text = st.sidebar.text_area("Input text", height=300)
52
 
53
- if text: # Check if the text area is not empty
54
- if st.sidebar.button("Generate Knowledge Graph"):
55
- with st.spinner("Generating knowledge graph..."):
56
- # Call the function to generate the graph from the input text
57
- net = generate_knowledge_graph(text)
58
- st.success("Knowledge graph generated successfully!")
59
-
60
- # Save the graph to an HTML file
61
- output_file = "knowledge_graph.html"
62
- net.save_graph(output_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
- # Open the HTML file and display it within the Streamlit app
65
- HtmlFile = open(output_file, 'r', encoding='utf-8')
66
- components.html(HtmlFile.read(), height=1000)
 
# Streamlit front-end: build a knowledge graph from a text document and
# answer questions about it via semantic search over the graph relations.
import streamlit as st
import streamlit.components.v1 as components  # for embedding the PyVis HTML
from generate_knowledge_graph import generate_knowledge_graph, answer_question_with_graph


st.set_page_config(
    page_icon=None,  # fix: was the string "None", which Streamlit treats as an icon path/emoji
    layout="wide",  # wide layout gives the graph more room
    initial_sidebar_state="auto",
    menu_items=None
)

st.title("Knowledge Graph From Text")

# Sidebar: choose between uploading a .txt file or typing text directly.
st.sidebar.title("Input document")
input_method = st.sidebar.radio(
    "Choose an input method:",
    ("Upload .txt", "Input text")
)

# Text extraction based on user choice
text = ""
if input_method == "Upload .txt":
    uploaded_file = st.sidebar.file_uploader(label="Upload file", type="txt")
    if uploaded_file is not None:
        text = uploaded_file.read().decode("utf-8")
else:
    text = st.sidebar.text_area("Input text", height=300)

# Step 1: build the full knowledge graph. The graph documents are cached in
# session state so the QA section below survives Streamlit reruns.
if st.sidebar.button("1. Generate Knowledge Graph"):
    if text:
        with st.spinner("Generating knowledge graph..."):
            net, graph_docs = generate_knowledge_graph(text)
            st.session_state['graph_docs'] = graph_docs
            st.success("Knowledge graph generated successfully!")

        output_file = "knowledge_graph.html"
        net.save_graph(output_file)
        # fix: context manager closes the file handle (it was leaked before)
        with open(output_file, 'r', encoding='utf-8') as html_file:
            components.html(html_file.read(), height=600)
    else:
        st.sidebar.error("Please provide some text to generate the graph.")

# QA Section — only shown once a graph has been generated.
if 'graph_docs' in st.session_state:
    st.markdown("---")
    st.subheader("Posez une question sur le document")

    col1, col2 = st.columns([3, 1])
    with col1:
        question = st.text_input("Votre question :")
    with col2:
        k_value = st.slider("Relations à analyser (Top K)", min_value=1, max_value=20, value=5)

    if st.button("2. Analyser") and question:
        with st.spinner("Recherche sémantique dans le graphe en cours..."):
            answer, filtered_net = answer_question_with_graph(
                question,
                st.session_state['graph_docs'],
                k_relations=k_value
            )

        st.info(f"**Réponse :** {answer}")

        st.markdown("**Sous-graphe des relations utilisées pour répondre :**")
        # answer_question_with_graph saved the filtered sub-graph to this file.
        with open("filtered_graph.html", 'r', encoding='utf-8') as html_file:
            components.html(html_file.read(), height=450)
app_v1.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Import necessary modules
import streamlit as st
import streamlit.components.v1 as components  # For embedding custom HTML
from generate_knowledge_graph import generate_knowledge_graph

# Set up Streamlit page configuration
st.set_page_config(
    page_icon=None,
    layout="wide",  # Use wide layout for better graph display
    initial_sidebar_state="auto",
    menu_items=None
)

# Set the title of the app
st.title("Knowledge Graph From Text")

# Sidebar section for user input method
st.sidebar.title("Input document")
input_method = st.sidebar.radio(
    "Choose an input method:",
    ["Upload txt", "Input text"],  # Options for uploading a file or manually inputting text
)

# Case 1: User chooses to upload a .txt file
if input_method == "Upload txt":
    # File uploader widget in the sidebar
    uploaded_file = st.sidebar.file_uploader(label="Upload file", type=["txt"])

    if uploaded_file is not None:
        # Read the uploaded file content and decode it as UTF-8 text
        text = uploaded_file.read().decode("utf-8")

        # Button to generate the knowledge graph
        if st.sidebar.button("Generate Knowledge Graph"):
            with st.spinner("Generating knowledge graph..."):
                # Call the function to generate the graph from the text
                net = generate_knowledge_graph(text)
                st.success("Knowledge graph generated successfully!")

            # Save the graph to an HTML file
            output_file = "knowledge_graph.html"
            net.save_graph(output_file)

            # Display the HTML within the Streamlit app.
            # fix: context manager closes the file handle (it was leaked before)
            with open(output_file, 'r', encoding='utf-8') as html_file:
                components.html(html_file.read(), height=1000)

# Case 2: User chooses to directly input text
else:
    # Text area for manual input
    text = st.sidebar.text_area("Input text", height=300)

    if text:  # Check if the text area is not empty
        if st.sidebar.button("Generate Knowledge Graph"):
            with st.spinner("Generating knowledge graph..."):
                # Call the function to generate the graph from the input text
                net = generate_knowledge_graph(text)
                st.success("Knowledge graph generated successfully!")

            # Save the graph to an HTML file
            output_file = "knowledge_graph.html"
            net.save_graph(output_file)

            # Display the HTML within the Streamlit app.
            # fix: context manager closes the file handle (it was leaked before)
            with open(output_file, 'r', encoding='utf-8') as html_file:
                components.html(html_file.read(), height=1000)
generate_knowledge_graph.py CHANGED
@@ -1,127 +1,104 @@
 
1
  from langchain_experimental.graph_transformers import LLMGraphTransformer
2
  from langchain_core.documents import Document
3
- from langchain_openai import ChatOpenAI
 
 
4
  from pyvis.network import Network
5
-
6
  from dotenv import load_dotenv
7
  import os
8
  import asyncio
9
 
10
-
11
- # Load the .env file
12
  load_dotenv()
13
- # Get API key from environment variable
14
  api_key = os.getenv("OPENAI_API_KEY")
15
 
16
  llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
17
-
18
  graph_transformer = LLMGraphTransformer(llm=llm)
19
 
20
-
21
- # Extract graph data from input text
22
  async def extract_graph_data(text):
23
- """
24
- Asynchronously extracts graph data from input text using a graph transformer.
25
-
26
- Args:
27
- text (str): Input text to be processed into graph format.
28
-
29
- Returns:
30
- list: A list of GraphDocument objects containing nodes and relationships.
31
- """
32
  documents = [Document(page_content=text)]
33
  graph_documents = await graph_transformer.aconvert_to_graph_documents(documents)
34
  return graph_documents
35
 
36
-
37
  def visualize_graph(graph_documents):
38
- """
39
- Visualizes a knowledge graph using PyVis based on the extracted graph documents.
40
-
41
- Args:
42
- graph_documents (list): A list of GraphDocument objects with nodes and relationships.
43
-
44
- Returns:
45
- pyvis.network.Network: The visualized network graph object.
46
- """
47
- # Create network
48
- net = Network(height="1200px", width="100%", directed=True,
49
- notebook=False, bgcolor="#222222", font_color="white", filter_menu=True, cdn_resources='remote')
50
-
51
  nodes = graph_documents[0].nodes
52
  relationships = graph_documents[0].relationships
53
 
54
- # Build lookup for valid nodes
55
  node_dict = {node.id: node for node in nodes}
56
-
57
- # Filter out invalid edges and collect valid node IDs
58
  valid_edges = []
59
  valid_node_ids = set()
 
60
  for rel in relationships:
61
  if rel.source.id in node_dict and rel.target.id in node_dict:
62
  valid_edges.append(rel)
63
  valid_node_ids.update([rel.source.id, rel.target.id])
64
 
65
- # Track which nodes are part of any relationship
66
- connected_node_ids = set()
67
- for rel in relationships:
68
- connected_node_ids.add(rel.source.id)
69
- connected_node_ids.add(rel.target.id)
70
-
71
- # Add valid nodes to the graph
72
  for node_id in valid_node_ids:
73
  node = node_dict[node_id]
74
  try:
75
  net.add_node(node.id, label=node.id, title=node.type, group=node.type)
76
  except:
77
- continue # Skip node if error occurs
78
 
79
- # Add valid edges to the graph
80
  for rel in valid_edges:
81
  try:
82
  net.add_edge(rel.source.id, rel.target.id, label=rel.type.lower())
83
  except:
84
- continue # Skip edge if error occurs
85
-
86
- # Configure graph layout and physics
87
- net.set_options("""
88
- {
89
- "physics": {
90
- "forceAtlas2Based": {
91
- "gravitationalConstant": -100,
92
- "centralGravity": 0.01,
93
- "springLength": 200,
94
- "springConstant": 0.08
95
- },
96
- "minVelocity": 0.75,
97
- "solver": "forceAtlas2Based"
98
- }
99
- }
100
- """)
101
-
102
- output_file = "knowledge_graph.html"
103
- try:
104
- net.save_graph(output_file)
105
- print(f"Graph saved to {os.path.abspath(output_file)}")
106
- return net
107
- except Exception as e:
108
- print(f"Error saving graph: {e}")
109
- return None
110
 
 
 
111
 
112
  def generate_knowledge_graph(text):
113
- """
114
- Generates and visualizes a knowledge graph from input text.
115
-
116
- This function runs the graph extraction asynchronously and then visualizes
117
- the resulting graph using PyVis.
118
-
119
- Args:
120
- text (str): Input text to convert into a knowledge graph.
121
-
122
- Returns:
123
- pyvis.network.Network: The visualized network graph object.
124
- """
125
  graph_documents = asyncio.run(extract_graph_data(text))
126
  net = visualize_graph(graph_documents)
127
- return net
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Module-level setup: bring in dependencies, load credentials, and build the
# shared LLM / graph-transformer instances used by every function below.
import asyncio
import os

from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from pyvis.network import Network

# Expects OPENAI_API_KEY to be provided via a .env file or the environment.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# Shared chat model and transformer, constructed once at import time.
llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
graph_transformer = LLMGraphTransformer(llm=llm)
async def extract_graph_data(text):
    """Convert raw text into graph form.

    Wraps *text* in a single langchain ``Document`` and hands it to the
    module-level graph transformer.

    Args:
        text (str): Input text to analyse.

    Returns:
        list: GraphDocument objects containing nodes and relationships.
    """
    source_docs = [Document(page_content=text)]
    return await graph_transformer.aconvert_to_graph_documents(source_docs)
def visualize_graph(graph_documents):
    """Build a PyVis network from extracted graph documents.

    Only relationships whose source and target both resolve to known nodes
    are drawn; nodes that take part in no valid relationship are omitted.

    Args:
        graph_documents (list): GraphDocument objects with nodes/relationships.

    Returns:
        pyvis.network.Network: the populated (unsaved) network.
    """
    net = Network(height="600px", width="100%", directed=True, notebook=False, bgcolor="#222222", font_color="white", filter_menu=True, cdn_resources='remote')

    # Generalized: merge nodes/relationships from every document instead of
    # reading only graph_documents[0]. Identical result for the
    # single-document case produced by extract_graph_data, and no
    # IndexError on an empty list.
    nodes = []
    relationships = []
    for doc in graph_documents:
        nodes.extend(doc.nodes)
        relationships.extend(doc.relationships)

    node_dict = {node.id: node for node in nodes}
    valid_edges = []
    valid_node_ids = set()

    for rel in relationships:
        if rel.source.id in node_dict and rel.target.id in node_dict:
            valid_edges.append(rel)
            valid_node_ids.update([rel.source.id, rel.target.id])

    for node_id in valid_node_ids:
        node = node_dict[node_id]
        try:
            net.add_node(node.id, label=node.id, title=node.type, group=node.type)
        except Exception:  # fix: bare except also trapped SystemExit/KeyboardInterrupt
            continue  # skip nodes PyVis rejects

    for rel in valid_edges:
        try:
            net.add_edge(rel.source.id, rel.target.id, label=rel.type.lower())
        except Exception:  # fix: bare except also trapped SystemExit/KeyboardInterrupt
            continue  # skip edges PyVis rejects

    # ForceAtlas2 layout keeps clusters readable.
    net.set_options('{"physics": {"forceAtlas2Based": {"gravitationalConstant": -100, "centralGravity": 0.01, "springLength": 200, "springConstant": 0.08}, "minVelocity": 0.75, "solver": "forceAtlas2Based"}}')
    return net
def generate_knowledge_graph(text):
    """Extract a knowledge graph from *text* and build its visualization.

    Args:
        text (str): Input text to convert into a knowledge graph.

    Returns:
        tuple: ``(net, graph_documents)`` — the PyVis network plus the raw
        GraphDocument list, so callers can later run QA over the relations.
    """
    docs = asyncio.run(extract_graph_data(text))
    return visualize_graph(docs), docs
def answer_question_with_graph(question, graph_documents, k_relations=5):
    """Answer *question* using only relations from the knowledge graph.

    Each relation is rendered as a French sentence, embedded, and indexed in
    an in-memory FAISS store; the ``k_relations`` semantically closest
    relations become the only context the LLM may use. A PyVis sub-graph of
    the relations actually retrieved is saved to ``filtered_graph.html``.

    Args:
        question (str): User question (the prompt answers in French).
        graph_documents (list): GraphDocuments from generate_knowledge_graph.
        k_relations (int): Number of top relations to retrieve (Top K).

    Returns:
        tuple: ``(answer_text, filtered_network)``.
    """
    all_relationships = []
    for doc in graph_documents:
        all_relationships.extend(doc.relationships)

    if not all_relationships:
        return "Aucune relation trouvée dans le graphe.", visualize_graph(graph_documents)

    # One retrievable document per relation; the index stored in metadata
    # maps retrieved documents back to the relation objects.
    rel_docs = []
    for i, rel in enumerate(all_relationships):
        text_rep = f"L'entité '{rel.source.id}' a pour relation '{rel.type}' avec l'entité '{rel.target.id}'."
        rel_docs.append(Document(page_content=text_rep, metadata={"rel_index": i}))

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    vectorstore = FAISS.from_documents(rel_docs, embeddings)
    retrieved_docs = vectorstore.similarity_search(question, k=k_relations)

    used_relationships = [all_relationships[doc.metadata["rel_index"]] for doc in retrieved_docs]
    context = "\n".join([doc.page_content for doc in retrieved_docs])

    # fix: the template was written with four quotes (""""...""""), leaving a
    # stray quote after the triple-quote delimiter — a SyntaxError that made
    # the whole module unimportable. A plain triple-quoted string is intended.
    prompt = PromptTemplate(
        template="""Tu es un assistant expert qui répond aux questions en se basant UNIQUEMENT sur ce sous-ensemble de relations extraites d'un graphe de connaissances.\n\nContexte (Relations pertinentes trouvées) :\n{context}\n\nQuestion : {question}\n\nRéponds de manière claire et concise en français. Si la réponse n'est pas dans le contexte fourni, dis-le explicitement.""",
        input_variables=["context", "question"]
    )

    chain = prompt | llm
    answer = chain.invoke({"context": context, "question": question}).content

    # Build the sub-graph restricted to the relations used for the answer.
    net = Network(height="450px", width="100%", directed=True, bgcolor="#222222", font_color="white")

    nodes_added = set()
    for rel in used_relationships:
        if rel.source.id not in nodes_added:
            net.add_node(rel.source.id, label=rel.source.id, title=rel.source.type, group=rel.source.type)
            nodes_added.add(rel.source.id)
        if rel.target.id not in nodes_added:
            net.add_node(rel.target.id, label=rel.target.id, title=rel.target.type, group=rel.target.type)
            nodes_added.add(rel.target.id)
        try:
            net.add_edge(rel.source.id, rel.target.id, label=rel.type)
        except Exception:  # fix: bare except also trapped SystemExit/KeyboardInterrupt
            pass  # best-effort: skip edges PyVis rejects

    net.set_options('{"physics": {"forceAtlas2Based": {"gravitationalConstant": -50}}}')
    net.save_graph("filtered_graph.html")

    return answer, net
generate_knowledge_graph_v1.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Archived first version of the knowledge-graph module (no QA support).
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI
from pyvis.network import Network

from dotenv import load_dotenv
import os
import asyncio


# Load the .env file
load_dotenv()
# Get API key from environment variable
api_key = os.getenv("OPENAI_API_KEY")

llm = ChatOpenAI(temperature=0, model_name="gpt-4o")

graph_transformer = LLMGraphTransformer(llm=llm)


# Extract graph data from input text
async def extract_graph_data(text):
    """
    Asynchronously extracts graph data from input text using a graph transformer.

    Args:
        text (str): Input text to be processed into graph format.

    Returns:
        list: A list of GraphDocument objects containing nodes and relationships.
    """
    documents = [Document(page_content=text)]
    graph_documents = await graph_transformer.aconvert_to_graph_documents(documents)
    return graph_documents


def visualize_graph(graph_documents):
    """
    Visualizes a knowledge graph using PyVis based on the extracted graph documents.

    Args:
        graph_documents (list): A list of GraphDocument objects with nodes and relationships.

    Returns:
        pyvis.network.Network: The visualized network graph object, or None if
        saving the HTML file fails.
    """
    # Create network
    net = Network(height="1200px", width="100%", directed=True,
                  notebook=False, bgcolor="#222222", font_color="white", filter_menu=True, cdn_resources='remote')

    nodes = graph_documents[0].nodes
    relationships = graph_documents[0].relationships

    # Build lookup for valid nodes
    node_dict = {node.id: node for node in nodes}

    # Filter out invalid edges and collect valid node IDs
    valid_edges = []
    valid_node_ids = set()
    for rel in relationships:
        if rel.source.id in node_dict and rel.target.id in node_dict:
            valid_edges.append(rel)
            valid_node_ids.update([rel.source.id, rel.target.id])

    # fix: removed a dead loop that built an unused `connected_node_ids` set

    # Add valid nodes to the graph
    for node_id in valid_node_ids:
        node = node_dict[node_id]
        try:
            net.add_node(node.id, label=node.id, title=node.type, group=node.type)
        except Exception:  # fix: bare except also trapped SystemExit/KeyboardInterrupt
            continue  # Skip node if error occurs

    # Add valid edges to the graph
    for rel in valid_edges:
        try:
            net.add_edge(rel.source.id, rel.target.id, label=rel.type.lower())
        except Exception:  # fix: bare except also trapped SystemExit/KeyboardInterrupt
            continue  # Skip edge if error occurs

    # Configure graph layout and physics
    net.set_options("""
    {
        "physics": {
            "forceAtlas2Based": {
                "gravitationalConstant": -100,
                "centralGravity": 0.01,
                "springLength": 200,
                "springConstant": 0.08
            },
            "minVelocity": 0.75,
            "solver": "forceAtlas2Based"
        }
    }
    """)

    output_file = "knowledge_graph.html"
    try:
        net.save_graph(output_file)
        print(f"Graph saved to {os.path.abspath(output_file)}")
        return net
    except Exception as e:
        print(f"Error saving graph: {e}")
        return None


def generate_knowledge_graph(text):
    """
    Generates and visualizes a knowledge graph from input text.

    This function runs the graph extraction asynchronously and then visualizes
    the resulting graph using PyVis.

    Args:
        text (str): Input text to convert into a knowledge graph.

    Returns:
        pyvis.network.Network: The visualized network graph object.
    """
    graph_documents = asyncio.run(extract_graph_data(text))
    net = visualize_graph(graph_documents)
    return net
requirements.txt CHANGED
@@ -11,3 +11,6 @@ pyvis>=0.3.2
11
 
12
  # Web UI
13
  streamlit>=1.32.0
 
 
 
 
11
 
12
  # Web UI
13
  streamlit>=1.32.0
14
+
15
+ faiss-cpu
16
+ tiktoken