Spaces:

rm-lht
/

lightrag

Configuration error

App Files Files Community

gzdaniel committed on Jul 5, 2025

Commit

31493cc

1 Parent(s): 79126fd

Refactor `setup.py` to utilize `pyproject.toml` for project installation.

Browse files

Files changed (5) hide show

README-zh.md +11 -9
README.md +11 -9
lightrag/kg/faiss_impl.py +1 -5
pyproject.toml +93 -0
setup.py +4 -104

README-zh.md CHANGED Viewed

@@ -757,6 +757,8 @@ async def initialize_rag():
 <details>
 <summary> <b>使用Faiss进行存储</b> </summary>
 - 安装所需依赖：
@@ -818,7 +820,7 @@ rag = LightRAG(
   create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
   CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
   ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
   -- 如有必要可以删除
   drop INDEX entity_p_idx;
   drop INDEX vertex_p_idx;
@@ -1164,17 +1166,17 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
         from lightrag.llm.openai import openai_complete_if_cache, openai_embed
         from lightrag.utils import EmbeddingFunc
         import os
         async def load_existing_lightrag():
             # 首先，创建或加载现有的 LightRAG 实例
             lightrag_working_dir = "./existing_lightrag_storage"
             # 检查是否存在之前的 LightRAG 实例
             if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
                 print("✅ Found existing LightRAG instance, loading...")
             else:
                 print("❌ No existing LightRAG instance found, will create new one")
             # 使用您的配置创建/加载 LightRAG 实例
             lightrag_instance = LightRAG(
                 working_dir=lightrag_working_dir,
@@ -1197,10 +1199,10 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
                     ),
                 )
             )
             # 初始化存储（如果有现有数据，这将加载现有数据）
             await lightrag_instance.initialize_storages()
             # 现在使用现有的 LightRAG 实例初始化 RAGAnything
             rag = RAGAnything(
                 lightrag=lightrag_instance,  # 传递现有的 LightRAG 实例
@@ -1229,20 +1231,20 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
                 )
                 # 注意：working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
             )
             # 查询现有的知识库
             result = await rag.query_with_multimodal(
                 "What data has been processed in this LightRAG instance?",
                 mode="hybrid"
             )
             print("Query result:", result)
             # 向现有的 LightRAG 实例添加新的多模态文档
             await rag.process_document_complete(
                 file_path="path/to/new/multimodal_document.pdf",
                 output_dir="./output"
             )
         if __name__ == "__main__":
             asyncio.run(load_existing_lightrag())
     ```

 <details>
 <summary> <b>使用Faiss进行存储</b> </summary>
+在使用Faiss向量数据库之前必须手工安装`faiss-cpu`或`faiss-gpu`。
 - 安装所需依赖：
   create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
   CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
   ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
   -- 如有必要可以删除
   drop INDEX entity_p_idx;
   drop INDEX vertex_p_idx;
         from lightrag.llm.openai import openai_complete_if_cache, openai_embed
         from lightrag.utils import EmbeddingFunc
         import os
         async def load_existing_lightrag():
             # 首先，创建或加载现有的 LightRAG 实例
             lightrag_working_dir = "./existing_lightrag_storage"
             # 检查是否存在之前的 LightRAG 实例
             if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
                 print("✅ Found existing LightRAG instance, loading...")
             else:
                 print("❌ No existing LightRAG instance found, will create new one")
             # 使用您的配置创建/加载 LightRAG 实例
             lightrag_instance = LightRAG(
                 working_dir=lightrag_working_dir,
                     ),
                 )
             )
             # 初始化存储（如果有现有数据，这将加载现有数据）
             await lightrag_instance.initialize_storages()
             # 现在使用现有的 LightRAG 实例初始化 RAGAnything
             rag = RAGAnything(
                 lightrag=lightrag_instance,  # 传递现有的 LightRAG 实例
                 )
                 # 注意：working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
             )
             # 查询现有的知识库
             result = await rag.query_with_multimodal(
                 "What data has been processed in this LightRAG instance?",
                 mode="hybrid"
             )
             print("Query result:", result)
             # 向现有的 LightRAG 实例添加新的多模态文档
             await rag.process_document_complete(
                 file_path="path/to/new/multimodal_document.pdf",
                 output_dir="./output"
             )
         if __name__ == "__main__":
             asyncio.run(load_existing_lightrag())
     ```

README.md CHANGED Viewed

@@ -792,7 +792,7 @@ For production level scenarios you will most likely want to leverage an enterpri
   create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
   CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
   ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
   -- drop if necessary
   drop INDEX entity_p_idx;
   drop INDEX vertex_p_idx;
@@ -819,6 +819,8 @@ For production level scenarios you will most likely want to leverage an enterpri
 <details>
 <summary> <b>Using Faiss for Storage</b> </summary>
 - Install the required dependencies:
@@ -1178,17 +1180,17 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
         from lightrag.llm.openai import openai_complete_if_cache, openai_embed
         from lightrag.utils import EmbeddingFunc
         import os
         async def load_existing_lightrag():
             # First, create or load an existing LightRAG instance
             lightrag_working_dir = "./existing_lightrag_storage"
             # Check if previous LightRAG instance exists
             if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
                 print("✅ Found existing LightRAG instance, loading...")
             else:
                 print("❌ No existing LightRAG instance found, will create new one")
             # Create/Load LightRAG instance with your configurations
             lightrag_instance = LightRAG(
                 working_dir=lightrag_working_dir,
@@ -1211,10 +1213,10 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
                     ),
                 )
             )
             # Initialize storage (this will load existing data if available)
             await lightrag_instance.initialize_storages()
             # Now initialize RAGAnything with the existing LightRAG instance
             rag = RAGAnything(
                 lightrag=lightrag_instance,  # Pass the existing LightRAG instance
@@ -1243,20 +1245,20 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
                 )
                 # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
             )
             # Query the existing knowledge base
             result = await rag.query_with_multimodal(
                 "What data has been processed in this LightRAG instance?",
                 mode="hybrid"
             )
             print("Query result:", result)
             # Add new multimodal documents to the existing LightRAG instance
             await rag.process_document_complete(
                 file_path="path/to/new/multimodal_document.pdf",
                 output_dir="./output"
             )
         if __name__ == "__main__":
             asyncio.run(load_existing_lightrag())
     ```

   create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
   CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
   ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
   -- drop if necessary
   drop INDEX entity_p_idx;
   drop INDEX vertex_p_idx;
 <details>
 <summary> <b>Using Faiss for Storage</b> </summary>
+You must manually install `faiss-cpu` or `faiss-gpu` before using the FAISS vector database.
 - Install the required dependencies:
         from lightrag.llm.openai import openai_complete_if_cache, openai_embed
         from lightrag.utils import EmbeddingFunc
         import os
         async def load_existing_lightrag():
             # First, create or load an existing LightRAG instance
             lightrag_working_dir = "./existing_lightrag_storage"
             # Check if previous LightRAG instance exists
             if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
                 print("✅ Found existing LightRAG instance, loading...")
             else:
                 print("❌ No existing LightRAG instance found, will create new one")
             # Create/Load LightRAG instance with your configurations
             lightrag_instance = LightRAG(
                 working_dir=lightrag_working_dir,
                     ),
                 )
             )
             # Initialize storage (this will load existing data if available)
             await lightrag_instance.initialize_storages()
             # Now initialize RAGAnything with the existing LightRAG instance
             rag = RAGAnything(
                 lightrag=lightrag_instance,  # Pass the existing LightRAG instance
                 )
                 # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
             )
             # Query the existing knowledge base
             result = await rag.query_with_multimodal(
                 "What data has been processed in this LightRAG instance?",
                 mode="hybrid"
             )
             print("Query result:", result)
             # Add new multimodal documents to the existing LightRAG instance
             await rag.process_document_complete(
                 file_path="path/to/new/multimodal_document.pdf",
                 output_dir="./output"
             )
         if __name__ == "__main__":
             asyncio.run(load_existing_lightrag())
     ```

lightrag/kg/faiss_impl.py CHANGED Viewed

@@ -17,11 +17,7 @@ from .shared_storage import (
     set_all_update_flags,
 )
-USE_GPU = os.getenv("FAISS_USE_GPU", "0") == "1"
-FAISS_PACKAGE = "faiss-gpu" if USE_GPU else "faiss-cpu"
-if not pm.is_installed(FAISS_PACKAGE):
-    pm.install(FAISS_PACKAGE)
 import faiss  # type: ignore

     set_all_update_flags,
 )
+# You must manually install faiss-cpu or faiss-gpu before using FAISS vector db
 import faiss  # type: ignore

pyproject.toml ADDED Viewed

	@@ -0,0 +1,93 @@

+[build-system]
+requires = ["setuptools>=64", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "lightrag-hku"
+dynamic = ["version"]
+authors = [
+    {name = "Zirui Guo"}
+]
+description = "LightRAG: Simple and Fast Retrieval-Augmented Generation"
+readme = "README.md"
+license = {text = "MIT"}
+requires-python = ">=3.9"
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Intended Audience :: Developers",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+]
+dependencies = [
+    "aiohttp",
+    "configparser",
+    "dotenv",
+    "future",
+    "numpy",
+    "pandas>=2.0.0",
+    "pipmaster",
+    "pydantic",
+    "python-dotenv",
+    "pyuca",
+    "setuptools",
+    "tenacity",
+    "tiktoken",
+    "xlsxwriter>=3.1.0",
+]
+[project.optional-dependencies]
+api = [
+    # Core dependencies
+    "aiohttp",
+    "configparser",
+    "dotenv",
+    "future",
+    "numpy",
+    "openai",
+    "pandas>=2.0.0",
+    "pipmaster",
+    "pydantic",
+    "python-dotenv",
+    "pyuca",
+    "setuptools",
+    "tenacity",
+    "tiktoken",
+    "xlsxwriter>=3.1.0",
+    # API-specific dependencies
+    "aiofiles",
+    "ascii_colors",
+    "asyncpg",
+    "distro",
+    "fastapi",
+    "httpcore",
+    "httpx",
+    "jiter",
+    "passlib[bcrypt]",
+    "PyJWT",
+    "python-jose[cryptography]",
+    "python-multipart",
+    "pytz",
+    "uvicorn",
+]
+[project.scripts]
+lightrag-server = "lightrag.api.lightrag_server:main"
+lightrag-gunicorn = "lightrag.api.run_with_gunicorn:main"
+[project.urls]
+Homepage = "https://github.com/HKUDS/LightRAG"
+Documentation = "https://github.com/HKUDS/LightRAG"
+Repository = "https://github.com/HKUDS/LightRAG"
+"Bug Tracker" = "https://github.com/HKUDS/LightRAG/issues"
+[tool.setuptools]
+packages = ["lightrag"]
+include-package-data = true
+[tool.setuptools.dynamic]
+version = {attr = "lightrag.__version__"}
+[tool.setuptools.package-data]
+lightrag = ["api/webui/**/*"]

setup.py CHANGED Viewed

@@ -1,106 +1,6 @@
-import setuptools
-from pathlib import Path
-# Reading the long description from README.md
-def read_long_description():
-    try:
-        return Path("README.md").read_text(encoding="utf-8")
-    except FileNotFoundError:
-        return "A description of LightRAG is currently unavailable."
-# Retrieving metadata from __init__.py
-def retrieve_metadata():
-    vars2find = ["__author__", "__version__", "__url__"]
-    vars2readme = {}
-    try:
-        with open("./lightrag/__init__.py") as f:
-            for line in f.readlines():
-                for v in vars2find:
-                    if line.startswith(v):
-                        line = (
-                            line.replace(" ", "")
-                            .replace('"', "")
-                            .replace("'", "")
-                            .strip()
-                        )
-                        vars2readme[v] = line.split("=")[1]
-    except FileNotFoundError:
-        raise FileNotFoundError("Metadata file './lightrag/__init__.py' not found.")
-    # Checking if all required variables are found
-    missing_vars = [v for v in vars2find if v not in vars2readme]
-    if missing_vars:
-        raise ValueError(
-            f"Missing required metadata variables in __init__.py: {missing_vars}"
-        )
-    return vars2readme
-# Reading dependencies from requirements.txt
-def read_requirements(file_path="requirements.txt"):
-    deps = []
-    try:
-        with open(file_path) as f:
-            deps = [
-                line.strip() for line in f if line.strip() and not line.startswith("#")
-            ]
-    except FileNotFoundError:
-        print(f"Warning: '{file_path}' not found. No dependencies will be installed.")
-    return deps
-def read_api_requirements():
-    return read_requirements("lightrag/api/requirements.txt")
-def read_extra_requirements():
-    return read_requirements("lightrag/tools/lightrag_visualizer/requirements.txt")
-metadata = retrieve_metadata()
-long_description = read_long_description()
-requirements = read_requirements()
-setuptools.setup(
-    name="lightrag-hku",
-    url=metadata["__url__"],
-    version=metadata["__version__"],
-    author=metadata["__author__"],
-    description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    packages=setuptools.find_packages(
-        exclude=("tests*", "docs*")
-    ),  # Automatically find packages
-    classifiers=[
-        "Development Status :: 4 - Beta",
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: MIT License",
-        "Operating System :: OS Independent",
-        "Intended Audience :: Developers",
-        "Topic :: Software Development :: Libraries :: Python Modules",
-    ],
-    python_requires=">=3.9",
-    install_requires=requirements,
-    include_package_data=True,  # Includes non-code files from MANIFEST.in
-    project_urls={  # Additional project metadata
-        "Documentation": metadata.get("__url__", ""),
-        "Source": metadata.get("__url__", ""),
-        "Tracker": f"{metadata.get('__url__', '')}/issues"
-        if metadata.get("__url__")
-        else "",
-    },
-    extras_require={
-        "api": requirements + read_api_requirements(),
-        "tools": read_extra_requirements(),  # API requirements as optional
-    },
-    entry_points={
-        "console_scripts": [
-            "lightrag-server=lightrag.api.lightrag_server:main [api]",
-            "lightrag-gunicorn=lightrag.api.run_with_gunicorn:main [api]",
-        ],
-    },
-)

+# Minimal setup.py for backward compatibility
+# Primary configuration is now in pyproject.toml
+from setuptools import setup
+setup()