gzdaniel committed on
Commit
31493cc
·
1 Parent(s): 79126fd

Refactor `setup.py` to utilize `pyproject.toml` for project installation.

Browse files
Files changed (5) hide show
  1. README-zh.md +11 -9
  2. README.md +11 -9
  3. lightrag/kg/faiss_impl.py +1 -5
  4. pyproject.toml +93 -0
  5. setup.py +4 -104
README-zh.md CHANGED
@@ -757,6 +757,8 @@ async def initialize_rag():
757
 
758
  <details>
759
  <summary> <b>使用Faiss进行存储</b> </summary>
 
 
760
 
761
  - 安装所需依赖:
762
 
@@ -818,7 +820,7 @@ rag = LightRAG(
818
  create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
819
  CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
820
  ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
821
-
822
  -- 如有必要可以删除
823
  drop INDEX entity_p_idx;
824
  drop INDEX vertex_p_idx;
@@ -1164,17 +1166,17 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
1164
  from lightrag.llm.openai import openai_complete_if_cache, openai_embed
1165
  from lightrag.utils import EmbeddingFunc
1166
  import os
1167
-
1168
  async def load_existing_lightrag():
1169
  # 首先,创建或加载现有的 LightRAG 实例
1170
  lightrag_working_dir = "./existing_lightrag_storage"
1171
-
1172
  # 检查是否存在之前的 LightRAG 实例
1173
  if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
1174
  print("✅ Found existing LightRAG instance, loading...")
1175
  else:
1176
  print("❌ No existing LightRAG instance found, will create new one")
1177
-
1178
  # 使用您的配置创建/加载 LightRAG 实例
1179
  lightrag_instance = LightRAG(
1180
  working_dir=lightrag_working_dir,
@@ -1197,10 +1199,10 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
1197
  ),
1198
  )
1199
  )
1200
-
1201
  # 初始化存储(如果有现有数据,这将加载现有数据)
1202
  await lightrag_instance.initialize_storages()
1203
-
1204
  # 现在使用现有的 LightRAG 实例初始化 RAGAnything
1205
  rag = RAGAnything(
1206
  lightrag=lightrag_instance, # 传递现有的 LightRAG 实例
@@ -1229,20 +1231,20 @@ LightRAG 现已与 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) 实现
1229
  )
1230
  # 注意:working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
1231
  )
1232
-
1233
  # 查询现有的知识库
1234
  result = await rag.query_with_multimodal(
1235
  "What data has been processed in this LightRAG instance?",
1236
  mode="hybrid"
1237
  )
1238
  print("Query result:", result)
1239
-
1240
  # 向现有的 LightRAG 实例添加新的多模态文档
1241
  await rag.process_document_complete(
1242
  file_path="path/to/new/multimodal_document.pdf",
1243
  output_dir="./output"
1244
  )
1245
-
1246
  if __name__ == "__main__":
1247
  asyncio.run(load_existing_lightrag())
1248
  ```
 
757
 
758
  <details>
759
  <summary> <b>使用Faiss进行存储</b> </summary>
760
+ 在使用Faiss向量数据库之前必须手工安装`faiss-cpu`或`faiss-gpu`。
761
+
762
 
763
  - 安装所需依赖:
764
 
 
820
  create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
821
  CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
822
  ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
823
+
824
  -- 如有必要可以删除
825
  drop INDEX entity_p_idx;
826
  drop INDEX vertex_p_idx;
 
1166
  from lightrag.llm.openai import openai_complete_if_cache, openai_embed
1167
  from lightrag.utils import EmbeddingFunc
1168
  import os
1169
+
1170
  async def load_existing_lightrag():
1171
  # 首先,创建或加载现有的 LightRAG 实例
1172
  lightrag_working_dir = "./existing_lightrag_storage"
1173
+
1174
  # 检查是否存在之前的 LightRAG 实例
1175
  if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
1176
  print("✅ Found existing LightRAG instance, loading...")
1177
  else:
1178
  print("❌ No existing LightRAG instance found, will create new one")
1179
+
1180
  # 使用您的配置创建/加载 LightRAG 实例
1181
  lightrag_instance = LightRAG(
1182
  working_dir=lightrag_working_dir,
 
1199
  ),
1200
  )
1201
  )
1202
+
1203
  # 初始化存储(如果有现有数据,这将加载现有数据)
1204
  await lightrag_instance.initialize_storages()
1205
+
1206
  # 现在使用现有的 LightRAG 实例初始化 RAGAnything
1207
  rag = RAGAnything(
1208
  lightrag=lightrag_instance, # 传递现有的 LightRAG 实例
 
1231
  )
1232
  # 注意:working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
1233
  )
1234
+
1235
  # 查询现有的知识库
1236
  result = await rag.query_with_multimodal(
1237
  "What data has been processed in this LightRAG instance?",
1238
  mode="hybrid"
1239
  )
1240
  print("Query result:", result)
1241
+
1242
  # 向现有的 LightRAG 实例添加新的多模态文档
1243
  await rag.process_document_complete(
1244
  file_path="path/to/new/multimodal_document.pdf",
1245
  output_dir="./output"
1246
  )
1247
+
1248
  if __name__ == "__main__":
1249
  asyncio.run(load_existing_lightrag())
1250
  ```
README.md CHANGED
@@ -792,7 +792,7 @@ For production level scenarios you will most likely want to leverage an enterpri
792
  create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
793
  CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
794
  ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
795
-
796
  -- drop if necessary
797
  drop INDEX entity_p_idx;
798
  drop INDEX vertex_p_idx;
@@ -819,6 +819,8 @@ For production level scenarios you will most likely want to leverage an enterpri
819
 
820
  <details>
821
  <summary> <b>Using Faiss for Storage</b> </summary>
 
 
822
 
823
  - Install the required dependencies:
824
 
@@ -1178,17 +1180,17 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
1178
  from lightrag.llm.openai import openai_complete_if_cache, openai_embed
1179
  from lightrag.utils import EmbeddingFunc
1180
  import os
1181
-
1182
  async def load_existing_lightrag():
1183
  # First, create or load an existing LightRAG instance
1184
  lightrag_working_dir = "./existing_lightrag_storage"
1185
-
1186
  # Check if previous LightRAG instance exists
1187
  if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
1188
  print("✅ Found existing LightRAG instance, loading...")
1189
  else:
1190
  print("❌ No existing LightRAG instance found, will create new one")
1191
-
1192
  # Create/Load LightRAG instance with your configurations
1193
  lightrag_instance = LightRAG(
1194
  working_dir=lightrag_working_dir,
@@ -1211,10 +1213,10 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
1211
  ),
1212
  )
1213
  )
1214
-
1215
  # Initialize storage (this will load existing data if available)
1216
  await lightrag_instance.initialize_storages()
1217
-
1218
  # Now initialize RAGAnything with the existing LightRAG instance
1219
  rag = RAGAnything(
1220
  lightrag=lightrag_instance, # Pass the existing LightRAG instance
@@ -1243,20 +1245,20 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/
1243
  )
1244
  # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
1245
  )
1246
-
1247
  # Query the existing knowledge base
1248
  result = await rag.query_with_multimodal(
1249
  "What data has been processed in this LightRAG instance?",
1250
  mode="hybrid"
1251
  )
1252
  print("Query result:", result)
1253
-
1254
  # Add new multimodal documents to the existing LightRAG instance
1255
  await rag.process_document_complete(
1256
  file_path="path/to/new/multimodal_document.pdf",
1257
  output_dir="./output"
1258
  )
1259
-
1260
  if __name__ == "__main__":
1261
  asyncio.run(load_existing_lightrag())
1262
  ```
 
792
  create INDEX CONCURRENTLY entity_idx_node_id ON dickens."Entity" (ag_catalog.agtype_access_operator(properties, '"node_id"'::agtype));
793
  CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON dickens."Entity" using gin(properties);
794
  ALTER TABLE dickens."DIRECTED" CLUSTER ON directed_sid_idx;
795
+
796
  -- drop if necessary
797
  drop INDEX entity_p_idx;
798
  drop INDEX vertex_p_idx;
 
819
 
820
  <details>
821
  <summary> <b>Using Faiss for Storage</b> </summary>
822
+ You must manually install `faiss-cpu` or `faiss-gpu` before using the Faiss vector database.
823
+
824
 
825
  - Install the required dependencies:
826
 
 
1180
  from lightrag.llm.openai import openai_complete_if_cache, openai_embed
1181
  from lightrag.utils import EmbeddingFunc
1182
  import os
1183
+
1184
  async def load_existing_lightrag():
1185
  # First, create or load an existing LightRAG instance
1186
  lightrag_working_dir = "./existing_lightrag_storage"
1187
+
1188
  # Check if previous LightRAG instance exists
1189
  if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
1190
  print("✅ Found existing LightRAG instance, loading...")
1191
  else:
1192
  print("❌ No existing LightRAG instance found, will create new one")
1193
+
1194
  # Create/Load LightRAG instance with your configurations
1195
  lightrag_instance = LightRAG(
1196
  working_dir=lightrag_working_dir,
 
1213
  ),
1214
  )
1215
  )
1216
+
1217
  # Initialize storage (this will load existing data if available)
1218
  await lightrag_instance.initialize_storages()
1219
+
1220
  # Now initialize RAGAnything with the existing LightRAG instance
1221
  rag = RAGAnything(
1222
  lightrag=lightrag_instance, # Pass the existing LightRAG instance
 
1245
  )
1246
  # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
1247
  )
1248
+
1249
  # Query the existing knowledge base
1250
  result = await rag.query_with_multimodal(
1251
  "What data has been processed in this LightRAG instance?",
1252
  mode="hybrid"
1253
  )
1254
  print("Query result:", result)
1255
+
1256
  # Add new multimodal documents to the existing LightRAG instance
1257
  await rag.process_document_complete(
1258
  file_path="path/to/new/multimodal_document.pdf",
1259
  output_dir="./output"
1260
  )
1261
+
1262
  if __name__ == "__main__":
1263
  asyncio.run(load_existing_lightrag())
1264
  ```
lightrag/kg/faiss_impl.py CHANGED
@@ -17,11 +17,7 @@ from .shared_storage import (
17
  set_all_update_flags,
18
  )
19
 
20
- USE_GPU = os.getenv("FAISS_USE_GPU", "0") == "1"
21
- FAISS_PACKAGE = "faiss-gpu" if USE_GPU else "faiss-cpu"
22
- if not pm.is_installed(FAISS_PACKAGE):
23
- pm.install(FAISS_PACKAGE)
24
-
25
  import faiss # type: ignore
26
 
27
 
 
17
  set_all_update_flags,
18
  )
19
 
20
+ # You must manually install faiss-cpu or faiss-gpu before using FAISS vector db
 
 
 
 
21
  import faiss # type: ignore
22
 
23
 
pyproject.toml ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=64", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "lightrag-hku"
7
+ dynamic = ["version"]
8
+ authors = [
9
+ {name = "Zirui Guo"}
10
+ ]
11
+ description = "LightRAG: Simple and Fast Retrieval-Augmented Generation"
12
+ readme = "README.md"
13
+ license = {text = "MIT"}
14
+ requires-python = ">=3.9"
15
+ classifiers = [
16
+ "Development Status :: 4 - Beta",
17
+ "Programming Language :: Python :: 3",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Operating System :: OS Independent",
20
+ "Intended Audience :: Developers",
21
+ "Topic :: Software Development :: Libraries :: Python Modules",
22
+ ]
23
+ dependencies = [
24
+ "aiohttp",
25
+ "configparser",
26
+ "dotenv",
27
+ "future",
28
+ "numpy",
29
+ "pandas>=2.0.0",
30
+ "pipmaster",
31
+ "pydantic",
32
+ "python-dotenv",
33
+ "pyuca",
34
+ "setuptools",
35
+ "tenacity",
36
+ "tiktoken",
37
+ "xlsxwriter>=3.1.0",
38
+ ]
39
+
40
+ [project.optional-dependencies]
41
+ api = [
42
+ # Core dependencies
43
+ "aiohttp",
44
+ "configparser",
45
+ "dotenv",
46
+ "future",
47
+ "numpy",
48
+ "openai",
49
+ "pandas>=2.0.0",
50
+ "pipmaster",
51
+ "pydantic",
52
+ "python-dotenv",
53
+ "pyuca",
54
+ "setuptools",
55
+ "tenacity",
56
+ "tiktoken",
57
+ "xlsxwriter>=3.1.0",
58
+ # API-specific dependencies
59
+ "aiofiles",
60
+ "ascii_colors",
61
+ "asyncpg",
62
+ "distro",
63
+ "fastapi",
64
+ "httpcore",
65
+ "httpx",
66
+ "jiter",
67
+ "passlib[bcrypt]",
68
+ "PyJWT",
69
+ "python-jose[cryptography]",
70
+ "python-multipart",
71
+ "pytz",
72
+ "uvicorn",
73
+ ]
74
+
75
+ [project.scripts]
76
+ lightrag-server = "lightrag.api.lightrag_server:main"
77
+ lightrag-gunicorn = "lightrag.api.run_with_gunicorn:main"
78
+
79
+ [project.urls]
80
+ Homepage = "https://github.com/HKUDS/LightRAG"
81
+ Documentation = "https://github.com/HKUDS/LightRAG"
82
+ Repository = "https://github.com/HKUDS/LightRAG"
83
+ "Bug Tracker" = "https://github.com/HKUDS/LightRAG/issues"
84
+
85
+ [tool.setuptools]
86
+ packages = ["lightrag"]
87
+ include-package-data = true
88
+
89
+ [tool.setuptools.dynamic]
90
+ version = {attr = "lightrag.__version__"}
91
+
92
+ [tool.setuptools.package-data]
93
+ lightrag = ["api/webui/**/*"]
setup.py CHANGED
@@ -1,106 +1,6 @@
1
- import setuptools
2
- from pathlib import Path
3
 
 
4
 
5
- # Reading the long description from README.md
6
- def read_long_description():
7
- try:
8
- return Path("README.md").read_text(encoding="utf-8")
9
- except FileNotFoundError:
10
- return "A description of LightRAG is currently unavailable."
11
-
12
-
13
- # Retrieving metadata from __init__.py
14
- def retrieve_metadata():
15
- vars2find = ["__author__", "__version__", "__url__"]
16
- vars2readme = {}
17
- try:
18
- with open("./lightrag/__init__.py") as f:
19
- for line in f.readlines():
20
- for v in vars2find:
21
- if line.startswith(v):
22
- line = (
23
- line.replace(" ", "")
24
- .replace('"', "")
25
- .replace("'", "")
26
- .strip()
27
- )
28
- vars2readme[v] = line.split("=")[1]
29
- except FileNotFoundError:
30
- raise FileNotFoundError("Metadata file './lightrag/__init__.py' not found.")
31
-
32
- # Checking if all required variables are found
33
- missing_vars = [v for v in vars2find if v not in vars2readme]
34
- if missing_vars:
35
- raise ValueError(
36
- f"Missing required metadata variables in __init__.py: {missing_vars}"
37
- )
38
-
39
- return vars2readme
40
-
41
-
42
- # Reading dependencies from requirements.txt
43
- def read_requirements(file_path="requirements.txt"):
44
- deps = []
45
- try:
46
- with open(file_path) as f:
47
- deps = [
48
- line.strip() for line in f if line.strip() and not line.startswith("#")
49
- ]
50
- except FileNotFoundError:
51
- print(f"Warning: '{file_path}' not found. No dependencies will be installed.")
52
- return deps
53
-
54
-
55
- def read_api_requirements():
56
- return read_requirements("lightrag/api/requirements.txt")
57
-
58
-
59
- def read_extra_requirements():
60
- return read_requirements("lightrag/tools/lightrag_visualizer/requirements.txt")
61
-
62
-
63
- metadata = retrieve_metadata()
64
- long_description = read_long_description()
65
- requirements = read_requirements()
66
-
67
- setuptools.setup(
68
- name="lightrag-hku",
69
- url=metadata["__url__"],
70
- version=metadata["__version__"],
71
- author=metadata["__author__"],
72
- description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
73
- long_description=long_description,
74
- long_description_content_type="text/markdown",
75
- packages=setuptools.find_packages(
76
- exclude=("tests*", "docs*")
77
- ), # Automatically find packages
78
- classifiers=[
79
- "Development Status :: 4 - Beta",
80
- "Programming Language :: Python :: 3",
81
- "License :: OSI Approved :: MIT License",
82
- "Operating System :: OS Independent",
83
- "Intended Audience :: Developers",
84
- "Topic :: Software Development :: Libraries :: Python Modules",
85
- ],
86
- python_requires=">=3.9",
87
- install_requires=requirements,
88
- include_package_data=True, # Includes non-code files from MANIFEST.in
89
- project_urls={ # Additional project metadata
90
- "Documentation": metadata.get("__url__", ""),
91
- "Source": metadata.get("__url__", ""),
92
- "Tracker": f"{metadata.get('__url__', '')}/issues"
93
- if metadata.get("__url__")
94
- else "",
95
- },
96
- extras_require={
97
- "api": requirements + read_api_requirements(),
98
- "tools": read_extra_requirements(), # API requirements as optional
99
- },
100
- entry_points={
101
- "console_scripts": [
102
- "lightrag-server=lightrag.api.lightrag_server:main [api]",
103
- "lightrag-gunicorn=lightrag.api.run_with_gunicorn:main [api]",
104
- ],
105
- },
106
- )
 
1
+ # Minimal setup.py for backward compatibility
2
+ # Primary configuration is now in pyproject.toml
3
 
4
+ from setuptools import setup
5
 
6
+ setup()