Fix default workspace name for PostgreSQL AGE graph storage
Browse files- README-zh.md +1 -1
- README.md +1 -1
- lightrag/kg/postgres_impl.py +62 -43
README-zh.md
CHANGED
|
@@ -882,7 +882,7 @@ rag = LightRAG(
|
|
| 882 |
|
| 883 |
* **对于Neo4j图数据库,通过label来实现数据的逻辑隔离**:Neo4JStorage
|
| 884 |
|
| 885 |
-
为了保持对遗留数据的兼容,在未配置工作空间时PostgreSQL的
|
| 886 |
|
| 887 |
## 编辑实体和关系
|
| 888 |
|
|
|
|
| 882 |
|
| 883 |
* **对于Neo4j图数据库,通过label来实现数据的逻辑隔离**:Neo4JStorage
|
| 884 |
|
| 885 |
+
为了保持对遗留数据的兼容,在未配置工作空间时PostgreSQL非图存储的工作空间为`default`,PostgreSQL AGE图存储的工作空间为空,Neo4j图存储的默认工作空间为`base`。对于所有的外部存储,系统都提供了专用的工作空间环境变量,用于覆盖公共的 `WORKSPACE`环境变量配置。这些适用于指定存储类型的工作空间环境变量为:`REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`。
|
| 886 |
|
| 887 |
## 编辑实体和关系
|
| 888 |
|
README.md
CHANGED
|
@@ -928,7 +928,7 @@ The `workspace` parameter ensures data isolation between different LightRAG inst
|
|
| 928 |
- **For relational databases, data isolation is achieved by adding a `workspace` field to the tables for logical data separation:** `PGKVStorage`, `PGVectorStorage`, `PGDocStatusStorage`.
|
| 929 |
- **For the Neo4j graph database, logical data isolation is achieved through labels:** `Neo4JStorage`
|
| 930 |
|
| 931 |
-
To maintain compatibility with legacy data, the default workspace for PostgreSQL is `default` and for Neo4j is `base` when no workspace is configured. For all external storages, the system provides dedicated workspace environment variables to override the common `WORKSPACE` environment variable configuration. These storage-specific workspace environment variables are: `REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`.
|
| 932 |
|
| 933 |
## Edit Entities and Relations
|
| 934 |
|
|
|
|
| 928 |
- **For relational databases, data isolation is achieved by adding a `workspace` field to the tables for logical data separation:** `PGKVStorage`, `PGVectorStorage`, `PGDocStatusStorage`.
|
| 929 |
- **For the Neo4j graph database, logical data isolation is achieved through labels:** `Neo4JStorage`
|
| 930 |
|
| 931 |
+
To maintain compatibility with legacy data, the default workspace for PostgreSQL non-graph storage is `default` and, for PostgreSQL AGE graph storage is null, for Neo4j graph storage is `base` when no workspace is configured. For all external storages, the system provides dedicated workspace environment variables to override the common `WORKSPACE` environment variable configuration. These storage-specific workspace environment variables are: `REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`.
|
| 932 |
|
| 933 |
## Edit Entities and Relations
|
| 934 |
|
lightrag/kg/postgres_impl.py
CHANGED
|
@@ -115,34 +115,46 @@ class PostgreSQLDB:
|
|
| 115 |
WHERE table_name = 'lightrag_llm_cache'
|
| 116 |
AND column_name IN ('chunk_id', 'cache_type')
|
| 117 |
"""
|
| 118 |
-
|
| 119 |
existing_columns = await self.query(check_columns_sql, multirows=True)
|
| 120 |
-
existing_column_names =
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
# Add missing chunk_id column
|
| 123 |
-
if
|
| 124 |
logger.info("Adding chunk_id column to LIGHTRAG_LLM_CACHE table")
|
| 125 |
add_chunk_id_sql = """
|
| 126 |
ALTER TABLE LIGHTRAG_LLM_CACHE
|
| 127 |
ADD COLUMN chunk_id VARCHAR(255) NULL
|
| 128 |
"""
|
| 129 |
await self.execute(add_chunk_id_sql)
|
| 130 |
-
logger.info(
|
|
|
|
|
|
|
| 131 |
else:
|
| 132 |
-
logger.info(
|
| 133 |
-
|
|
|
|
|
|
|
| 134 |
# Add missing cache_type column
|
| 135 |
-
if
|
| 136 |
logger.info("Adding cache_type column to LIGHTRAG_LLM_CACHE table")
|
| 137 |
add_cache_type_sql = """
|
| 138 |
ALTER TABLE LIGHTRAG_LLM_CACHE
|
| 139 |
ADD COLUMN cache_type VARCHAR(32) NULL
|
| 140 |
"""
|
| 141 |
await self.execute(add_cache_type_sql)
|
| 142 |
-
logger.info(
|
| 143 |
-
|
|
|
|
|
|
|
| 144 |
# Migrate existing data using optimized regex pattern
|
| 145 |
-
logger.info(
|
|
|
|
|
|
|
| 146 |
optimized_update_sql = """
|
| 147 |
UPDATE LIGHTRAG_LLM_CACHE
|
| 148 |
SET cache_type = CASE
|
|
@@ -154,8 +166,10 @@ class PostgreSQLDB:
|
|
| 154 |
await self.execute(optimized_update_sql)
|
| 155 |
logger.info("Successfully migrated existing LLM cache data")
|
| 156 |
else:
|
| 157 |
-
logger.info(
|
| 158 |
-
|
|
|
|
|
|
|
| 159 |
except Exception as e:
|
| 160 |
logger.warning(f"Failed to add columns to LIGHTRAG_LLM_CACHE: {e}")
|
| 161 |
|
|
@@ -288,80 +302,85 @@ class PostgreSQLDB:
|
|
| 288 |
logger.warning(f"Failed to check LLM cache migration status: {e}")
|
| 289 |
return False
|
| 290 |
|
| 291 |
-
|
| 292 |
async def _migrate_llm_cache_to_flattened_keys(self):
|
| 293 |
"""Optimized version: directly execute single UPDATE migration to migrate old format cache keys to flattened format"""
|
| 294 |
try:
|
| 295 |
# Check if migration is needed
|
| 296 |
check_sql = """
|
| 297 |
-
SELECT COUNT(*) as count FROM LIGHTRAG_LLM_CACHE
|
| 298 |
WHERE id NOT LIKE '%:%'
|
| 299 |
"""
|
| 300 |
result = await self.query(check_sql)
|
| 301 |
-
|
| 302 |
if not result or result["count"] == 0:
|
| 303 |
logger.info("No old format LLM cache data found, skipping migration")
|
| 304 |
return
|
| 305 |
-
|
| 306 |
old_count = result["count"]
|
| 307 |
logger.info(f"Found {old_count} old format cache records")
|
| 308 |
-
|
| 309 |
# Check potential primary key conflicts (optional but recommended)
|
| 310 |
conflict_check_sql = """
|
| 311 |
WITH new_ids AS (
|
| 312 |
-
SELECT
|
| 313 |
workspace,
|
| 314 |
mode,
|
| 315 |
id as old_id,
|
| 316 |
-
mode || ':' ||
|
| 317 |
CASE WHEN mode = 'default' THEN 'extract' ELSE 'unknown' END || ':' ||
|
| 318 |
md5(mode || original_prompt) as new_id
|
| 319 |
-
FROM LIGHTRAG_LLM_CACHE
|
| 320 |
WHERE id NOT LIKE '%:%'
|
| 321 |
)
|
| 322 |
-
SELECT COUNT(*) as conflicts
|
| 323 |
FROM new_ids n1
|
| 324 |
-
JOIN LIGHTRAG_LLM_CACHE existing
|
| 325 |
-
ON existing.workspace = n1.workspace
|
| 326 |
-
AND existing.mode = n1.mode
|
| 327 |
AND existing.id = n1.new_id
|
| 328 |
WHERE existing.id LIKE '%:%' -- Only check conflicts with existing new format records
|
| 329 |
"""
|
| 330 |
-
|
| 331 |
conflict_result = await self.query(conflict_check_sql)
|
| 332 |
if conflict_result and conflict_result["conflicts"] > 0:
|
| 333 |
-
logger.warning(
|
|
|
|
|
|
|
| 334 |
# Can choose to continue or abort, here we choose to continue and log warning
|
| 335 |
-
|
| 336 |
# Execute single UPDATE migration
|
| 337 |
logger.info("Starting optimized LLM cache migration...")
|
| 338 |
migration_sql = """
|
| 339 |
-
UPDATE LIGHTRAG_LLM_CACHE
|
| 340 |
-
SET
|
| 341 |
-
id = mode || ':' ||
|
| 342 |
CASE WHEN mode = 'default' THEN 'extract' ELSE 'unknown' END || ':' ||
|
| 343 |
md5(mode || original_prompt),
|
| 344 |
cache_type = CASE WHEN mode = 'default' THEN 'extract' ELSE 'unknown' END,
|
| 345 |
update_time = CURRENT_TIMESTAMP
|
| 346 |
WHERE id NOT LIKE '%:%'
|
| 347 |
"""
|
| 348 |
-
|
| 349 |
# Execute migration
|
| 350 |
await self.execute(migration_sql)
|
| 351 |
-
|
| 352 |
# Verify migration results
|
| 353 |
verify_sql = """
|
| 354 |
-
SELECT COUNT(*) as remaining_old FROM LIGHTRAG_LLM_CACHE
|
| 355 |
WHERE id NOT LIKE '%:%'
|
| 356 |
"""
|
| 357 |
verify_result = await self.query(verify_sql)
|
| 358 |
remaining = verify_result["remaining_old"] if verify_result else -1
|
| 359 |
-
|
| 360 |
if remaining == 0:
|
| 361 |
-
logger.info(
|
|
|
|
|
|
|
| 362 |
else:
|
| 363 |
-
logger.warning(
|
| 364 |
-
|
|
|
|
|
|
|
| 365 |
except Exception as e:
|
| 366 |
logger.error(f"Optimized LLM cache migration failed: {e}")
|
| 367 |
raise
|
|
@@ -1767,8 +1786,8 @@ class PGGraphStorage(BaseGraphStorage):
|
|
| 1767 |
"""
|
| 1768 |
Generate graph name based on workspace and namespace for data isolation.
|
| 1769 |
Rules:
|
| 1770 |
-
- If workspace is empty: graph_name = namespace
|
| 1771 |
-
- If workspace has value: graph_name = workspace_namespace
|
| 1772 |
|
| 1773 |
Args:
|
| 1774 |
None
|
|
@@ -1777,15 +1796,15 @@ class PGGraphStorage(BaseGraphStorage):
|
|
| 1777 |
str: The graph name for the current workspace
|
| 1778 |
"""
|
| 1779 |
workspace = getattr(self, "workspace", None)
|
| 1780 |
-
namespace = self.namespace
|
| 1781 |
|
| 1782 |
-
if workspace and workspace.strip():
|
| 1783 |
# Ensure names comply with PostgreSQL identifier specifications
|
| 1784 |
safe_workspace = re.sub(r"[^a-zA-Z0-9_]", "_", workspace.strip())
|
| 1785 |
safe_namespace = re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
|
| 1786 |
return f"{safe_workspace}_{safe_namespace}"
|
| 1787 |
else:
|
| 1788 |
-
# When workspace is empty, use namespace directly
|
| 1789 |
return re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
|
| 1790 |
|
| 1791 |
@staticmethod
|
|
|
|
| 115 |
WHERE table_name = 'lightrag_llm_cache'
|
| 116 |
AND column_name IN ('chunk_id', 'cache_type')
|
| 117 |
"""
|
| 118 |
+
|
| 119 |
existing_columns = await self.query(check_columns_sql, multirows=True)
|
| 120 |
+
existing_column_names = (
|
| 121 |
+
{col["column_name"] for col in existing_columns}
|
| 122 |
+
if existing_columns
|
| 123 |
+
else set()
|
| 124 |
+
)
|
| 125 |
+
|
| 126 |
# Add missing chunk_id column
|
| 127 |
+
if "chunk_id" not in existing_column_names:
|
| 128 |
logger.info("Adding chunk_id column to LIGHTRAG_LLM_CACHE table")
|
| 129 |
add_chunk_id_sql = """
|
| 130 |
ALTER TABLE LIGHTRAG_LLM_CACHE
|
| 131 |
ADD COLUMN chunk_id VARCHAR(255) NULL
|
| 132 |
"""
|
| 133 |
await self.execute(add_chunk_id_sql)
|
| 134 |
+
logger.info(
|
| 135 |
+
"Successfully added chunk_id column to LIGHTRAG_LLM_CACHE table"
|
| 136 |
+
)
|
| 137 |
else:
|
| 138 |
+
logger.info(
|
| 139 |
+
"chunk_id column already exists in LIGHTRAG_LLM_CACHE table"
|
| 140 |
+
)
|
| 141 |
+
|
| 142 |
# Add missing cache_type column
|
| 143 |
+
if "cache_type" not in existing_column_names:
|
| 144 |
logger.info("Adding cache_type column to LIGHTRAG_LLM_CACHE table")
|
| 145 |
add_cache_type_sql = """
|
| 146 |
ALTER TABLE LIGHTRAG_LLM_CACHE
|
| 147 |
ADD COLUMN cache_type VARCHAR(32) NULL
|
| 148 |
"""
|
| 149 |
await self.execute(add_cache_type_sql)
|
| 150 |
+
logger.info(
|
| 151 |
+
"Successfully added cache_type column to LIGHTRAG_LLM_CACHE table"
|
| 152 |
+
)
|
| 153 |
+
|
| 154 |
# Migrate existing data using optimized regex pattern
|
| 155 |
+
logger.info(
|
| 156 |
+
"Migrating existing LLM cache data to populate cache_type field (optimized)"
|
| 157 |
+
)
|
| 158 |
optimized_update_sql = """
|
| 159 |
UPDATE LIGHTRAG_LLM_CACHE
|
| 160 |
SET cache_type = CASE
|
|
|
|
| 166 |
await self.execute(optimized_update_sql)
|
| 167 |
logger.info("Successfully migrated existing LLM cache data")
|
| 168 |
else:
|
| 169 |
+
logger.info(
|
| 170 |
+
"cache_type column already exists in LIGHTRAG_LLM_CACHE table"
|
| 171 |
+
)
|
| 172 |
+
|
| 173 |
except Exception as e:
|
| 174 |
logger.warning(f"Failed to add columns to LIGHTRAG_LLM_CACHE: {e}")
|
| 175 |
|
|
|
|
| 302 |
logger.warning(f"Failed to check LLM cache migration status: {e}")
|
| 303 |
return False
|
| 304 |
|
|
|
|
| 305 |
async def _migrate_llm_cache_to_flattened_keys(self):
|
| 306 |
"""Optimized version: directly execute single UPDATE migration to migrate old format cache keys to flattened format"""
|
| 307 |
try:
|
| 308 |
# Check if migration is needed
|
| 309 |
check_sql = """
|
| 310 |
+
SELECT COUNT(*) as count FROM LIGHTRAG_LLM_CACHE
|
| 311 |
WHERE id NOT LIKE '%:%'
|
| 312 |
"""
|
| 313 |
result = await self.query(check_sql)
|
| 314 |
+
|
| 315 |
if not result or result["count"] == 0:
|
| 316 |
logger.info("No old format LLM cache data found, skipping migration")
|
| 317 |
return
|
| 318 |
+
|
| 319 |
old_count = result["count"]
|
| 320 |
logger.info(f"Found {old_count} old format cache records")
|
| 321 |
+
|
| 322 |
# Check potential primary key conflicts (optional but recommended)
|
| 323 |
conflict_check_sql = """
|
| 324 |
WITH new_ids AS (
|
| 325 |
+
SELECT
|
| 326 |
workspace,
|
| 327 |
mode,
|
| 328 |
id as old_id,
|
| 329 |
+
mode || ':' ||
|
| 330 |
CASE WHEN mode = 'default' THEN 'extract' ELSE 'unknown' END || ':' ||
|
| 331 |
md5(mode || original_prompt) as new_id
|
| 332 |
+
FROM LIGHTRAG_LLM_CACHE
|
| 333 |
WHERE id NOT LIKE '%:%'
|
| 334 |
)
|
| 335 |
+
SELECT COUNT(*) as conflicts
|
| 336 |
FROM new_ids n1
|
| 337 |
+
JOIN LIGHTRAG_LLM_CACHE existing
|
| 338 |
+
ON existing.workspace = n1.workspace
|
| 339 |
+
AND existing.mode = n1.mode
|
| 340 |
AND existing.id = n1.new_id
|
| 341 |
WHERE existing.id LIKE '%:%' -- Only check conflicts with existing new format records
|
| 342 |
"""
|
| 343 |
+
|
| 344 |
conflict_result = await self.query(conflict_check_sql)
|
| 345 |
if conflict_result and conflict_result["conflicts"] > 0:
|
| 346 |
+
logger.warning(
|
| 347 |
+
f"Found {conflict_result['conflicts']} potential ID conflicts with existing records"
|
| 348 |
+
)
|
| 349 |
# Can choose to continue or abort, here we choose to continue and log warning
|
| 350 |
+
|
| 351 |
# Execute single UPDATE migration
|
| 352 |
logger.info("Starting optimized LLM cache migration...")
|
| 353 |
migration_sql = """
|
| 354 |
+
UPDATE LIGHTRAG_LLM_CACHE
|
| 355 |
+
SET
|
| 356 |
+
id = mode || ':' ||
|
| 357 |
CASE WHEN mode = 'default' THEN 'extract' ELSE 'unknown' END || ':' ||
|
| 358 |
md5(mode || original_prompt),
|
| 359 |
cache_type = CASE WHEN mode = 'default' THEN 'extract' ELSE 'unknown' END,
|
| 360 |
update_time = CURRENT_TIMESTAMP
|
| 361 |
WHERE id NOT LIKE '%:%'
|
| 362 |
"""
|
| 363 |
+
|
| 364 |
# Execute migration
|
| 365 |
await self.execute(migration_sql)
|
| 366 |
+
|
| 367 |
# Verify migration results
|
| 368 |
verify_sql = """
|
| 369 |
+
SELECT COUNT(*) as remaining_old FROM LIGHTRAG_LLM_CACHE
|
| 370 |
WHERE id NOT LIKE '%:%'
|
| 371 |
"""
|
| 372 |
verify_result = await self.query(verify_sql)
|
| 373 |
remaining = verify_result["remaining_old"] if verify_result else -1
|
| 374 |
+
|
| 375 |
if remaining == 0:
|
| 376 |
+
logger.info(
|
| 377 |
+
f"✅ Successfully migrated {old_count} LLM cache records to flattened format"
|
| 378 |
+
)
|
| 379 |
else:
|
| 380 |
+
logger.warning(
|
| 381 |
+
f"⚠️ Migration completed but {remaining} old format records remain"
|
| 382 |
+
)
|
| 383 |
+
|
| 384 |
except Exception as e:
|
| 385 |
logger.error(f"Optimized LLM cache migration failed: {e}")
|
| 386 |
raise
|
|
|
|
| 1786 |
"""
|
| 1787 |
Generate graph name based on workspace and namespace for data isolation.
|
| 1788 |
Rules:
|
| 1789 |
+
- If workspace is empty or "default": graph_name = namespace
|
| 1790 |
+
- If workspace has other value: graph_name = workspace_namespace
|
| 1791 |
|
| 1792 |
Args:
|
| 1793 |
None
|
|
|
|
| 1796 |
str: The graph name for the current workspace
|
| 1797 |
"""
|
| 1798 |
workspace = getattr(self, "workspace", None)
|
| 1799 |
+
namespace = self.namespace
|
| 1800 |
|
| 1801 |
+
if workspace and workspace.strip() and workspace.strip().lower() != "default":
|
| 1802 |
# Ensure names comply with PostgreSQL identifier specifications
|
| 1803 |
safe_workspace = re.sub(r"[^a-zA-Z0-9_]", "_", workspace.strip())
|
| 1804 |
safe_namespace = re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
|
| 1805 |
return f"{safe_workspace}_{safe_namespace}"
|
| 1806 |
else:
|
| 1807 |
+
# When workspace is empty or "default", use namespace directly
|
| 1808 |
return re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
|
| 1809 |
|
| 1810 |
@staticmethod
|