gzdaniel commited on
Commit
5d6d1fd
·
1 Parent(s): b602fc1

Fix default workspace name for PostgreSQL AGE graph storage

Browse files
Files changed (3) hide show
  1. README-zh.md +1 -1
  2. README.md +1 -1
  3. lightrag/kg/postgres_impl.py +62 -43
README-zh.md CHANGED
@@ -882,7 +882,7 @@ rag = LightRAG(
882
 
883
  * **对于Neo4j图数据库,通过label来实现数据的逻辑隔离**:Neo4JStorage
884
 
885
- 为了保持对遗留数据的兼容,在未配置工作空间时PostgreSQL的默认工作空间为`default`,Neo4j的默认工作空间为`base`。对于所有的外部存储,系统都提供了专用的工作空间环境变量,用于覆盖公共的 `WORKSPACE`环境变量配置。这些适用于指定存储类型的工作空间环境变量为:`REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`。
886
 
887
  ## 编辑实体和关系
888
 
 
882
 
883
  * **对于Neo4j图数据库,通过label来实现数据的逻辑隔离**:Neo4JStorage
884
 
885
+ 为了保持对遗留数据的兼容,在未配置工作空间时PostgreSQL非图存储的工作空间为`default`,PostgreSQL AGE图存储的工作空间为空,Neo4j图存储的默认工作空间为`base`。对于所有的外部存储,系统都提供了专用的工作空间环境变量,用于覆盖公共的 `WORKSPACE`环境变量配置。这些适用于指定存储类型的工作空间环境变量为:`REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`。
886
 
887
  ## 编辑实体和关系
888
 
README.md CHANGED
@@ -928,7 +928,7 @@ The `workspace` parameter ensures data isolation between different LightRAG inst
928
  - **For relational databases, data isolation is achieved by adding a `workspace` field to the tables for logical data separation:** `PGKVStorage`, `PGVectorStorage`, `PGDocStatusStorage`.
929
  - **For the Neo4j graph database, logical data isolation is achieved through labels:** `Neo4JStorage`
930
 
931
- To maintain compatibility with legacy data, the default workspace for PostgreSQL is `default` and for Neo4j is `base` when no workspace is configured. For all external storages, the system provides dedicated workspace environment variables to override the common `WORKSPACE` environment variable configuration. These storage-specific workspace environment variables are: `REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`.
932
 
933
  ## Edit Entities and Relations
934
 
 
928
  - **For relational databases, data isolation is achieved by adding a `workspace` field to the tables for logical data separation:** `PGKVStorage`, `PGVectorStorage`, `PGDocStatusStorage`.
929
  - **For the Neo4j graph database, logical data isolation is achieved through labels:** `Neo4JStorage`
930
 
931
+ To maintain compatibility with legacy data, when no workspace is configured, the default workspace for PostgreSQL non-graph storage is `default`, for PostgreSQL AGE graph storage it is empty, and for Neo4j graph storage it is `base`. For all external storages, the system provides dedicated workspace environment variables to override the common `WORKSPACE` environment variable configuration. These storage-specific workspace environment variables are: `REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`.
932
 
933
  ## Edit Entities and Relations
934
 
lightrag/kg/postgres_impl.py CHANGED
@@ -115,34 +115,46 @@ class PostgreSQLDB:
115
  WHERE table_name = 'lightrag_llm_cache'
116
  AND column_name IN ('chunk_id', 'cache_type')
117
  """
118
-
119
  existing_columns = await self.query(check_columns_sql, multirows=True)
120
- existing_column_names = {col['column_name'] for col in existing_columns} if existing_columns else set()
121
-
 
 
 
 
122
  # Add missing chunk_id column
123
- if 'chunk_id' not in existing_column_names:
124
  logger.info("Adding chunk_id column to LIGHTRAG_LLM_CACHE table")
125
  add_chunk_id_sql = """
126
  ALTER TABLE LIGHTRAG_LLM_CACHE
127
  ADD COLUMN chunk_id VARCHAR(255) NULL
128
  """
129
  await self.execute(add_chunk_id_sql)
130
- logger.info("Successfully added chunk_id column to LIGHTRAG_LLM_CACHE table")
 
 
131
  else:
132
- logger.info("chunk_id column already exists in LIGHTRAG_LLM_CACHE table")
133
-
 
 
134
  # Add missing cache_type column
135
- if 'cache_type' not in existing_column_names:
136
  logger.info("Adding cache_type column to LIGHTRAG_LLM_CACHE table")
137
  add_cache_type_sql = """
138
  ALTER TABLE LIGHTRAG_LLM_CACHE
139
  ADD COLUMN cache_type VARCHAR(32) NULL
140
  """
141
  await self.execute(add_cache_type_sql)
142
- logger.info("Successfully added cache_type column to LIGHTRAG_LLM_CACHE table")
143
-
 
 
144
  # Migrate existing data using optimized regex pattern
145
- logger.info("Migrating existing LLM cache data to populate cache_type field (optimized)")
 
 
146
  optimized_update_sql = """
147
  UPDATE LIGHTRAG_LLM_CACHE
148
  SET cache_type = CASE
@@ -154,8 +166,10 @@ class PostgreSQLDB:
154
  await self.execute(optimized_update_sql)
155
  logger.info("Successfully migrated existing LLM cache data")
156
  else:
157
- logger.info("cache_type column already exists in LIGHTRAG_LLM_CACHE table")
158
-
 
 
159
  except Exception as e:
160
  logger.warning(f"Failed to add columns to LIGHTRAG_LLM_CACHE: {e}")
161
 
@@ -288,80 +302,85 @@ class PostgreSQLDB:
288
  logger.warning(f"Failed to check LLM cache migration status: {e}")
289
  return False
290
 
291
-
292
  async def _migrate_llm_cache_to_flattened_keys(self):
293
  """Optimized version: directly execute single UPDATE migration to migrate old format cache keys to flattened format"""
294
  try:
295
  # Check if migration is needed
296
  check_sql = """
297
- SELECT COUNT(*) as count FROM LIGHTRAG_LLM_CACHE
298
  WHERE id NOT LIKE '%:%'
299
  """
300
  result = await self.query(check_sql)
301
-
302
  if not result or result["count"] == 0:
303
  logger.info("No old format LLM cache data found, skipping migration")
304
  return
305
-
306
  old_count = result["count"]
307
  logger.info(f"Found {old_count} old format cache records")
308
-
309
  # Check potential primary key conflicts (optional but recommended)
310
  conflict_check_sql = """
311
  WITH new_ids AS (
312
- SELECT
313
  workspace,
314
  mode,
315
  id as old_id,
316
- mode || ':' ||
317
  CASE WHEN mode = 'default' THEN 'extract' ELSE 'unknown' END || ':' ||
318
  md5(mode || original_prompt) as new_id
319
- FROM LIGHTRAG_LLM_CACHE
320
  WHERE id NOT LIKE '%:%'
321
  )
322
- SELECT COUNT(*) as conflicts
323
  FROM new_ids n1
324
- JOIN LIGHTRAG_LLM_CACHE existing
325
- ON existing.workspace = n1.workspace
326
- AND existing.mode = n1.mode
327
  AND existing.id = n1.new_id
328
  WHERE existing.id LIKE '%:%' -- Only check conflicts with existing new format records
329
  """
330
-
331
  conflict_result = await self.query(conflict_check_sql)
332
  if conflict_result and conflict_result["conflicts"] > 0:
333
- logger.warning(f"Found {conflict_result['conflicts']} potential ID conflicts with existing records")
 
 
334
  # Can choose to continue or abort, here we choose to continue and log warning
335
-
336
  # Execute single UPDATE migration
337
  logger.info("Starting optimized LLM cache migration...")
338
  migration_sql = """
339
- UPDATE LIGHTRAG_LLM_CACHE
340
- SET
341
- id = mode || ':' ||
342
  CASE WHEN mode = 'default' THEN 'extract' ELSE 'unknown' END || ':' ||
343
  md5(mode || original_prompt),
344
  cache_type = CASE WHEN mode = 'default' THEN 'extract' ELSE 'unknown' END,
345
  update_time = CURRENT_TIMESTAMP
346
  WHERE id NOT LIKE '%:%'
347
  """
348
-
349
  # Execute migration
350
  await self.execute(migration_sql)
351
-
352
  # Verify migration results
353
  verify_sql = """
354
- SELECT COUNT(*) as remaining_old FROM LIGHTRAG_LLM_CACHE
355
  WHERE id NOT LIKE '%:%'
356
  """
357
  verify_result = await self.query(verify_sql)
358
  remaining = verify_result["remaining_old"] if verify_result else -1
359
-
360
  if remaining == 0:
361
- logger.info(f"✅ Successfully migrated {old_count} LLM cache records to flattened format")
 
 
362
  else:
363
- logger.warning(f"⚠️ Migration completed but {remaining} old format records remain")
364
-
 
 
365
  except Exception as e:
366
  logger.error(f"Optimized LLM cache migration failed: {e}")
367
  raise
@@ -1767,8 +1786,8 @@ class PGGraphStorage(BaseGraphStorage):
1767
  """
1768
  Generate graph name based on workspace and namespace for data isolation.
1769
  Rules:
1770
- - If workspace is empty: graph_name = namespace
1771
- - If workspace has value: graph_name = workspace_namespace
1772
 
1773
  Args:
1774
  None
@@ -1777,15 +1796,15 @@ class PGGraphStorage(BaseGraphStorage):
1777
  str: The graph name for the current workspace
1778
  """
1779
  workspace = getattr(self, "workspace", None)
1780
- namespace = self.namespace or os.environ.get("AGE_GRAPH_NAME", "lightrag")
1781
 
1782
- if workspace and workspace.strip():
1783
  # Ensure names comply with PostgreSQL identifier specifications
1784
  safe_workspace = re.sub(r"[^a-zA-Z0-9_]", "_", workspace.strip())
1785
  safe_namespace = re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
1786
  return f"{safe_workspace}_{safe_namespace}"
1787
  else:
1788
- # When workspace is empty, use namespace directly
1789
  return re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
1790
 
1791
  @staticmethod
 
115
  WHERE table_name = 'lightrag_llm_cache'
116
  AND column_name IN ('chunk_id', 'cache_type')
117
  """
118
+
119
  existing_columns = await self.query(check_columns_sql, multirows=True)
120
+ existing_column_names = (
121
+ {col["column_name"] for col in existing_columns}
122
+ if existing_columns
123
+ else set()
124
+ )
125
+
126
  # Add missing chunk_id column
127
+ if "chunk_id" not in existing_column_names:
128
  logger.info("Adding chunk_id column to LIGHTRAG_LLM_CACHE table")
129
  add_chunk_id_sql = """
130
  ALTER TABLE LIGHTRAG_LLM_CACHE
131
  ADD COLUMN chunk_id VARCHAR(255) NULL
132
  """
133
  await self.execute(add_chunk_id_sql)
134
+ logger.info(
135
+ "Successfully added chunk_id column to LIGHTRAG_LLM_CACHE table"
136
+ )
137
  else:
138
+ logger.info(
139
+ "chunk_id column already exists in LIGHTRAG_LLM_CACHE table"
140
+ )
141
+
142
  # Add missing cache_type column
143
+ if "cache_type" not in existing_column_names:
144
  logger.info("Adding cache_type column to LIGHTRAG_LLM_CACHE table")
145
  add_cache_type_sql = """
146
  ALTER TABLE LIGHTRAG_LLM_CACHE
147
  ADD COLUMN cache_type VARCHAR(32) NULL
148
  """
149
  await self.execute(add_cache_type_sql)
150
+ logger.info(
151
+ "Successfully added cache_type column to LIGHTRAG_LLM_CACHE table"
152
+ )
153
+
154
  # Migrate existing data using optimized regex pattern
155
+ logger.info(
156
+ "Migrating existing LLM cache data to populate cache_type field (optimized)"
157
+ )
158
  optimized_update_sql = """
159
  UPDATE LIGHTRAG_LLM_CACHE
160
  SET cache_type = CASE
 
166
  await self.execute(optimized_update_sql)
167
  logger.info("Successfully migrated existing LLM cache data")
168
  else:
169
+ logger.info(
170
+ "cache_type column already exists in LIGHTRAG_LLM_CACHE table"
171
+ )
172
+
173
  except Exception as e:
174
  logger.warning(f"Failed to add columns to LIGHTRAG_LLM_CACHE: {e}")
175
 
 
302
  logger.warning(f"Failed to check LLM cache migration status: {e}")
303
  return False
304
 
 
305
  async def _migrate_llm_cache_to_flattened_keys(self):
306
  """Optimized version: directly execute single UPDATE migration to migrate old format cache keys to flattened format"""
307
  try:
308
  # Check if migration is needed
309
  check_sql = """
310
+ SELECT COUNT(*) as count FROM LIGHTRAG_LLM_CACHE
311
  WHERE id NOT LIKE '%:%'
312
  """
313
  result = await self.query(check_sql)
314
+
315
  if not result or result["count"] == 0:
316
  logger.info("No old format LLM cache data found, skipping migration")
317
  return
318
+
319
  old_count = result["count"]
320
  logger.info(f"Found {old_count} old format cache records")
321
+
322
  # Check potential primary key conflicts (optional but recommended)
323
  conflict_check_sql = """
324
  WITH new_ids AS (
325
+ SELECT
326
  workspace,
327
  mode,
328
  id as old_id,
329
+ mode || ':' ||
330
  CASE WHEN mode = 'default' THEN 'extract' ELSE 'unknown' END || ':' ||
331
  md5(mode || original_prompt) as new_id
332
+ FROM LIGHTRAG_LLM_CACHE
333
  WHERE id NOT LIKE '%:%'
334
  )
335
+ SELECT COUNT(*) as conflicts
336
  FROM new_ids n1
337
+ JOIN LIGHTRAG_LLM_CACHE existing
338
+ ON existing.workspace = n1.workspace
339
+ AND existing.mode = n1.mode
340
  AND existing.id = n1.new_id
341
  WHERE existing.id LIKE '%:%' -- Only check conflicts with existing new format records
342
  """
343
+
344
  conflict_result = await self.query(conflict_check_sql)
345
  if conflict_result and conflict_result["conflicts"] > 0:
346
+ logger.warning(
347
+ f"Found {conflict_result['conflicts']} potential ID conflicts with existing records"
348
+ )
349
  # Can choose to continue or abort, here we choose to continue and log warning
350
+
351
  # Execute single UPDATE migration
352
  logger.info("Starting optimized LLM cache migration...")
353
  migration_sql = """
354
+ UPDATE LIGHTRAG_LLM_CACHE
355
+ SET
356
+ id = mode || ':' ||
357
  CASE WHEN mode = 'default' THEN 'extract' ELSE 'unknown' END || ':' ||
358
  md5(mode || original_prompt),
359
  cache_type = CASE WHEN mode = 'default' THEN 'extract' ELSE 'unknown' END,
360
  update_time = CURRENT_TIMESTAMP
361
  WHERE id NOT LIKE '%:%'
362
  """
363
+
364
  # Execute migration
365
  await self.execute(migration_sql)
366
+
367
  # Verify migration results
368
  verify_sql = """
369
+ SELECT COUNT(*) as remaining_old FROM LIGHTRAG_LLM_CACHE
370
  WHERE id NOT LIKE '%:%'
371
  """
372
  verify_result = await self.query(verify_sql)
373
  remaining = verify_result["remaining_old"] if verify_result else -1
374
+
375
  if remaining == 0:
376
+ logger.info(
377
+ f"✅ Successfully migrated {old_count} LLM cache records to flattened format"
378
+ )
379
  else:
380
+ logger.warning(
381
+ f"⚠️ Migration completed but {remaining} old format records remain"
382
+ )
383
+
384
  except Exception as e:
385
  logger.error(f"Optimized LLM cache migration failed: {e}")
386
  raise
 
1786
  """
1787
  Generate graph name based on workspace and namespace for data isolation.
1788
  Rules:
1789
+ - If workspace is empty or "default": graph_name = namespace
1790
+ - If workspace has other value: graph_name = workspace_namespace
1791
 
1792
  Args:
1793
  None
 
1796
  str: The graph name for the current workspace
1797
  """
1798
  workspace = getattr(self, "workspace", None)
1799
+ namespace = self.namespace
1800
 
1801
+ if workspace and workspace.strip() and workspace.strip().lower() != "default":
1802
  # Ensure names comply with PostgreSQL identifier specifications
1803
  safe_workspace = re.sub(r"[^a-zA-Z0-9_]", "_", workspace.strip())
1804
  safe_namespace = re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
1805
  return f"{safe_workspace}_{safe_namespace}"
1806
  else:
1807
+ # When workspace is empty or "default", use namespace directly
1808
  return re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
1809
 
1810
  @staticmethod