| """AstrBot 数据导入器 |
| |
| 负责从 ZIP 备份文件恢复所有数据。 |
| 导入时进行版本校验: |
| - 主版本(前两位)不同时直接拒绝导入 |
| - 小版本(第三位)不同时提示警告,用户可选择强制导入 |
| - 版本匹配时也需要用户确认 |
| """ |
|
|
| import json |
| import os |
| import shutil |
| import zipfile |
| from dataclasses import dataclass, field |
| from datetime import datetime, timezone |
| from pathlib import Path |
| from typing import TYPE_CHECKING, Any |
|
|
| from sqlalchemy import delete |
|
|
| from astrbot.core import logger |
| from astrbot.core.config.default import VERSION |
| from astrbot.core.db import BaseDatabase |
| from astrbot.core.utils.astrbot_path import ( |
| get_astrbot_data_path, |
| get_astrbot_knowledge_base_path, |
| ) |
| from astrbot.core.utils.version_comparator import VersionComparator |
|
|
| |
| from .constants import ( |
| KB_METADATA_MODELS, |
| MAIN_DB_MODELS, |
| get_backup_directories, |
| ) |
|
|
| if TYPE_CHECKING: |
| from astrbot.core.knowledge_base.kb_mgr import KnowledgeBaseManager |
|
|
|
|
| def _get_major_version(version_str: str) -> str: |
| """提取版本的主版本部分(前两位) |
| |
| Args: |
| version_str: 版本字符串,如 "4.9.1", "4.10.0-beta" |
| |
| Returns: |
| 主版本字符串,如 "4.9", "4.10" |
| """ |
| if not version_str: |
| return "0.0" |
| |
| version = version_str.lower().replace("v", "").split("-")[0].split("+")[0] |
| parts = [p for p in version.split(".") if p] |
| if len(parts) >= 2: |
| return f"{parts[0]}.{parts[1]}" |
| elif len(parts) == 1 and parts[0]: |
| return f"{parts[0]}.0" |
| return "0.0" |
|
|
|
|
# Path of the main AstrBot command configuration file (data/cmd_config.json).
CMD_CONFIG_FILE_PATH = os.path.join(get_astrbot_data_path(), "cmd_config.json")
# Root directory where knowledge-base instances keep their on-disk files.
KB_PATH = get_astrbot_knowledge_base_path()
# Default cap on "invalid count" warnings emitted while merging platform_stats rows.
DEFAULT_PLATFORM_STATS_INVALID_COUNT_WARN_LIMIT = 5
# Environment variable that overrides the warning cap above.
PLATFORM_STATS_INVALID_COUNT_WARN_LIMIT_ENV = (
    "ASTRBOT_PLATFORM_STATS_INVALID_COUNT_WARN_LIMIT"
)
|
|
|
|
def _load_platform_stats_invalid_count_warn_limit() -> int:
    """Read the invalid-count warn limit from the environment.

    Returns:
        The non-negative integer from the env var, or the default when the
        variable is unset or holds anything other than a non-negative integer.
    """
    raw_value = os.getenv(PLATFORM_STATS_INVALID_COUNT_WARN_LIMIT_ENV)
    if raw_value is None:
        return DEFAULT_PLATFORM_STATS_INVALID_COUNT_WARN_LIMIT

    try:
        parsed = int(raw_value)
        if parsed < 0:
            # Negative limits make no sense; treat like any other bad value.
            raise ValueError("negative")
    except (TypeError, ValueError):
        logger.warning(
            "Invalid env %s=%r, fallback to default %d",
            PLATFORM_STATS_INVALID_COUNT_WARN_LIMIT_ENV,
            raw_value,
            DEFAULT_PLATFORM_STATS_INVALID_COUNT_WARN_LIMIT,
        )
        return DEFAULT_PLATFORM_STATS_INVALID_COUNT_WARN_LIMIT
    return parsed
|
|
|
|
# Resolved once at import time; see _load_platform_stats_invalid_count_warn_limit.
PLATFORM_STATS_INVALID_COUNT_WARN_LIMIT = (
    _load_platform_stats_invalid_count_warn_limit()
)
|
|
|
|
| class _InvalidCountWarnLimiter: |
| """Rate-limit warnings for invalid platform_stats count values.""" |
|
|
| def __init__(self, limit: int) -> None: |
| self.limit = limit |
| self._count = 0 |
| self._suppression_logged = False |
|
|
| def warn_invalid_count(self, value: Any, key_for_log: tuple[Any, ...]) -> None: |
| if self.limit > 0: |
| if self._count < self.limit: |
| logger.warning( |
| "platform_stats count 非法,已按 0 处理: value=%r, key=%s", |
| value, |
| key_for_log, |
| ) |
| self._count += 1 |
| if self._count == self.limit and not self._suppression_logged: |
| logger.warning( |
| "platform_stats 非法 count 告警已达到上限 (%d),后续将抑制", |
| self.limit, |
| ) |
| self._suppression_logged = True |
| return |
|
|
| if not self._suppression_logged: |
| |
| logger.warning( |
| "platform_stats 非法 count 告警已达到上限 (%d),后续将抑制", |
| self.limit, |
| ) |
| self._suppression_logged = True |
|
|
|
|
@dataclass
class ImportPreCheckResult:
    """Result of the pre-import inspection of a backup archive.

    Carries version-compatibility findings so the frontend can show a
    confirmation dialog before the actual import runs.
    """

    # Whether the archive is a structurally valid AstrBot backup.
    valid: bool = False
    # Whether importing is allowed (False on major-version mismatch).
    can_import: bool = False
    # One of "match", "minor_diff", "major_diff" (see _check_version_compatibility).
    version_status: str = ""
    # Version recorded in the backup manifest.
    backup_version: str = ""
    # Version of the running AstrBot instance.
    current_version: str = VERSION
    # Export timestamp recorded in the manifest.
    backup_time: str = ""
    # Human-readable message for the confirmation dialog.
    confirm_message: str = ""
    # Non-fatal findings to surface to the user.
    warnings: list[str] = field(default_factory=list)
    # Fatal problem description; empty when the check passed.
    error: str = ""
    # Lightweight summary of what the backup contains.
    backup_summary: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize to a plain dict for the API layer."""
        keys = (
            "valid",
            "can_import",
            "version_status",
            "backup_version",
            "current_version",
            "backup_time",
            "confirm_message",
            "warnings",
            "error",
            "backup_summary",
        )
        return {key: getattr(self, key) for key in keys}
|
|
|
|
class ImportResult:
    """Mutable accumulator for the outcome of an import run."""

    def __init__(self) -> None:
        # Overall success flag; flipped to False by add_error().
        self.success = True
        # table name -> rows imported
        self.imported_tables: dict[str, int] = {}
        # file category -> files imported
        self.imported_files: dict[str, int] = {}
        # directory name -> files restored
        self.imported_directories: dict[str, int] = {}
        self.warnings: list[str] = []
        self.errors: list[str] = []

    def add_warning(self, msg: str) -> None:
        """Record a non-fatal problem and log it."""
        self.warnings.append(msg)
        logger.warning(msg)

    def add_error(self, msg: str) -> None:
        """Record a fatal problem, mark the run as failed, and log it."""
        self.errors.append(msg)
        self.success = False
        logger.error(msg)

    def to_dict(self) -> dict:
        """Serialize to a plain dict for the API layer."""
        keys = (
            "success",
            "imported_tables",
            "imported_files",
            "imported_directories",
            "warnings",
            "errors",
        )
        return {key: getattr(self, key) for key in keys}
|
|
|
|
class DatabaseClearError(RuntimeError):
    """Raised when wiping the main-database tables fails during a replace-mode import."""
|
|
|
|
class AstrBotImporter:
    """AstrBot data importer.

    Restores everything contained in a backup archive:
    - all main-database tables
    - knowledge-base metadata and documents
    - the configuration file
    - attachment files
    - knowledge-base media files
    - plugin directory (data/plugins)
    - plugin data directory (data/plugin_data)
    - config directory (data/config)
    - T2I template directory (data/t2i_templates)
    - WebChat data directory (data/webchat)
    - temp file directory (data/temp)
    """
|
|
    def __init__(
        self,
        main_db: BaseDatabase,
        kb_manager: "KnowledgeBaseManager | None" = None,
        config_path: str = CMD_CONFIG_FILE_PATH,
        kb_root_dir: str = KB_PATH,
    ) -> None:
        """Initialize the importer.

        Args:
            main_db: Main database accessor used for all table imports.
            kb_manager: Optional knowledge-base manager; KB import is skipped
                when None.
            config_path: Path of the cmd_config.json file to overwrite.
            kb_root_dir: Root directory holding knowledge-base instances.
        """
        self.main_db = main_db
        self.kb_manager = kb_manager
        self.config_path = config_path
        self.kb_root_dir = kb_root_dir
|
|
| def pre_check(self, zip_path: str) -> ImportPreCheckResult: |
| """预检查备份文件 |
| |
| 在实际导入前检查备份文件的有效性和版本兼容性。 |
| 返回检查结果供前端显示确认对话框。 |
| |
| Args: |
| zip_path: ZIP 备份文件路径 |
| |
| Returns: |
| ImportPreCheckResult: 预检查结果 |
| """ |
| result = ImportPreCheckResult() |
| result.current_version = VERSION |
|
|
| if not os.path.exists(zip_path): |
| result.error = f"备份文件不存在: {zip_path}" |
| return result |
|
|
| try: |
| with zipfile.ZipFile(zip_path, "r") as zf: |
| |
| try: |
| manifest_data = zf.read("manifest.json") |
| manifest = json.loads(manifest_data) |
| except KeyError: |
| result.error = "备份文件缺少 manifest.json,不是有效的 AstrBot 备份" |
| return result |
| except json.JSONDecodeError as e: |
| result.error = f"manifest.json 格式错误: {e}" |
| return result |
|
|
| |
| result.backup_version = manifest.get("astrbot_version", "未知") |
| result.backup_time = manifest.get("exported_at", "未知") |
| result.valid = True |
|
|
| |
| result.backup_summary = { |
| "tables": list(manifest.get("tables", {}).keys()), |
| "has_knowledge_bases": manifest.get("has_knowledge_bases", False), |
| "has_config": manifest.get("has_config", False), |
| "directories": manifest.get("directories", []), |
| } |
|
|
| |
| version_check = self._check_version_compatibility(result.backup_version) |
| result.version_status = version_check["status"] |
| result.can_import = version_check["can_import"] |
|
|
| |
| |
| |
|
|
| return result |
|
|
| except zipfile.BadZipFile: |
| result.error = "无效的 ZIP 文件" |
| return result |
| except Exception as e: |
| result.error = f"检查备份文件失败: {e}" |
| return result |
|
|
| def _check_version_compatibility(self, backup_version: str) -> dict: |
| """检查版本兼容性 |
| |
| 规则: |
| - 主版本(前两位,如 4.9)必须一致,否则拒绝 |
| - 小版本(第三位,如 4.9.1 vs 4.9.2)不同时,警告但允许导入 |
| |
| Returns: |
| dict: {status, can_import, message} |
| """ |
| if not backup_version: |
| return { |
| "status": "major_diff", |
| "can_import": False, |
| "message": "备份文件缺少版本信息", |
| } |
|
|
| |
| backup_major = _get_major_version(backup_version) |
| current_major = _get_major_version(VERSION) |
|
|
| |
| if VersionComparator.compare_version(backup_major, current_major) != 0: |
| return { |
| "status": "major_diff", |
| "can_import": False, |
| "message": ( |
| f"主版本不兼容: 备份版本 {backup_version}, 当前版本 {VERSION}。" |
| f"跨主版本导入可能导致数据损坏,请使用相同主版本的 AstrBot。" |
| ), |
| } |
|
|
| |
| version_cmp = VersionComparator.compare_version(backup_version, VERSION) |
| if version_cmp != 0: |
| return { |
| "status": "minor_diff", |
| "can_import": True, |
| "message": ( |
| f"小版本差异: 备份版本 {backup_version}, 当前版本 {VERSION}。" |
| ), |
| } |
|
|
| return { |
| "status": "match", |
| "can_import": True, |
| "message": "版本匹配", |
| } |
|
|
    async def import_all(
        self,
        zip_path: str,
        mode: str = "replace",
        progress_callback: Any | None = None,
    ) -> ImportResult:
        """Import all data from a ZIP backup file.

        Stages, in order: validate manifest/version -> main database ->
        knowledge bases -> config file -> attachments -> plugin/data
        directories. Main-database failures abort the run; later stages
        only record warnings.

        Args:
            zip_path: Path to the ZIP backup file.
            mode: Import mode; currently only "replace" (clear, then import)
                is supported.
            progress_callback: Optional async callable receiving
                (stage, current, total, message).

        Returns:
            ImportResult: Aggregated outcome with per-table/per-file counts,
            warnings and errors.
        """
        result = ImportResult()

        if not os.path.exists(zip_path):
            result.add_error(f"备份文件不存在: {zip_path}")
            return result

        logger.info(f"开始从 {zip_path} 导入备份")

        try:
            with zipfile.ZipFile(zip_path, "r") as zf:
                # --- Stage 1: validate manifest and version compatibility ---
                if progress_callback:
                    await progress_callback("validate", 0, 100, "正在验证备份文件...")

                try:
                    manifest_data = zf.read("manifest.json")
                    manifest = json.loads(manifest_data)
                except KeyError:
                    result.add_error("备份文件缺少 manifest.json")
                    return result
                except json.JSONDecodeError as e:
                    result.add_error(f"manifest.json 格式错误: {e}")
                    return result

                # Second line of defense; pre_check() should already have run.
                try:
                    self._validate_version(manifest)
                except ValueError as e:
                    result.add_error(str(e))
                    return result

                if progress_callback:
                    await progress_callback("validate", 100, 100, "验证完成")

                # --- Stage 2: main database (fatal on failure) ---
                if progress_callback:
                    await progress_callback("main_db", 0, 100, "正在导入主数据库...")

                try:
                    main_data_content = zf.read("databases/main_db.json")
                    main_data = json.loads(main_data_content)

                    if mode == "replace":
                        await self._clear_main_db()

                    imported = await self._import_main_database(main_data)
                    result.imported_tables.update(imported)
                except DatabaseClearError as e:
                    result.add_error(f"清空主数据库失败: {e}")
                    return result
                except Exception as e:
                    result.add_error(f"导入主数据库失败: {e}")
                    return result

                if progress_callback:
                    await progress_callback("main_db", 100, 100, "主数据库导入完成")

                # --- Stage 3: knowledge bases (non-fatal; warnings only) ---
                if self.kb_manager and "databases/kb_metadata.json" in zf.namelist():
                    if progress_callback:
                        await progress_callback("kb", 0, 100, "正在导入知识库...")

                    try:
                        kb_meta_content = zf.read("databases/kb_metadata.json")
                        kb_meta_data = json.loads(kb_meta_content)

                        if mode == "replace":
                            await self._clear_kb_data()

                        await self._import_knowledge_bases(zf, kb_meta_data, result)
                    except Exception as e:
                        result.add_warning(f"导入知识库失败: {e}")

                    if progress_callback:
                        await progress_callback("kb", 100, 100, "知识库导入完成")

                # --- Stage 4: config file (existing config saved as .bak) ---
                if progress_callback:
                    await progress_callback("config", 0, 100, "正在导入配置文件...")

                if "config/cmd_config.json" in zf.namelist():
                    try:
                        config_content = zf.read("config/cmd_config.json")
                        # Keep a .bak copy of the current config before overwriting.
                        if os.path.exists(self.config_path):
                            backup_path = f"{self.config_path}.bak"
                            shutil.copy2(self.config_path, backup_path)

                        with open(self.config_path, "wb") as f:
                            f.write(config_content)
                        result.imported_files["config"] = 1
                    except Exception as e:
                        result.add_warning(f"导入配置文件失败: {e}")

                if progress_callback:
                    await progress_callback("config", 100, 100, "配置文件导入完成")

                # --- Stage 5: attachment files ---
                if progress_callback:
                    await progress_callback("attachments", 0, 100, "正在导入附件...")

                attachment_count = await self._import_attachments(
                    zf, main_data.get("attachments", [])
                )
                result.imported_files["attachments"] = attachment_count

                if progress_callback:
                    await progress_callback("attachments", 100, 100, "附件导入完成")

                # --- Stage 6: plugin and data directories ---
                if progress_callback:
                    await progress_callback(
                        "directories", 0, 100, "正在导入插件和数据目录..."
                    )

                dir_stats = await self._import_directories(zf, manifest, result)
                result.imported_directories = dir_stats

                if progress_callback:
                    await progress_callback("directories", 100, 100, "目录导入完成")

                logger.info(f"备份导入完成: {result.to_dict()}")
                return result

        except zipfile.BadZipFile:
            result.add_error("无效的 ZIP 文件")
            return result
        except Exception as e:
            result.add_error(f"导入失败: {e}")
            return result
|
|
| def _validate_version(self, manifest: dict) -> None: |
| """验证版本兼容性 - 仅允许相同主版本导入 |
| |
| 注意:此方法仅在 import_all 中调用,用于双重校验。 |
| 前端应先调用 pre_check 获取详细的版本信息并让用户确认。 |
| """ |
| backup_version = manifest.get("astrbot_version") |
| if not backup_version: |
| raise ValueError("备份文件缺少版本信息") |
|
|
| |
| version_check = self._check_version_compatibility(backup_version) |
|
|
| if version_check["status"] == "major_diff": |
| raise ValueError(version_check["message"]) |
|
|
| |
| if version_check["status"] == "minor_diff": |
| logger.warning(f"版本差异警告: {version_check['message']}") |
|
|
| async def _clear_main_db(self) -> None: |
| """清空主数据库所有表""" |
| async with self.main_db.get_db() as session: |
| async with session.begin(): |
| for table_name, model_class in MAIN_DB_MODELS.items(): |
| try: |
| await session.execute(delete(model_class)) |
| logger.debug(f"已清空表 {table_name}") |
| except Exception as e: |
| raise DatabaseClearError( |
| f"清空表 {table_name} 失败: {e}" |
| ) from e |
|
|
| async def _clear_kb_data(self) -> None: |
| """清空知识库数据""" |
| if not self.kb_manager: |
| return |
|
|
| |
| async with self.kb_manager.kb_db.get_db() as session: |
| async with session.begin(): |
| for table_name, model_class in KB_METADATA_MODELS.items(): |
| try: |
| await session.execute(delete(model_class)) |
| logger.debug(f"已清空知识库表 {table_name}") |
| except Exception as e: |
| logger.warning(f"清空知识库表 {table_name} 失败: {e}") |
|
|
| |
| for kb_id in list(self.kb_manager.kb_insts.keys()): |
| try: |
| kb_helper = self.kb_manager.kb_insts[kb_id] |
| await kb_helper.terminate() |
| if kb_helper.kb_dir.exists(): |
| shutil.rmtree(kb_helper.kb_dir) |
| except Exception as e: |
| logger.warning(f"清理知识库 {kb_id} 失败: {e}") |
|
|
| self.kb_manager.kb_insts.clear() |
|
|
| async def _import_main_database( |
| self, data: dict[str, list[dict]] |
| ) -> dict[str, int]: |
| """导入主数据库数据""" |
| imported: dict[str, int] = {} |
|
|
| async with self.main_db.get_db() as session: |
| async with session.begin(): |
| for table_name, rows in data.items(): |
| model_class = MAIN_DB_MODELS.get(table_name) |
| if not model_class: |
| logger.warning(f"未知的表: {table_name}") |
| continue |
| normalized_rows = self._preprocess_main_table_rows(table_name, rows) |
|
|
| count = 0 |
| for row in normalized_rows: |
| try: |
| |
| row = self._convert_datetime_fields(row, model_class) |
| obj = model_class(**row) |
| session.add(obj) |
| count += 1 |
| except Exception as e: |
| logger.warning(f"导入记录到 {table_name} 失败: {e}") |
|
|
| imported[table_name] = count |
| logger.debug(f"导入表 {table_name}: {count} 条记录") |
|
|
| return imported |
|
|
| def _preprocess_main_table_rows( |
| self, table_name: str, rows: list[dict[str, Any]] |
| ) -> list[dict[str, Any]]: |
| if table_name == "platform_stats": |
| normalized_rows = self._merge_platform_stats_rows(rows) |
| duplicate_count = len(rows) - len(normalized_rows) |
| if duplicate_count > 0: |
| logger.warning( |
| "检测到 %s 重复键 %d 条,已在导入前聚合", |
| table_name, |
| duplicate_count, |
| ) |
| return normalized_rows |
| return rows |
|
|
| def _merge_platform_stats_rows( |
| self, rows: list[dict[str, Any]] |
| ) -> list[dict[str, Any]]: |
| """Merge duplicate platform_stats rows by normalized timestamp/platform key. |
| |
| Note: |
| - Invalid/empty timestamps are kept as distinct rows to avoid accidental merging. |
| - Non-string platform_id/platform_type are kept as distinct rows. |
| - Invalid count warnings are rate-limited per function invocation. |
| """ |
| merged: dict[tuple[str, str, str], dict[str, Any]] = {} |
| result: list[dict[str, Any]] = [] |
| warn_limiter = _InvalidCountWarnLimiter(PLATFORM_STATS_INVALID_COUNT_WARN_LIMIT) |
|
|
| for row in rows: |
| normalized_row, normalized_timestamp, count = ( |
| self._normalize_platform_stats_entry(row, warn_limiter) |
| ) |
| platform_id = normalized_row.get("platform_id") |
| platform_type = normalized_row.get("platform_type") |
|
|
| if ( |
| normalized_timestamp is None |
| or not isinstance(platform_id, str) |
| or not isinstance(platform_type, str) |
| ): |
| result.append(normalized_row) |
| continue |
|
|
| merge_key = (normalized_timestamp, platform_id, platform_type) |
| existing = merged.get(merge_key) |
| if existing is None: |
| merged[merge_key] = normalized_row |
| result.append(normalized_row) |
| else: |
| existing["count"] += count |
|
|
| return result |
|
|
| def _normalize_platform_stats_entry( |
| self, |
| row: dict[str, Any], |
| warn_limiter: _InvalidCountWarnLimiter, |
| ) -> tuple[dict[str, Any], str | None, int]: |
| normalized_row = dict(row) |
| raw_timestamp = normalized_row.get("timestamp") |
| normalized_timestamp = self._normalize_platform_stats_timestamp(raw_timestamp) |
|
|
| if normalized_timestamp is not None: |
| normalized_row["timestamp"] = normalized_timestamp |
| elif isinstance(raw_timestamp, str): |
| normalized_row["timestamp"] = raw_timestamp.strip() |
| elif raw_timestamp is None: |
| normalized_row["timestamp"] = "" |
| else: |
| normalized_row["timestamp"] = str(raw_timestamp) |
|
|
| raw_count = normalized_row.get("count", 0) |
| try: |
| count = int(raw_count) |
| except (TypeError, ValueError): |
| key_for_log = ( |
| normalized_row.get("timestamp"), |
| repr(normalized_row.get("platform_id")), |
| repr(normalized_row.get("platform_type")), |
| ) |
| warn_limiter.warn_invalid_count(raw_count, key_for_log) |
| count = 0 |
|
|
| normalized_row["count"] = count |
| return normalized_row, normalized_timestamp, count |
|
|
| def _normalize_platform_stats_timestamp(self, value: Any) -> str | None: |
| if isinstance(value, datetime): |
| dt = value |
| if dt.tzinfo is None: |
| dt = dt.replace(tzinfo=timezone.utc) |
| else: |
| dt = dt.astimezone(timezone.utc) |
| return dt.isoformat() |
| if isinstance(value, str): |
| timestamp = value.strip() |
| if not timestamp: |
| return None |
| if timestamp.endswith("Z"): |
| timestamp = f"{timestamp[:-1]}+00:00" |
| try: |
| dt = datetime.fromisoformat(timestamp) |
| if dt.tzinfo is None: |
| dt = dt.replace(tzinfo=timezone.utc) |
| else: |
| dt = dt.astimezone(timezone.utc) |
| return dt.isoformat() |
| except ValueError: |
| return None |
| return None |
|
|
    async def _import_knowledge_bases(
        self,
        zf: zipfile.ZipFile,
        kb_meta_data: dict[str, list[dict]],
        result: ImportResult,
    ) -> None:
        """Restore knowledge-base metadata, documents, indexes and media files.

        Args:
            zf: Open backup archive.
            kb_meta_data: Mapping of KB metadata table name -> row dicts.
            result: Import result to record per-table counts and warnings in.
        """
        if not self.kb_manager:
            return

        # 1) Metadata tables into the KB database.
        async with self.kb_manager.kb_db.get_db() as session:
            async with session.begin():
                for table_name, rows in kb_meta_data.items():
                    model_class = KB_METADATA_MODELS.get(table_name)
                    if not model_class:
                        # Unknown table in the backup; skip silently.
                        continue

                    count = 0
                    for row in rows:
                        try:
                            row = self._convert_datetime_fields(row, model_class)
                            obj = model_class(**row)
                            session.add(obj)
                            count += 1
                        except Exception as e:
                            logger.warning(f"导入知识库记录到 {table_name} 失败: {e}")

                    result.imported_tables[f"kb_{table_name}"] = count

        # 2) Per-KB on-disk artifacts: documents, FAISS index, media files.
        for kb_data in kb_meta_data.get("knowledge_bases", []):
            kb_id = kb_data.get("kb_id")
            if not kb_id:
                continue

            # Recreate the KB directory under the knowledge-base root.
            kb_dir = Path(self.kb_root_dir) / kb_id
            kb_dir.mkdir(parents=True, exist_ok=True)

            # Document chunks -> vector document store (doc.db).
            doc_path = f"databases/kb_{kb_id}/documents.json"
            if doc_path in zf.namelist():
                try:
                    doc_content = zf.read(doc_path)
                    doc_data = json.loads(doc_content)

                    await self._import_kb_documents(kb_id, doc_data)
                except Exception as e:
                    result.add_warning(f"导入知识库 {kb_id} 的文档失败: {e}")

            # FAISS vector index file.
            faiss_path = f"databases/kb_{kb_id}/index.faiss"
            if faiss_path in zf.namelist():
                try:
                    target_path = kb_dir / "index.faiss"
                    with zf.open(faiss_path) as src, open(target_path, "wb") as dst:
                        dst.write(src.read())
                except Exception as e:
                    result.add_warning(f"导入知识库 {kb_id} 的 FAISS 索引失败: {e}")

            # Media files belonging to this KB.
            # NOTE(review): rel_path comes straight from the archive entry name;
            # a crafted backup containing ".." segments could escape kb_dir
            # (zip-slip) — consider validating before writing. TODO confirm.
            media_prefix = f"files/kb_media/{kb_id}/"
            for name in zf.namelist():
                if name.startswith(media_prefix):
                    try:
                        rel_path = name[len(media_prefix) :]
                        target_path = kb_dir / rel_path
                        target_path.parent.mkdir(parents=True, exist_ok=True)
                        with zf.open(name) as src, open(target_path, "wb") as dst:
                            dst.write(src.read())
                    except Exception as e:
                        result.add_warning(f"导入媒体文件 {name} 失败: {e}")

        # 3) Reload KB instances so in-memory state reflects the restored data.
        await self.kb_manager.load_kbs()
|
|
| async def _import_kb_documents(self, kb_id: str, doc_data: dict) -> None: |
| """导入知识库文档到向量数据库""" |
| from astrbot.core.db.vec_db.faiss_impl.document_storage import DocumentStorage |
|
|
| kb_dir = Path(self.kb_root_dir) / kb_id |
| doc_db_path = kb_dir / "doc.db" |
|
|
| |
| doc_storage = DocumentStorage(str(doc_db_path)) |
| await doc_storage.initialize() |
|
|
| try: |
| documents = doc_data.get("documents", []) |
| for doc in documents: |
| try: |
| await doc_storage.insert_document( |
| doc_id=doc.get("doc_id", ""), |
| text=doc.get("text", ""), |
| metadata=json.loads(doc.get("metadata", "{}")), |
| ) |
| except Exception as e: |
| logger.warning(f"导入文档块失败: {e}") |
| finally: |
| await doc_storage.close() |
|
|
    async def _import_attachments(
        self,
        zf: zipfile.ZipFile,
        attachments: list[dict],
    ) -> int:
        """Restore attachment files from the archive.

        Each file is written back to its original path recorded in the
        attachments table dump when available, otherwise into the default
        attachments directory next to the config file.

        Args:
            zf: Open backup archive.
            attachments: Row dicts of the attachments table (from main_db dump).

        Returns:
            Number of attachment files successfully written.
        """
        count = 0

        attachments_dir = Path(self.config_path).parent / "attachments"
        attachments_dir.mkdir(parents=True, exist_ok=True)

        attachment_prefix = "files/attachments/"
        for name in zf.namelist():
            if name.startswith(attachment_prefix) and name != attachment_prefix:
                try:
                    # Match the archive entry to its DB row via the file stem.
                    attachment_id = os.path.splitext(os.path.basename(name))[0]
                    original_path = None
                    for att in attachments:
                        if att.get("attachment_id") == attachment_id:
                            original_path = att.get("path")
                            break

                    # NOTE(review): original_path is taken verbatim from the
                    # backup and may point anywhere on disk — consider
                    # restricting it to the data directory. TODO confirm.
                    if original_path:
                        target_path = Path(original_path)
                    else:
                        target_path = attachments_dir / os.path.basename(name)

                    target_path.parent.mkdir(parents=True, exist_ok=True)
                    with zf.open(name) as src, open(target_path, "wb") as dst:
                        dst.write(src.read())
                    count += 1
                except Exception as e:
                    logger.warning(f"导入附件 {name} 失败: {e}")

        return count
|
|
    async def _import_directories(
        self,
        zf: zipfile.ZipFile,
        manifest: dict,
        result: ImportResult,
    ) -> dict[str, int]:
        """Restore plugin and data directories listed in the manifest.

        Existing target directories are moved aside to "<dir>.bak" before the
        archived content is extracted in their place.

        Args:
            zf: Open backup archive.
            manifest: Backup manifest (directory backups need format >= 1.1).
            result: Import result used to record warnings.

        Returns:
            dict: Mapping of directory name -> number of files restored.
        """
        dir_stats: dict[str, int] = {}

        # Directory backups only exist in manifest format >= 1.1.
        backup_version = manifest.get("version", "1.0")
        if VersionComparator.compare_version(backup_version, "1.1") < 0:
            logger.info("备份版本不支持目录备份,跳过目录导入")
            return dir_stats

        backed_up_dirs = manifest.get("directories", [])
        backup_directories = get_backup_directories()

        for dir_name in backed_up_dirs:
            if dir_name not in backup_directories:
                result.add_warning(f"未知的目录类型: {dir_name}")
                continue

            target_dir = Path(backup_directories[dir_name])
            archive_prefix = f"directories/{dir_name}/"

            file_count = 0

            try:
                # All archive entries belonging to this directory.
                dir_files = [
                    name
                    for name in zf.namelist()
                    if name.startswith(archive_prefix) and name != archive_prefix
                ]

                if not dir_files:
                    continue

                # Move the current directory aside (replacing any older .bak).
                if target_dir.exists():
                    backup_path = Path(f"{target_dir}.bak")
                    if backup_path.exists():
                        shutil.rmtree(backup_path)
                    shutil.move(str(target_dir), str(backup_path))
                    logger.debug(f"已备份现有目录 {target_dir} 到 {backup_path}")

                target_dir.mkdir(parents=True, exist_ok=True)

                # Extract each file, preserving the relative layout.
                # NOTE(review): rel_path is not sanitized; a crafted archive
                # with ".." segments could write outside target_dir
                # (zip-slip) — consider validating before writing. TODO confirm.
                for name in dir_files:
                    try:
                        rel_path = name[len(archive_prefix) :]
                        if not rel_path:
                            continue

                        target_path = target_dir / rel_path
                        target_path.parent.mkdir(parents=True, exist_ok=True)

                        with zf.open(name) as src, open(target_path, "wb") as dst:
                            dst.write(src.read())
                        file_count += 1
                    except Exception as e:
                        result.add_warning(f"导入文件 {name} 失败: {e}")

                dir_stats[dir_name] = file_count
                logger.debug(f"导入目录 {dir_name}: {file_count} 个文件")

            except Exception as e:
                result.add_warning(f"导入目录 {dir_name} 失败: {e}")
                dir_stats[dir_name] = 0

        return dir_stats
|
|
| def _convert_datetime_fields(self, row: dict, model_class: type) -> dict: |
| """转换 datetime 字符串字段为 datetime 对象""" |
| result = row.copy() |
|
|
| |
| from sqlalchemy import inspect as sa_inspect |
|
|
| try: |
| mapper = sa_inspect(model_class) |
| for column in mapper.columns: |
| if column.name in result and result[column.name] is not None: |
| |
| from sqlalchemy import DateTime |
|
|
| if isinstance(column.type, DateTime): |
| value = result[column.name] |
| if isinstance(value, str): |
| |
| result[column.name] = datetime.fromisoformat(value) |
| except Exception: |
| pass |
|
|
| return result |
|
|