Upload 6 files
Browse files- README.md +33 -39
- backup_to_dataset.py +6 -20
- backup_worker.sh +1 -1
- requirements.txt +1 -1
- restore_from_dataset.py +114 -0
- start_patch_and_instructions.txt +52 -0
README.md
CHANGED
|
@@ -1,39 +1,33 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
##
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
-
|
| 27 |
-
- `
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
TOTP_ENCRYPTION_KEY=超长随机字符串
|
| 35 |
-
ADMIN_EMAIL=admin@example.com
|
| 36 |
-
ADMIN_PASSWORD=强密码
|
| 37 |
-
HF_TOKEN=你的 Hugging Face Write Token
|
| 38 |
-
DATASET_REPO_ID=你的用户名/sub2api-backups
|
| 39 |
-
```
|
|
|
|
| 1 |
+
# Sub2API: 备份到 Dataset & 从 Dataset 自动恢复
|
| 2 |
+
|
| 3 |
+
这个包包含 4 个文件:
|
| 4 |
+
|
| 5 |
+
- `backup_to_dataset.py`:把 PostgreSQL 导出为 SQL.gz,上传到 Hugging Face Dataset Repo,并只保留最近 N 个备份。
|
| 6 |
+
- `restore_from_dataset.py`:当本地数据库是空库时,从 Dataset Repo 下载最近一份 SQL.gz 并恢复。
|
| 7 |
+
- `backup_worker.sh`:定时执行备份。
|
| 8 |
+
- `requirements.txt`:安装 `huggingface_hub`。
|
| 9 |
+
|
| 10 |
+
## 推荐接入方式
|
| 11 |
+
|
| 12 |
+
1. 把 `backup_to_dataset.py`、`restore_from_dataset.py`、`backup_worker.sh`、`requirements.txt` 复制到 Space 仓库。
|
| 13 |
+
2. 在 Dockerfile 中把它们复制到镜像里,并在 venv 中安装 `huggingface_hub`。
|
| 14 |
+
3. 在 `start.sh` 里:
|
| 15 |
+
- 记录 `fresh_db=true/false`
|
| 16 |
+
- 仅在 fresh DB 时执行 `restore_from_dataset.py`
|
| 17 |
+
4. 在 `supervisord.conf` 里增加 `backup-worker` 进程。
|
| 18 |
+
|
| 19 |
+
## 需要的 HF Space 环境变量
|
| 20 |
+
|
| 21 |
+
### Secrets
|
| 22 |
+
- `HF_TOKEN`:有 dataset repo 写权限的 Hugging Face token
|
| 23 |
+
- `DATASET_REPO_ID`:例如 `你的用户名/sub2api-backups`
|
| 24 |
+
|
| 25 |
+
### Variables
|
| 26 |
+
- `BACKUP_INTERVAL_MINUTES=60`
|
| 27 |
+
- `BACKUP_KEEP_LAST=10`
|
| 28 |
+
- `AUTO_RESTORE_FROM_DATASET=true`
|
| 29 |
+
|
| 30 |
+
## 说明
|
| 31 |
+
|
| 32 |
+
- 适用于 Hugging Face 免费 Space 的“本地 PostgreSQL + 定时备份到 Dataset + 空库时自动恢复”模式。
|
| 33 |
+
- 这不是块级持久化;如果 Space 在两次备份之间崩掉,仍可能丢失最近一小段新增数据。
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backup_to_dataset.py
CHANGED
|
@@ -21,7 +21,6 @@ def env(name: str, default: str | None = None, required: bool = False) -> str:
|
|
| 21 |
|
| 22 |
|
| 23 |
def run_pg_dump(tmp_sql: Path) -> None:
|
| 24 |
-
# 强制使用本地 PostgreSQL
|
| 25 |
host = env("DATABASE_HOST", "127.0.0.1")
|
| 26 |
port = env("DATABASE_PORT", "5432")
|
| 27 |
user = env("DATABASE_USER", env("POSTGRES_USER", "sub2api"))
|
|
@@ -56,8 +55,7 @@ def upload_backup(gz_path: Path, metadata: dict) -> None:
|
|
| 56 |
dataset_repo_id = env("DATASET_REPO_ID", required=True)
|
| 57 |
|
| 58 |
api = HfApi(token=hf_token)
|
| 59 |
-
|
| 60 |
-
remote_sql_path = f"postgres/{timestamp}.sql.gz"
|
| 61 |
remote_latest_path = "postgres/latest.json"
|
| 62 |
|
| 63 |
print(f"[backup] uploading {gz_path.name} -> {dataset_repo_id}:{remote_sql_path}")
|
|
@@ -66,7 +64,7 @@ def upload_backup(gz_path: Path, metadata: dict) -> None:
|
|
| 66 |
path_in_repo=remote_sql_path,
|
| 67 |
repo_id=dataset_repo_id,
|
| 68 |
repo_type="dataset",
|
| 69 |
-
commit_message=f"backup: {
|
| 70 |
)
|
| 71 |
|
| 72 |
latest_tmp = gz_path.parent / "latest.json"
|
|
@@ -76,7 +74,7 @@ def upload_backup(gz_path: Path, metadata: dict) -> None:
|
|
| 76 |
path_in_repo=remote_latest_path,
|
| 77 |
repo_id=dataset_repo_id,
|
| 78 |
repo_type="dataset",
|
| 79 |
-
commit_message=f"update latest backup metadata: {
|
| 80 |
)
|
| 81 |
|
| 82 |
|
|
@@ -88,19 +86,10 @@ def prune_old_backups() -> None:
|
|
| 88 |
fs = HfFileSystem(token=hf_token)
|
| 89 |
api = HfApi(token=hf_token)
|
| 90 |
|
| 91 |
-
# 列出 dataset repo 下 postgres/ 的所有 sql.gz 备份
|
| 92 |
pattern = f"datasets/{dataset_repo_id}/postgres/*.sql.gz"
|
| 93 |
all_files = fs.glob(pattern)
|
| 94 |
-
|
| 95 |
-
# 转成 repo 内路径,如 postgres/20260320-120000.sql.gz
|
| 96 |
-
remote_paths = []
|
| 97 |
prefix = f"datasets/{dataset_repo_id}/"
|
| 98 |
-
for p in all_files
|
| 99 |
-
if p.startswith(prefix):
|
| 100 |
-
remote_paths.append(p[len(prefix):])
|
| 101 |
-
|
| 102 |
-
# 文件名天然按时间戳可排序
|
| 103 |
-
remote_paths = sorted(remote_paths)
|
| 104 |
|
| 105 |
if len(remote_paths) <= keep_last:
|
| 106 |
print(f"[backup] retention ok: {len(remote_paths)} <= {keep_last}")
|
|
@@ -108,7 +97,6 @@ def prune_old_backups() -> None:
|
|
| 108 |
|
| 109 |
to_delete = remote_paths[:-keep_last]
|
| 110 |
print(f"[backup] pruning {len(to_delete)} old backup(s)")
|
| 111 |
-
|
| 112 |
operations = [CommitOperationDelete(path_in_repo=p) for p in to_delete]
|
| 113 |
api.create_commit(
|
| 114 |
repo_id=dataset_repo_id,
|
|
@@ -134,7 +122,6 @@ def main() -> int:
|
|
| 134 |
try:
|
| 135 |
run_pg_dump(sql_path)
|
| 136 |
gzip_file(sql_path, gz_path)
|
| 137 |
-
|
| 138 |
metadata = {
|
| 139 |
"timestamp_utc": ts,
|
| 140 |
"generated_at_iso": now.isoformat(),
|
|
@@ -143,12 +130,11 @@ def main() -> int:
|
|
| 143 |
"database_name": env("DATABASE_DBNAME", env("POSTGRES_DB", "sub2api")),
|
| 144 |
"dataset_repo_id": env("DATASET_REPO_ID", ""),
|
| 145 |
"file_name": gz_path.name,
|
|
|
|
| 146 |
"file_size_bytes": gz_path.stat().st_size,
|
| 147 |
}
|
| 148 |
-
|
| 149 |
upload_backup(gz_path, metadata)
|
| 150 |
prune_old_backups()
|
| 151 |
-
|
| 152 |
print("[backup] done")
|
| 153 |
return 0
|
| 154 |
except Exception as exc:
|
|
@@ -163,4 +149,4 @@ def main() -> int:
|
|
| 163 |
|
| 164 |
|
| 165 |
if __name__ == "__main__":
|
| 166 |
-
raise SystemExit(main())
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
def run_pg_dump(tmp_sql: Path) -> None:
|
|
|
|
| 24 |
host = env("DATABASE_HOST", "127.0.0.1")
|
| 25 |
port = env("DATABASE_PORT", "5432")
|
| 26 |
user = env("DATABASE_USER", env("POSTGRES_USER", "sub2api"))
|
|
|
|
| 55 |
dataset_repo_id = env("DATASET_REPO_ID", required=True)
|
| 56 |
|
| 57 |
api = HfApi(token=hf_token)
|
| 58 |
+
remote_sql_path = metadata["remote_sql_path"]
|
|
|
|
| 59 |
remote_latest_path = "postgres/latest.json"
|
| 60 |
|
| 61 |
print(f"[backup] uploading {gz_path.name} -> {dataset_repo_id}:{remote_sql_path}")
|
|
|
|
| 64 |
path_in_repo=remote_sql_path,
|
| 65 |
repo_id=dataset_repo_id,
|
| 66 |
repo_type="dataset",
|
| 67 |
+
commit_message=f"backup: {metadata['timestamp_utc']}",
|
| 68 |
)
|
| 69 |
|
| 70 |
latest_tmp = gz_path.parent / "latest.json"
|
|
|
|
| 74 |
path_in_repo=remote_latest_path,
|
| 75 |
repo_id=dataset_repo_id,
|
| 76 |
repo_type="dataset",
|
| 77 |
+
commit_message=f"update latest backup metadata: {metadata['timestamp_utc']}",
|
| 78 |
)
|
| 79 |
|
| 80 |
|
|
|
|
| 86 |
fs = HfFileSystem(token=hf_token)
|
| 87 |
api = HfApi(token=hf_token)
|
| 88 |
|
|
|
|
| 89 |
pattern = f"datasets/{dataset_repo_id}/postgres/*.sql.gz"
|
| 90 |
all_files = fs.glob(pattern)
|
|
|
|
|
|
|
|
|
|
| 91 |
prefix = f"datasets/{dataset_repo_id}/"
|
| 92 |
+
remote_paths = sorted([p[len(prefix):] for p in all_files if p.startswith(prefix)])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
if len(remote_paths) <= keep_last:
|
| 95 |
print(f"[backup] retention ok: {len(remote_paths)} <= {keep_last}")
|
|
|
|
| 97 |
|
| 98 |
to_delete = remote_paths[:-keep_last]
|
| 99 |
print(f"[backup] pruning {len(to_delete)} old backup(s)")
|
|
|
|
| 100 |
operations = [CommitOperationDelete(path_in_repo=p) for p in to_delete]
|
| 101 |
api.create_commit(
|
| 102 |
repo_id=dataset_repo_id,
|
|
|
|
| 122 |
try:
|
| 123 |
run_pg_dump(sql_path)
|
| 124 |
gzip_file(sql_path, gz_path)
|
|
|
|
| 125 |
metadata = {
|
| 126 |
"timestamp_utc": ts,
|
| 127 |
"generated_at_iso": now.isoformat(),
|
|
|
|
| 130 |
"database_name": env("DATABASE_DBNAME", env("POSTGRES_DB", "sub2api")),
|
| 131 |
"dataset_repo_id": env("DATASET_REPO_ID", ""),
|
| 132 |
"file_name": gz_path.name,
|
| 133 |
+
"remote_sql_path": f"postgres/{ts}.sql.gz",
|
| 134 |
"file_size_bytes": gz_path.stat().st_size,
|
| 135 |
}
|
|
|
|
| 136 |
upload_backup(gz_path, metadata)
|
| 137 |
prune_old_backups()
|
|
|
|
| 138 |
print("[backup] done")
|
| 139 |
return 0
|
| 140 |
except Exception as exc:
|
|
|
|
| 149 |
|
| 150 |
|
| 151 |
if __name__ == "__main__":
|
| 152 |
+
raise SystemExit(main())
|
backup_worker.sh
CHANGED
|
@@ -10,4 +10,4 @@ while true; do
|
|
| 10 |
"${VENV_PATH}/bin/python" "${APP_HOME}/backup_to_dataset.py" --once || true
|
| 11 |
echo "[backup-worker] Sleeping ${INTERVAL_MINUTES} minute(s)..."
|
| 12 |
sleep "$((INTERVAL_MINUTES * 60))"
|
| 13 |
-
done
|
|
|
|
| 10 |
"${VENV_PATH}/bin/python" "${APP_HOME}/backup_to_dataset.py" --once || true
|
| 11 |
echo "[backup-worker] Sleeping ${INTERVAL_MINUTES} minute(s)..."
|
| 12 |
sleep "$((INTERVAL_MINUTES * 60))"
|
| 13 |
+
done
|
requirements.txt
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
huggingface_hub>=1.7.0
|
|
|
|
| 1 |
+
huggingface_hub>=1.7.0
|
restore_from_dataset.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
import argparse
|
| 3 |
+
import gzip
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
import shutil
|
| 7 |
+
import subprocess
|
| 8 |
+
import sys
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
from huggingface_hub import HfFileSystem
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def env(name: str, default: str | None = None, required: bool = False) -> str:
|
| 15 |
+
value = os.getenv(name, default)
|
| 16 |
+
if required and not value:
|
| 17 |
+
raise RuntimeError(f"Missing required environment variable: {name}")
|
| 18 |
+
return value or ""
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def download_latest_metadata(fs: HfFileSystem, dataset_repo_id: str, workdir: Path) -> dict | None:
    """Fetch ``postgres/latest.json`` from the dataset repo into *workdir*.

    Returns the parsed metadata dict, or None when the file is missing or
    unreadable — a missing pointer simply means "no backup yet", not an error.
    """
    remote_pointer = f"datasets/{dataset_repo_id}/postgres/latest.json"
    local_copy = workdir / "latest.json"
    try:
        with fs.open(remote_pointer, "rb") as remote, local_copy.open("wb") as local:
            shutil.copyfileobj(remote, local)
    except Exception as exc:  # best-effort: treat any fetch failure as "no backup"
        print(f"[restore] latest.json not found or unreadable: {exc}")
        return None
    return json.loads(local_copy.read_text(encoding="utf-8"))
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def download_backup(fs: HfFileSystem, dataset_repo_id: str, remote_sql_path: str, local_gz: Path) -> None:
    """Stream one backup archive out of the dataset repo onto local disk."""
    full_remote = f"datasets/{dataset_repo_id}/{remote_sql_path}"
    print(f"[restore] downloading {full_remote}")
    # Open the remote side first so a missing file never creates an empty local file.
    with fs.open(full_remote, "rb") as source, local_gz.open("wb") as sink:
        shutil.copyfileobj(source, sink)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def gunzip_file(src: Path, dst: Path) -> None:
    """Decompress the gzip archive *src* into the plain file *dst*."""
    with dst.open("wb") as plain_out:
        with gzip.open(src, "rb") as gz_in:
            shutil.copyfileobj(gz_in, plain_out)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def restore_sql(sql_path: Path) -> None:
    """Replay the SQL dump at *sql_path* into the local PostgreSQL via psql.

    Connection settings come from DATABASE_* env vars with POSTGRES_* as
    fallbacks; the password is handed to psql through PGPASSWORD so it never
    appears on the command line. Raises CalledProcessError on a non-zero
    psql exit (ON_ERROR_STOP=1 aborts at the first failing statement).
    """
    db_host = env("DATABASE_HOST", "127.0.0.1")
    db_port = env("DATABASE_PORT", "5432")
    db_user = env("DATABASE_USER", env("POSTGRES_USER", "sub2api"))
    db_pass = env("DATABASE_PASSWORD", env("POSTGRES_PASSWORD", ""))
    db_name = env("DATABASE_DBNAME", env("POSTGRES_DB", "sub2api"))

    psql_cmd = [
        "psql",
        "-h", db_host,
        "-p", db_port,
        "-U", db_user,
        "-d", db_name,
        "-v", "ON_ERROR_STOP=1",
        "-f", str(sql_path),
    ]
    child_env = dict(os.environ)
    child_env["PGPASSWORD"] = db_pass
    # Drop the final element (the dump path) from the log line.
    print(f"[restore] running: {' '.join(psql_cmd[:-1])} <sql>")
    subprocess.run(psql_cmd, check=True, env=child_env)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def main() -> int:
    """Restore the most recent dataset backup into the local database.

    Returns 0 on success or on an intentional skip (no repo configured,
    no backup published yet); returns 1 when a restore attempt fails.
    """
    parser = argparse.ArgumentParser()
    # Flag is accepted for the start.sh call site but currently unused:
    # the script always restores the latest backup when invoked.
    parser.add_argument("--restore-latest", action="store_true")
    parser.parse_args()

    dataset_repo_id = env("DATASET_REPO_ID", "")
    if not dataset_repo_id:
        print("[restore] DATASET_REPO_ID not set, skipping restore")
        return 0

    # An anonymous filesystem still works for public dataset repos.
    hf_token = os.getenv("HF_TOKEN")
    fs = HfFileSystem(token=hf_token) if hf_token else HfFileSystem()

    # Scratch directory for the downloaded archive and decompressed dump.
    workdir = Path("/tmp/sub2api_restore")
    workdir.mkdir(parents=True, exist_ok=True)

    try:
        metadata = download_latest_metadata(fs, dataset_repo_id, workdir)
        if not metadata:
            print("[restore] no backup metadata available, skipping restore")
            return 0

        # Older metadata may predate the remote_sql_path field; rebuild the
        # path from the timestamp in that case.
        remote_sql_path = metadata.get("remote_sql_path") or f"postgres/{metadata['timestamp_utc']}.sql.gz"
        gz_path = workdir / Path(remote_sql_path).name
        # .stem strips only the trailing ".gz", leaving "<timestamp>.sql".
        sql_path = workdir / (gz_path.stem)

        download_backup(fs, dataset_repo_id, remote_sql_path, gz_path)
        gunzip_file(gz_path, sql_path)
        restore_sql(sql_path)

        # Leave a breadcrumb recording which backup was applied.
        marker = Path(env("SUB2API_DATA_DIR", "/app/data")) / "restore_last.json"
        marker.parent.mkdir(parents=True, exist_ok=True)
        marker.write_text(json.dumps(metadata, ensure_ascii=False, indent=2), encoding="utf-8")
        print(f"[restore] restored backup {remote_sql_path}")
        return 0
    except Exception as exc:
        print(f"[restore] failed: {exc}", file=sys.stderr)
        return 1
    finally:
        # Best-effort cleanup of scratch files; the directory itself is kept.
        for p in workdir.glob("*"):
            try:
                p.unlink()
            except Exception:
                pass
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
if __name__ == "__main__":
    # sys.exit raises SystemExit with main()'s return code, same as the
    # explicit raise form.
    sys.exit(main())
|
start_patch_and_instructions.txt
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ===== 在 start.sh 中集成 restore / backup 的关键片段 =====
|
| 2 |
+
|
| 3 |
+
# 1) 数据库环境变量里,继续保留本地 PostgreSQL
|
| 4 |
+
export DATABASE_HOST="127.0.0.1"
|
| 5 |
+
export DATABASE_PORT="5432"
|
| 6 |
+
export DATABASE_USER="${POSTGRES_USER}"
|
| 7 |
+
export DATABASE_PASSWORD="${POSTGRES_PASSWORD}"
|
| 8 |
+
export DATABASE_DBNAME="${POSTGRES_DB}"
|
| 9 |
+
export DATABASE_SSLMODE="disable"
|
| 10 |
+
|
| 11 |
+
# 2) 初始化 PostgreSQL 前后,增加 fresh_db 标记
|
| 12 |
+
fresh_db=false
|
| 13 |
+
if [[ ! -s "$PGDATA/PG_VERSION" ]]; then
|
| 14 |
+
fresh_db=true
|
| 15 |
+
fi
|
| 16 |
+
|
| 17 |
+
# 3) 在创建数据库/角色成功后、停止 bootstrap PostgreSQL 之前,增加自动恢复
|
| 18 |
+
if [[ "$fresh_db" == "true" && "${AUTO_RESTORE_FROM_DATASET:-true}" == "true" ]]; then
|
| 19 |
+
echo "[restore] fresh database detected, attempting restore from dataset..."
|
| 20 |
+
if [[ -x "$VENV_PATH/bin/python" ]]; then
|
| 21 |
+
"$VENV_PATH/bin/python" /app/restore_from_dataset.py --restore-latest || true
|
| 22 |
+
else
|
| 23 |
+
echo "[restore] python venv not found, skip restore"
|
| 24 |
+
fi
|
| 25 |
+
fi
|
| 26 |
+
|
| 27 |
+
# 4) supervisor 里增加 backup-worker
|
| 28 |
+
[program:backup-worker]
|
| 29 |
+
command=/bin/bash -lc "exec /usr/local/bin/backup_worker.sh"
|
| 30 |
+
autostart=true
|
| 31 |
+
autorestart=true
|
| 32 |
+
startsecs=5
|
| 33 |
+
stdout_logfile=/dev/stdout
|
| 34 |
+
stdout_logfile_maxbytes=0
|
| 35 |
+
stderr_logfile=/dev/stderr
|
| 36 |
+
stderr_logfile_maxbytes=0
|
| 37 |
+
priority=40
|
| 38 |
+
|
| 39 |
+
# 5) Dockerfile 里要复制这些文件并安装依赖
|
| 40 |
+
COPY --chown=user:user backup_to_dataset.py /app/backup_to_dataset.py
|
| 41 |
+
COPY --chown=user:user restore_from_dataset.py /app/restore_from_dataset.py
|
| 42 |
+
COPY --chown=user:user backup_worker.sh /usr/local/bin/backup_worker.sh
|
| 43 |
+
COPY --chown=user:user requirements.txt /app/requirements.txt
|
| 44 |
+
|
| 45 |
+
# 6) HF Space Variables / Secrets 建议新增
|
| 46 |
+
# Secrets:
|
| 47 |
+
# HF_TOKEN=你的 HF 写入 Token
|
| 48 |
+
# DATASET_REPO_ID=你的用户名/你的dataset仓库
|
| 49 |
+
# Variables:
|
| 50 |
+
# BACKUP_INTERVAL_MINUTES=60
|
| 51 |
+
# BACKUP_KEEP_LAST=10
|
| 52 |
+
# AUTO_RESTORE_FROM_DATASET=true
|