Spaces:
Running
Running
Siddharaj Shirke committed on
Commit ·
ee551d0
1
Parent(s): c7e793a
fix: fallback model upload storage when /data is unavailable
Browse files- app/main.py +78 -15
app/main.py
CHANGED
|
@@ -447,23 +447,40 @@ def _phase_model_dirs() -> list[Path]:
|
|
| 447 |
p = (REPO_ROOT / p).resolve()
|
| 448 |
configured.append(p)
|
| 449 |
|
| 450 |
-
data_root_raw = (os.getenv("OPENENV_DATA_DIR") or "
|
| 451 |
-
data_root = Path(data_root_raw)
|
| 452 |
-
if not data_root.is_absolute():
|
| 453 |
data_root = (REPO_ROOT / data_root).resolve()
|
| 454 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 455 |
repo_base = REPO_ROOT / "results" / "best_model"
|
| 456 |
-
data_base = data_root / "results" / "best_model"
|
| 457 |
|
| 458 |
candidates = [
|
| 459 |
*configured,
|
| 460 |
-
data_base / "phase1",
|
| 461 |
-
data_base / "phase2",
|
| 462 |
-
data_root / "best_model" / "phase1",
|
| 463 |
-
data_root / "best_model" / "phase2",
|
| 464 |
repo_base / "phase1",
|
| 465 |
repo_base / "phase2",
|
| 466 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
|
| 468 |
# Preserve order, remove duplicates.
|
| 469 |
deduped: list[Path] = []
|
|
@@ -490,11 +507,45 @@ def _discover_phase12_zip_models() -> list[Path]:
|
|
| 490 |
|
| 491 |
|
| 492 |
def _model_storage_base_dir() -> Path:
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
|
| 499 |
|
| 500 |
def _phase_from_model_path(path: Path) -> int:
|
|
@@ -2009,9 +2060,21 @@ async def api_rl_model_upload(
|
|
| 2009 |
)
|
| 2010 |
|
| 2011 |
safe_name = Path(name).name
|
| 2012 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2013 |
target_dir = base_dir / f"phase{phase}"
|
| 2014 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2015 |
target_path = target_dir / safe_name
|
| 2016 |
|
| 2017 |
total = 0
|
|
|
|
| 447 |
p = (REPO_ROOT / p).resolve()
|
| 448 |
configured.append(p)
|
| 449 |
|
| 450 |
+
data_root_raw = (os.getenv("OPENENV_DATA_DIR") or "").strip()
|
| 451 |
+
data_root = Path(data_root_raw) if data_root_raw else None
|
| 452 |
+
if data_root is not None and not data_root.is_absolute():
|
| 453 |
data_root = (REPO_ROOT / data_root).resolve()
|
| 454 |
|
| 455 |
+
persistence_root = getattr(persistence, "data_dir", None)
|
| 456 |
+
if isinstance(persistence_root, Path):
|
| 457 |
+
persistence_root = persistence_root.resolve()
|
| 458 |
+
|
| 459 |
repo_base = REPO_ROOT / "results" / "best_model"
|
|
|
|
| 460 |
|
| 461 |
candidates = [
|
| 462 |
*configured,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 463 |
repo_base / "phase1",
|
| 464 |
repo_base / "phase2",
|
| 465 |
]
|
| 466 |
+
if data_root is not None:
|
| 467 |
+
candidates.extend(
|
| 468 |
+
[
|
| 469 |
+
data_root / "results" / "best_model" / "phase1",
|
| 470 |
+
data_root / "results" / "best_model" / "phase2",
|
| 471 |
+
data_root / "best_model" / "phase1",
|
| 472 |
+
data_root / "best_model" / "phase2",
|
| 473 |
+
]
|
| 474 |
+
)
|
| 475 |
+
if persistence_root is not None:
|
| 476 |
+
candidates.extend(
|
| 477 |
+
[
|
| 478 |
+
persistence_root / "results" / "best_model" / "phase1",
|
| 479 |
+
persistence_root / "results" / "best_model" / "phase2",
|
| 480 |
+
persistence_root / "best_model" / "phase1",
|
| 481 |
+
persistence_root / "best_model" / "phase2",
|
| 482 |
+
]
|
| 483 |
+
)
|
| 484 |
|
| 485 |
# Preserve order, remove duplicates.
|
| 486 |
deduped: list[Path] = []
|
|
|
|
| 507 |
|
| 508 |
|
| 509 |
def _model_storage_base_dir() -> Path:
    """Return the first usable ``results/best_model`` directory for model storage.

    Candidate roots are tried in priority order:

    1. ``OPENENV_DATA_DIR`` when set and non-blank (a relative value is
       resolved against ``REPO_ROOT``),
    2. ``persistence.data_dir`` when that attribute is a ``Path``,
    3. ``REPO_ROOT / "outputs" / "persist"``,
    4. ``/tmp/openenv_rl``.

    For each root, ``<root>/results/best_model`` is created if missing; the
    first directory that can be created and is writable is returned.

    Returns:
        The selected ``results/best_model`` directory.

    Raises:
        RuntimeError: If none of the candidates can be created or written
            to. The message carries the last error that was seen.
    """
    candidate_roots: list[Path] = []

    configured_root = (os.getenv("OPENENV_DATA_DIR") or "").strip()
    if configured_root:
        p = Path(configured_root)
        if not p.is_absolute():
            p = (REPO_ROOT / p).resolve()
        candidate_roots.append(p)

    persistence_root = getattr(persistence, "data_dir", None)
    if isinstance(persistence_root, Path):
        candidate_roots.append(persistence_root.resolve())

    candidate_roots.extend(
        [
            (REPO_ROOT / "outputs" / "persist").resolve(),
            Path("/tmp/openenv_rl").resolve(),
        ]
    )

    # Drop duplicate roots while keeping the priority order intact.
    unique_roots = list(dict.fromkeys(candidate_roots))

    last_exc: Exception | None = None
    for root in unique_roots:
        base_dir = root / "results" / "best_model"
        try:
            base_dir.mkdir(parents=True, exist_ok=True)
        except OSError as exc:
            last_exc = exc
            continue
        # mkdir(exist_ok=True) also succeeds when the directory already exists
        # but cannot be written to (e.g. a read-only mount), so check
        # writability explicitly before committing to this root.
        if not os.access(base_dir, os.W_OK | os.X_OK):
            last_exc = PermissionError(f"Directory is not writable: {base_dir}")
            continue
        return base_dir
    raise RuntimeError(f"No writable model storage directory found. last_error={last_exc!r}")
|
| 549 |
|
| 550 |
|
| 551 |
def _phase_from_model_path(path: Path) -> int:
|
|
|
|
| 2060 |
)
|
| 2061 |
|
| 2062 |
safe_name = Path(name).name
|
| 2063 |
+
try:
|
| 2064 |
+
base_dir = _model_storage_base_dir()
|
| 2065 |
+
except RuntimeError as exc:
|
| 2066 |
+
raise HTTPException(
|
| 2067 |
+
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
| 2068 |
+
detail=str(exc),
|
| 2069 |
+
) from exc
|
| 2070 |
target_dir = base_dir / f"phase{phase}"
|
| 2071 |
+
try:
|
| 2072 |
+
target_dir.mkdir(parents=True, exist_ok=True)
|
| 2073 |
+
except OSError as exc:
|
| 2074 |
+
raise HTTPException(
|
| 2075 |
+
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
| 2076 |
+
detail=f"Failed to initialize upload directory: {exc}",
|
| 2077 |
+
) from exc
|
| 2078 |
target_path = target_dir / safe_name
|
| 2079 |
|
| 2080 |
total = 0
|