weijiang99 commited on
Commit
52ea128
·
verified ·
1 Parent(s): 5906d8c

Update SpatialBench pipeline

Browse files
.env.example ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # SpatialBench — API Keys
2
+ # Copy this file to .env and fill in your keys.
3
+ # The .env file is read automatically by run_experiments.py and app.py.
4
+ # On HuggingFace Spaces, set these as Space Secrets instead.
5
+
6
+ GEMINI_API_KEY=your_gemini_key_here
7
+ OPENAI_API_KEY=your_openai_key_here
8
+ ANTHROPIC_API_KEY=your_anthropic_key_here
9
+ DEEPSEEK_API_KEY=your_deepseek_key_here
app.py CHANGED
@@ -376,6 +376,9 @@ Key findings:
376
  """
377
 
378
  CSS = """
 
 
 
379
  .leaderboard-table { font-size: 0.9em; }
380
  .status-badge-running { color: #2196F3; font-weight: bold; }
381
  .status-badge-completed { color: #4CAF50; font-weight: bold; }
 
376
  """
377
 
378
  CSS = """
379
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=IBM+Plex+Mono:wght@400;500&display=swap');
380
+ *, body, .gradio-container { font-family: 'Inter', ui-sans-serif, system-ui, sans-serif !important; }
381
+ code, pre, .monospace { font-family: 'IBM Plex Mono', ui-monospace, monospace !important; }
382
  .leaderboard-table { font-size: 0.9em; }
383
  .status-badge-running { color: #2196F3; font-weight: bold; }
384
  .status-badge-completed { color: #4CAF50; font-weight: bold; }
pipeline/job_monitor.py CHANGED
@@ -103,10 +103,8 @@ def _query_slurm(job_ids: list[str]) -> dict[str, JobStatus]:
103
  return {}
104
 
105
 
106
- def submit_sbatch(script_text: str, script_path: str) -> str | None:
107
- """Write script_text to script_path, submit via sbatch, return job_id."""
108
- with open(script_path, "w") as f:
109
- f.write(script_text)
110
  try:
111
  result = subprocess.run(
112
  ["sbatch", script_path],
 
103
  return {}
104
 
105
 
106
+ def submit_sbatch(script_path: str) -> str | None:
107
+ """Submit an existing sbatch script at script_path, return job_id."""
 
 
108
  try:
109
  result = subprocess.run(
110
  ["sbatch", script_path],
requirements.txt CHANGED
@@ -22,11 +22,5 @@ google-generativeai>=0.5.0
22
 
23
  # (DeepSeek uses the OpenAI-compatible client — no extra package needed)
24
 
25
- # Sentence embeddings for reasoning quality analysis
26
- sentence-transformers>=2.6.0
27
-
28
- # ROUGE for reasoning quality analysis
29
- rouge-score>=0.1.2
30
-
31
  # Environment variable loading
32
  python-dotenv>=1.0.0
 
22
 
23
  # (DeepSeek uses the OpenAI-compatible client — no extra package needed)
24
 
 
 
 
 
 
 
25
  # Environment variable loading
26
  python-dotenv>=1.0.0
run_experiments.py CHANGED
@@ -98,7 +98,7 @@ def run_slurm(jobs: list[ExperimentJob], monitor: JobMonitor, dry_run: bool) ->
98
  tmp.write(script_text)
99
  script_path = tmp.name
100
 
101
- job_id = submit_sbatch(script_text, script_path)
102
  if job_id:
103
  monitor.add(
104
  job_id=job_id,
 
98
  tmp.write(script_text)
99
  script_path = tmp.name
100
 
101
+ job_id = submit_sbatch(script_path)
102
  if job_id:
103
  monitor.add(
104
  job_id=job_id,
upload_to_hf.sh ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ # upload_to_hf.sh
3
+ # ----------------
4
+ # Upload the project directory (this script's folder) to a HuggingFace Space.
5
+ #
6
+ # Usage:
7
+ # ./upload_to_hf.sh [HF_SPACE]
8
+ #
9
+ # Examples:
10
+ # ./upload_to_hf.sh your-username/SpatialBench
11
+ # HF_TOKEN=hf_xxx ./upload_to_hf.sh your-username/SpatialBench
12
+ #
13
+ # The script uploads only the files needed to run the Space:
14
+ # app.py, pipeline/, configs/, requirements.txt, README.md
15
+ # It never uploads .env, __pycache__, or result/log directories.
16
+ #
17
+ # Requirements:
18
+ # pip install huggingface_hub
19
+
20
+ set -euo pipefail
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Configuration
24
+ # ---------------------------------------------------------------------------
25
+
26
+ HF_SPACE="${1:-}"
27
+ if [[ -z "$HF_SPACE" ]]; then
28
+ echo "Usage: $0 <your-username/SpaceName>"
29
+ echo "Example: $0 weijiang/SpatialBench"
30
+ exit 1
31
+ fi
32
+
33
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
34
+
35
+ # ---------------------------------------------------------------------------
36
+ # Auth
37
+ # ---------------------------------------------------------------------------
38
+
39
+ if [[ -z "${HF_TOKEN:-}" ]]; then
40
+ # Try reading from huggingface-cli login cache
41
+ HF_TOKEN_FILE="${HOME}/.cache/huggingface/token"
42
+ if [[ -f "$HF_TOKEN_FILE" ]]; then
43
+ HF_TOKEN="$(cat "$HF_TOKEN_FILE")"
44
+ export HF_TOKEN
45
+ fi
46
+ fi
47
+
48
+ if [[ -z "${HF_TOKEN:-}" ]]; then
49
+ echo "No HuggingFace token found."
50
+ echo "Either:"
51
+ echo " 1. Run: huggingface-cli login"
52
+ echo " 2. Set: export HF_TOKEN=hf_xxx"
53
+ exit 1
54
+ fi
55
+
56
+ echo "Uploading to HuggingFace Space: $HF_SPACE"
57
+ echo "Source directory: $SCRIPT_DIR"
58
+ echo ""
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # Upload via Python (huggingface_hub)
62
+ # ---------------------------------------------------------------------------
63
+
64
+ python - <<PYEOF
65
+ import os
66
+ import sys
67
+ from pathlib import Path
68
+ from huggingface_hub import HfApi, upload_folder
69
+
70
+ api = HfApi(token=os.environ["HF_TOKEN"])
71
+ space_id = "$HF_SPACE"
72
+ local_dir = Path("$SCRIPT_DIR")
73
+
74
+ # Ensure the Space exists (type=gradio); if it already exists this is a no-op.
75
+ try:
76
+ api.create_repo(
77
+ repo_id=space_id,
78
+ repo_type="space",
79
+ space_sdk="gradio",
80
+ exist_ok=True,
81
+ private=True,
82
+ )
83
+ print(f"Space ready: https://huggingface.co/spaces/{space_id}")
84
+ except Exception as e:
85
+ print(f"[WARN] Could not create/verify Space: {e}")
86
+ print("Proceeding with upload anyway...")
87
+
88
+ # Files/dirs to ignore (never upload secrets, cache, or large result dirs)
89
+ IGNORE_PATTERNS = [
90
+ ".env",
91
+ "*.pyc",
92
+ "__pycache__",
93
+ "*.egg-info",
94
+ ".git",
95
+ ".gitignore",
96
+ # Large result/log directories that live outside pipeline/ anyway
97
+ "spatial-reasoning-results*",
98
+ "spatial_reasoning_logs",
99
+ "llm-maze-evaluation-results",
100
+ "eval_llm_logs",
101
+ ]
102
+
103
+ print(f"\nUploading files from: {local_dir}")
104
+ print("Ignoring: " + ", ".join(IGNORE_PATTERNS))
105
+ print("")
106
+
107
+ url = upload_folder(
108
+ repo_id=space_id,
109
+ repo_type="space",
110
+ folder_path=str(local_dir),
111
+ path_in_repo=".",
112
+ ignore_patterns=IGNORE_PATTERNS,
113
+ commit_message="Update SpatialBench pipeline",
114
+ token=os.environ["HF_TOKEN"],
115
+ )
116
+
117
+ print(f"\nUpload complete!")
118
+ print(f"Space URL: https://huggingface.co/spaces/{space_id}")
119
+ PYEOF