Somrat Sorkar committed on
Commit
e7f8665
·
unverified ·
2 Parent(s): 7e83c86b81080d

Merge pull request #15 from anurag162008/main

Browse files

feat: Prune stale dataset files and enhance sync and installation methods

Files changed (4) hide show
  1. .env.example +7 -0
  2. multi-provider-key-rotator.cjs +23 -8
  3. openclaw-sync.py +25 -0
  4. start.sh +127 -0
.env.example CHANGED
@@ -175,6 +175,13 @@ LLM_API_KEY_FALLBACK_ENABLED=true
175
  # is also picked up by OpenClaw → that provider's models become
176
  # available in the Control UI for manual selection.
177
  # 3. Rotation pools (*_API_KEYS) work for every active provider
 
 
 
 
 
 
 
178
  # independently and in parallel.
179
  #
180
  # EXAMPLE — default Anthropic, also use OpenAI and Groq:
 
175
  # is also picked up by OpenClaw → that provider's models become
176
  # available in the Control UI for manual selection.
177
  # 3. Rotation pools (*_API_KEYS) work for every active provider
178
+ #
179
+ # Optional: explicitly pin model lists per provider for Control UI visibility
180
+ # when provider keys are configured.
181
+ # Format: comma-separated model IDs
182
+ # NVIDIA_MODELS=meta/llama-3.1-70b-instruct,nvidia/llama-3.1-nemotron-70b-instruct
183
+ # OPENAI_MODELS=gpt-4o-mini,gpt-4.1
184
+ # GROQ_MODELS=llama-3.3-70b-versatile,deepseek-r1-distill-llama-70b
185
  # independently and in parallel.
186
  #
187
  # EXAMPLE — default Anthropic, also use OpenAI and Groq:
multi-provider-key-rotator.cjs CHANGED
@@ -53,6 +53,7 @@ const PROVIDERS = [
53
  hostname: /(?:^|\.)(?:generativelanguage\.googleapis\.com|aiplatform\.googleapis\.com)$/i,
54
  envPlural: 'GEMINI_API_KEYS',
55
  envSingular:'GEMINI_API_KEY',
 
56
  },
57
  {
58
  name: 'deepseek',
@@ -285,15 +286,22 @@ function patchFetch() {
285
  const hostname = resolveHostname(urlLike);
286
  const provider = matchProvider(hostname);
287
 
288
- if (provider) {
289
  const key = nextKey(provider);
290
  if (key) {
291
- const headers = init.headers || (input && input.headers) || undefined;
292
- const patchedHeaders = setAuthHeader(headers, key);
293
- init = { ...init, headers: patchedHeaders };
294
-
295
- if (input && typeof input === 'object' && !(input instanceof URL) && input.headers) {
296
- try { input = new Request(input, { headers: patchedHeaders }); } catch { /* noop */ }
 
 
 
 
 
 
 
297
  }
298
  }
299
  }
@@ -319,7 +327,14 @@ function patchHttpModule(mod) {
319
  if (provider) {
320
  const key = nextKey(provider);
321
  if (key) {
322
- if (typeof options === 'string' || options instanceof URL) {
 
 
 
 
 
 
 
323
  const u = new URL(String(options));
324
  args[0] = {
325
  protocol: u.protocol,
 
53
  hostname: /(?:^|\.)(?:generativelanguage\.googleapis\.com|aiplatform\.googleapis\.com)$/i,
54
  envPlural: 'GEMINI_API_KEYS',
55
  envSingular:'GEMINI_API_KEY',
56
+ queryParam: true,
57
  },
58
  {
59
  name: 'deepseek',
 
286
  const hostname = resolveHostname(urlLike);
287
  const provider = matchProvider(hostname);
288
 
289
+ if (provider) {
290
  const key = nextKey(provider);
291
  if (key) {
292
+ if (provider.queryParam) {
293
+ // Gemini: the key goes in the URL query parameter, not in a Bearer header
294
+ const url = new URL(typeof input === 'string' ? input : input.url);
295
+ url.searchParams.set('key', key);
296
+ input = typeof input === 'string' ? url.toString() : new Request(url.toString(), input);
297
+ } else {
298
+ const headers = init.headers || (input && input.headers) || undefined;
299
+ const patchedHeaders = setAuthHeader(headers, key);
300
+ init = { ...init, headers: patchedHeaders };
301
+
302
+ if (input && typeof input === 'object' && !(input instanceof URL) && input.headers) {
303
+ try { input = new Request(input, { headers: patchedHeaders }); } catch { /* noop */ }
304
+ }
305
  }
306
  }
307
  }
 
327
  if (provider) {
328
  const key = nextKey(provider);
329
  if (key) {
330
+ if (provider.queryParam) {
331
+ // Gemini: pass the key via the ?key= query parameter
332
+ const u = new URL(String(typeof options === 'string' || options instanceof URL ? options : `https://${options.hostname}${options.path || '/'}`));
333
+ u.searchParams.set('key', key);
334
+ args[0] = typeof options === 'object' && !(options instanceof URL)
335
+ ? { ...options, path: `${u.pathname}${u.search}` }
336
+ : u.toString();
337
+ } else if (typeof options === 'string' || options instanceof URL) {
338
  const u = new URL(String(options));
339
  args[0] = {
340
  protocol: u.protocol,
openclaw-sync.py CHANGED
@@ -360,6 +360,30 @@ def create_snapshot_dir(source_root: Path) -> Path:
360
  return staging_root
361
 
362
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  def restore_workspace() -> bool:
364
  if not HF_TOKEN:
365
  write_status("disabled", "HF_TOKEN is not configured.")
@@ -459,6 +483,7 @@ def _sync_once_unlocked(
459
  commit_message=f"HuggingClaw sync {time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}",
460
  ignore_patterns=[".git/*", ".git"],
461
  )
 
462
  finally:
463
  shutil.rmtree(snapshot_dir, ignore_errors=True)
464
 
 
360
  return staging_root
361
 
362
 
363
def prune_remote_deleted_files(repo_id: str, snapshot_dir: Path) -> None:
    """Delete dataset-repo files that are absent from the local snapshot.

    Lists the files of the ``repo_id`` dataset repo and deletes, through one
    ``HF_API.delete_files`` call, every remote path that has no matching file
    under ``snapshot_dir``. ``.gitattributes`` is never deleted. Returns
    without doing anything when ``HF_API`` is ``None`` or nothing is stale.

    Args:
        repo_id: Identifier of the dataset repo to prune.
        snapshot_dir: Root of the local snapshot; file paths relative to this
            directory (POSIX form) are compared against the remote paths.
    """
    if HF_API is None:
        return

    # Local import keeps this block self-contained (stdlib only).
    from glob import escape as escape_glob

    local_files = {
        path.relative_to(snapshot_dir).as_posix()
        for path in snapshot_dir.rglob("*")
        if path.is_file()
    }

    remote_files = HF_API.list_repo_files(repo_id=repo_id, repo_type="dataset")
    stale_files = [
        path for path in remote_files
        if path not in local_files and path not in {".gitattributes"}
    ]
    if not stale_files:
        return

    # ``delete_patterns`` entries are wildcard patterns, not literal paths.
    # Escape each stale path so names containing ``*``, ``?`` or ``[`` match
    # only themselves; otherwise such a name could miss its own file or hit a
    # different file that is still present locally.
    HF_API.delete_files(
        delete_patterns=[escape_glob(path) for path in stale_files],
        repo_id=repo_id,
        repo_type="dataset",
        commit_message="Prune stale files after workspace sync",
    )
385
+
386
+
387
  def restore_workspace() -> bool:
388
  if not HF_TOKEN:
389
  write_status("disabled", "HF_TOKEN is not configured.")
 
483
  commit_message=f"HuggingClaw sync {time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}",
484
  ignore_patterns=[".git/*", ".git"],
485
  )
486
+ prune_remote_deleted_files(repo_id, snapshot_dir)
487
  finally:
488
  shutil.rmtree(snapshot_dir, ignore_errors=True)
489
 
start.sh CHANGED
@@ -131,6 +131,57 @@ case "$LLM_PROVIDER" in
131
  ;;
132
  esac
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  # ── Setup directories ──
135
  mkdir -p /home/node/.openclaw/agents/main/sessions
136
  mkdir -p /home/node/.openclaw/credentials
@@ -285,6 +336,82 @@ if [ -n "$CUSTOM_PROVIDER_NAME" ] || [ -n "$CUSTOM_BASE_URL" ] || [ -n "$CUSTOM_
285
  fi
286
  fi
287
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  # Browser configuration (managed local Chromium in HF/Docker)
289
  BROWSER_EXECUTABLE_PATH=""
290
  for candidate in /usr/bin/chromium /usr/bin/chromium-browser /snap/bin/chromium; do
 
131
  ;;
132
  esac
133
 
134
+ # Ensure OpenClaw provider discovery can see per-provider keys even when users
135
+ # configure only *_API_KEYS pools. Mirror first pool key into singular env.
136
promote_first_pool_key() {
  # Copy the first non-blank entry of a comma-separated pool variable into the
  # matching singular variable and export it, unless the singular variable
  # already holds a value.
  #   $1  name of the singular variable (e.g. OPENAI_API_KEY)
  #   $2  name of the pool variable     (e.g. OPENAI_API_KEYS)
  local target_name="$1"
  local source_name="$2"
  local current="${!target_name:-}"
  local pool="${!source_name:-}"
  local candidate

  # An existing singular value wins; an empty pool has nothing to promote.
  if [ -n "$current" ] || [ -z "$pool" ]; then
    return 0
  fi

  # One entry per line, trim surrounding whitespace, keep the first entry
  # that still has content.
  candidate=$(printf '%s' "$pool" \
    | tr ',' '\n' \
    | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' \
    | awk 'NF{print; exit}')

  [ -z "$candidate" ] || export "${target_name}=$candidate"
}
150
+
151
+ promote_first_pool_key "ANTHROPIC_API_KEY" "ANTHROPIC_API_KEYS"
152
+ promote_first_pool_key "OPENAI_API_KEY" "OPENAI_API_KEYS"
153
+ promote_first_pool_key "GEMINI_API_KEY" "GEMINI_API_KEYS"
154
+ promote_first_pool_key "GEMINI_API_KEY" "GOOGLE_API_KEYS"
155
+ promote_first_pool_key "DEEPSEEK_API_KEY" "DEEPSEEK_API_KEYS"
156
+ promote_first_pool_key "OPENROUTER_API_KEY" "OPENROUTER_API_KEYS"
157
+ promote_first_pool_key "KILOCODE_API_KEY" "KILOCODE_API_KEYS"
158
+ promote_first_pool_key "OPENCODE_API_KEY" "OPENCODE_API_KEYS"
159
+ promote_first_pool_key "ZAI_API_KEY" "ZAI_API_KEYS"
160
+ promote_first_pool_key "MOONSHOT_API_KEY" "MOONSHOT_API_KEYS"
161
+ promote_first_pool_key "MINIMAX_API_KEY" "MINIMAX_API_KEYS"
162
+ promote_first_pool_key "XIAOMI_API_KEY" "XIAOMI_API_KEYS"
163
+ promote_first_pool_key "VOLCANO_ENGINE_API_KEY" "VOLCANO_ENGINE_API_KEYS"
164
+ promote_first_pool_key "BYTEPLUS_API_KEY" "BYTEPLUS_API_KEYS"
165
+ promote_first_pool_key "QIANFAN_API_KEY" "QIANFAN_API_KEYS"
166
+ promote_first_pool_key "MODELSTUDIO_API_KEY" "MODELSTUDIO_API_KEYS"
167
+ promote_first_pool_key "KIMI_API_KEY" "KIMI_API_KEYS"
168
+ promote_first_pool_key "MISTRAL_API_KEY" "MISTRAL_API_KEYS"
169
+ promote_first_pool_key "XAI_API_KEY" "XAI_API_KEYS"
170
+ promote_first_pool_key "NVIDIA_API_KEY" "NVIDIA_API_KEYS"
171
+ promote_first_pool_key "GROQ_API_KEY" "GROQ_API_KEYS"
172
+ promote_first_pool_key "COHERE_API_KEY" "COHERE_API_KEYS"
173
+ promote_first_pool_key "TOGETHER_API_KEY" "TOGETHER_API_KEYS"
174
+ promote_first_pool_key "CEREBRAS_API_KEY" "CEREBRAS_API_KEYS"
175
+ promote_first_pool_key "VENICE_API_KEY" "VENICE_API_KEYS"
176
+ promote_first_pool_key "SYNTHETIC_API_KEY" "SYNTHETIC_API_KEYS"
177
+ promote_first_pool_key "COPILOT_GITHUB_TOKEN" "COPILOT_GITHUB_TOKENS"
178
+ promote_first_pool_key "HUGGINGFACE_HUB_TOKEN" "HUGGINGFACE_HUB_TOKENS"
179
+
180
+ # Compatibility aliases for Google provider secrets some users already have.
181
+ if [ -z "${GEMINI_API_KEY:-}" ] && [ -n "${GOOGLE_API_KEY:-}" ]; then
182
+ export GEMINI_API_KEY="$GOOGLE_API_KEY"
183
+ fi
184
+
185
  # ── Setup directories ──
186
  mkdir -p /home/node/.openclaw/agents/main/sessions
187
  mkdir -p /home/node/.openclaw/credentials
 
336
  fi
337
  fi
338
 
339
+ # Optional: explicitly expose provider model lists in Control UI when
340
+ # provider keys are configured. Format:
341
+ # NVIDIA_MODELS=model1,model2
342
+ # OPENAI_MODELS=gpt-4o-mini,gpt-4.1
343
+ # This helps when provider auto-discovery does not populate models reliably.
344
inject_provider_models_from_env() {
  # Write an explicit model list for one provider into CONFIG_JSON.
  #   $1  provider id used as the key under .models.providers
  #   $2  name of the env var holding comma-separated model IDs
  #   $3  name of the provider's singular key env var
  #   $4  name of the provider's key-pool env var
  # Does nothing (returns 0) unless the model list env is non-empty and at
  # least one of the two key variables is set. Returns 1, leaving CONFIG_JSON
  # untouched, when jq fails.
  local provider="$1"
  local models_env="$2"
  local key_env_single="$3"
  local key_env_pool="$4"
  local models_csv="${!models_env:-}"
  local single_key="${!key_env_single:-}"
  local pool_keys="${!key_env_pool:-}"

  # Only inject when both:
  #   1) provider has at least one configured key
  #   2) explicit model list env is provided
  if [ -z "$models_csv" ] || { [ -z "$single_key" ] && [ -z "$pool_keys" ]; }; then
    return 0
  fi

  # Split on commas, trim each entry, drop blanks, then build a de-duplicated
  # array of {id, name} objects (unique_by also sorts the entries by id).
  local models_json
  if ! models_json=$(printf '%s' "$models_csv" \
    | tr ',' '\n' \
    | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' \
    | awk 'NF' \
    | jq -R . \
    | jq -s 'map({id: ., name: .}) | unique_by(.id)'); then
    echo "inject_provider_models_from_env: could not parse ${models_env} for provider '${provider}'" >&2
    return 1
  fi

  # A value made only of separators/whitespace names no models; do not
  # overwrite the provider's model list with an empty array.
  if [ -z "$models_json" ] || [ "$models_json" = "[]" ]; then
    return 0
  fi

  # Build the new config in a scratch variable so a jq failure cannot replace
  # CONFIG_JSON with an empty string.
  local updated_config
  if ! updated_config=$(jq \
    --arg provider "$provider" \
    --argjson models "$models_json" \
    '.models.mode = "merge"
     | .models.providers[$provider] = ((.models.providers[$provider] // {}) + {models: $models})' <<<"$CONFIG_JSON"); then
    echo "inject_provider_models_from_env: could not update config for provider '${provider}'" >&2
    return 1
  fi
  CONFIG_JSON="$updated_config"
}
374
+
375
+ # Built-in provider model envs (optional)
376
+ inject_provider_models_from_env "anthropic" "ANTHROPIC_MODELS" "ANTHROPIC_API_KEY" "ANTHROPIC_API_KEYS"
377
+ inject_provider_models_from_env "openai" "OPENAI_MODELS" "OPENAI_API_KEY" "OPENAI_API_KEYS"
378
+ inject_provider_models_from_env "openai-codex" "OPENAI_MODELS" "OPENAI_API_KEY" "OPENAI_API_KEYS"
379
+ inject_provider_models_from_env "google" "GEMINI_MODELS" "GEMINI_API_KEY" "GEMINI_API_KEYS"
380
+ inject_provider_models_from_env "google-vertex" "GEMINI_MODELS" "GEMINI_API_KEY" "GEMINI_API_KEYS"
381
+ inject_provider_models_from_env "deepseek" "DEEPSEEK_MODELS" "DEEPSEEK_API_KEY" "DEEPSEEK_API_KEYS"
382
+ inject_provider_models_from_env "openrouter" "OPENROUTER_MODELS" "OPENROUTER_API_KEY" "OPENROUTER_API_KEYS"
383
+ inject_provider_models_from_env "kilocode" "KILOCODE_MODELS" "KILOCODE_API_KEY" "KILOCODE_API_KEYS"
384
+ inject_provider_models_from_env "opencode" "OPENCODE_MODELS" "OPENCODE_API_KEY" "OPENCODE_API_KEYS"
385
+ inject_provider_models_from_env "opencode-go" "OPENCODE_MODELS" "OPENCODE_API_KEY" "OPENCODE_API_KEYS"
386
+ inject_provider_models_from_env "zai" "ZAI_MODELS" "ZAI_API_KEY" "ZAI_API_KEYS"
387
+ inject_provider_models_from_env "z-ai" "ZAI_MODELS" "ZAI_API_KEY" "ZAI_API_KEYS"
388
+ inject_provider_models_from_env "z.ai" "ZAI_MODELS" "ZAI_API_KEY" "ZAI_API_KEYS"
389
+ inject_provider_models_from_env "zhipu" "ZAI_MODELS" "ZAI_API_KEY" "ZAI_API_KEYS"
390
+ inject_provider_models_from_env "moonshot" "MOONSHOT_MODELS" "MOONSHOT_API_KEY" "MOONSHOT_API_KEYS"
391
+ inject_provider_models_from_env "kimi-coding" "KIMI_MODELS" "KIMI_API_KEY" "KIMI_API_KEYS"
392
+ inject_provider_models_from_env "minimax" "MINIMAX_MODELS" "MINIMAX_API_KEY" "MINIMAX_API_KEYS"
393
+ inject_provider_models_from_env "modelstudio" "MODELSTUDIO_MODELS" "MODELSTUDIO_API_KEY" "MODELSTUDIO_API_KEYS"
394
+ inject_provider_models_from_env "qwen" "MODELSTUDIO_MODELS" "MODELSTUDIO_API_KEY" "MODELSTUDIO_API_KEYS"
395
+ inject_provider_models_from_env "xiaomi" "XIAOMI_MODELS" "XIAOMI_API_KEY" "XIAOMI_API_KEYS"
396
+ inject_provider_models_from_env "volcengine" "VOLCANO_ENGINE_MODELS" "VOLCANO_ENGINE_API_KEY" "VOLCANO_ENGINE_API_KEYS"
397
+ inject_provider_models_from_env "volcengine-plan" "VOLCANO_ENGINE_MODELS" "VOLCANO_ENGINE_API_KEY" "VOLCANO_ENGINE_API_KEYS"
398
+ inject_provider_models_from_env "byteplus" "BYTEPLUS_MODELS" "BYTEPLUS_API_KEY" "BYTEPLUS_API_KEYS"
399
+ inject_provider_models_from_env "byteplus-plan" "BYTEPLUS_MODELS" "BYTEPLUS_API_KEY" "BYTEPLUS_API_KEYS"
400
+ inject_provider_models_from_env "qianfan" "QIANFAN_MODELS" "QIANFAN_API_KEY" "QIANFAN_API_KEYS"
401
+ inject_provider_models_from_env "groq" "GROQ_MODELS" "GROQ_API_KEY" "GROQ_API_KEYS"
402
+ inject_provider_models_from_env "mistral" "MISTRAL_MODELS" "MISTRAL_API_KEY" "MISTRAL_API_KEYS"
403
+ inject_provider_models_from_env "mistralai" "MISTRAL_MODELS" "MISTRAL_API_KEY" "MISTRAL_API_KEYS"
404
+ inject_provider_models_from_env "xai" "XAI_MODELS" "XAI_API_KEY" "XAI_API_KEYS"
405
+ inject_provider_models_from_env "x-ai" "XAI_MODELS" "XAI_API_KEY" "XAI_API_KEYS"
406
+ inject_provider_models_from_env "nvidia" "NVIDIA_MODELS" "NVIDIA_API_KEY" "NVIDIA_API_KEYS"
407
+ inject_provider_models_from_env "cohere" "COHERE_MODELS" "COHERE_API_KEY" "COHERE_API_KEYS"
408
+ inject_provider_models_from_env "together" "TOGETHER_MODELS" "TOGETHER_API_KEY" "TOGETHER_API_KEYS"
409
+ inject_provider_models_from_env "cerebras" "CEREBRAS_MODELS" "CEREBRAS_API_KEY" "CEREBRAS_API_KEYS"
410
+ inject_provider_models_from_env "huggingface" "HUGGINGFACE_MODELS" "HUGGINGFACE_HUB_TOKEN" "HUGGINGFACE_HUB_TOKENS"
411
+ inject_provider_models_from_env "venice" "VENICE_MODELS" "VENICE_API_KEY" "VENICE_API_KEYS"
412
+ inject_provider_models_from_env "synthetic" "SYNTHETIC_MODELS" "SYNTHETIC_API_KEY" "SYNTHETIC_API_KEYS"
413
+ inject_provider_models_from_env "github-copilot" "GITHUB_COPILOT_MODELS" "COPILOT_GITHUB_TOKEN" "COPILOT_GITHUB_TOKENS"
414
+
415
  # Browser configuration (managed local Chromium in HF/Docker)
416
  BROWSER_EXECUTABLE_PATH=""
417
  for candidate in /usr/bin/chromium /usr/bin/chromium-browser /snap/bin/chromium; do