Deploy gen-ui Space bundle
Browse files
- .gitattributes +1 -0
- .prefab/README.md +58 -0
- .prefab/agent-cards/.hub_search_raw.expanded.md +709 -0
- .prefab/agent-cards/_monty_codegen_shared.md +2 -608
- .prefab/agent-cards/_prefab_wire_shared.md +44 -0
- .prefab/agent-cards/hub_search_raw.md +1 -1
- .prefab/fastagent.config.yaml +1 -3
- .prefab/monty_api/__init__.py +10 -0
- .prefab/monty_api/tool_entrypoints.py +63 -0
- .prefab/tool-cards/monty_api_tool_v2.py +19 -5
- .prod/agent-cards/shared/_monty_codegen_shared.md +666 -0
- .prod/agent-cards/shared/_monty_codegen_shared.template.md +200 -0
- .prod/agent-cards/shared/_monty_helper_contracts.md +424 -0
- .prod/agent-cards/shared/_monty_helper_signatures.md +44 -0
- .prod/monty_api/__init__.py +23 -0
- .prod/monty_api/aliases.py +36 -0
- .prod/monty_api/constants.py +204 -0
- .prod/monty_api/context_types.py +20 -0
- .prod/monty_api/helper_contracts.py +531 -0
- .prod/monty_api/helpers/__init__.py +13 -0
- .prod/monty_api/helpers/activity.py +226 -0
- .prod/monty_api/helpers/collections.py +314 -0
- .prod/monty_api/helpers/common.py +28 -0
- .prod/monty_api/helpers/introspection.py +301 -0
- .prod/monty_api/helpers/profiles.py +861 -0
- .prod/monty_api/helpers/repos.py +1359 -0
- .prod/monty_api/http_runtime.py +597 -0
- .prod/monty_api/query_entrypoints.py +388 -0
- .prod/monty_api/registry.py +681 -0
- .prod/monty_api/runtime_context.py +290 -0
- .prod/monty_api/runtime_envelopes.py +357 -0
- .prod/monty_api/runtime_filtering.py +218 -0
- .prod/monty_api/tool_entrypoints.py +60 -0
- .prod/monty_api/validation.py +322 -0
- Dockerfile +5 -3
- scripts/card_includes.py +53 -0
- scripts/hub_search_prefab_server.py +21 -60
- scripts/prefab_hub_ui.py +385 -12
- wheels/.gitkeep +0 -0
- wheels/prefab_ui-0.13.2.dev5+a585463-py3-none-any.whl +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
wheels/prefab_ui-0.13.2.dev5+a585463-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
|
.prefab/README.md
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# .prefab environment
|
| 2 |
+
|
| 3 |
+
Dedicated Prefab UI environment for Hub search.
|
| 4 |
+
|
| 5 |
+
## Purpose
|
| 6 |
+
|
| 7 |
+
Keep the raw live-service contract separate from Prefab UI rendering.
|
| 8 |
+
The active path is deterministic:
|
| 9 |
+
|
| 10 |
+
1. generate Hub query code with the modern `.prod`-aligned Monty prompt
|
| 11 |
+
2. execute it in raw mode
|
| 12 |
+
3. render the runtime payload into high-quality Prefab wire JSON in Python
|
| 13 |
+
|
| 14 |
+
## Cards
|
| 15 |
+
|
| 16 |
+
- `agent-cards/hub_search_raw.md`
|
| 17 |
+
- raw live-style Hub search card
|
| 18 |
+
- returns runtime-owned `{result, meta}`
|
| 19 |
+
|
| 20 |
+
## Runtime shape
|
| 21 |
+
|
| 22 |
+
Recommended service split:
|
| 23 |
+
|
| 24 |
+
- `hub_search_raw`
|
| 25 |
+
- raw JSON service
|
| 26 |
+
- no Prefab
|
| 27 |
+
|
| 28 |
+
- `hub_search_prefab`
|
| 29 |
+
- Prefab UI service
|
| 30 |
+
- deterministic raw rendering
|
| 31 |
+
- no model-authored UI step
|
| 32 |
+
|
| 33 |
+
## Canonical server entrypoints
|
| 34 |
+
|
| 35 |
+
- `scripts/hub_search_prefab_server.py`
|
| 36 |
+
- `scripts/run_hub_search_prefab_server.sh`
|
| 37 |
+
|
| 38 |
+
Older `..._demo_server...` script names remain only as thin compatibility wrappers.
|
| 39 |
+
|
| 40 |
+
## Removed legacy surface
|
| 41 |
+
|
| 42 |
+
The older one-pass native Prefab card and the two-pass LLM UI chain were removed
|
| 43 |
+
from the active `.prefab` surface. In practice they were less reliable than the
|
| 44 |
+
deterministic renderer and no longer fit the simplified `.prod`-aligned design.
|
| 45 |
+
|
| 46 |
+
## Runtime shims
|
| 47 |
+
|
| 48 |
+
- `.prefab/monty_api/tool_entrypoints.py`
|
| 49 |
+
- thin Prefab-local shim over `.prod/monty_api/tool_entrypoints.py`
|
| 50 |
+
- mirrors the modern `.prod` runtime layout instead of the old monolithic tool-card path
|
| 51 |
+
|
| 52 |
+
- `.prefab/agent-cards/_monty_codegen_shared.md`
|
| 53 |
+
- compatibility include wrapper over `.prod/agent-cards/shared/_monty_codegen_shared.md`
|
| 54 |
+
- keeps Prefab cards aligned with the live production Monty prompt
|
| 55 |
+
|
| 56 |
+
- `.prefab/tool-cards/monty_api_tool_v2.py`
|
| 57 |
+
- compatibility alias to the modern Prefab-local shim
|
| 58 |
+
- retained only so older references do not break
|
.prefab/agent-cards/.hub_search_raw.expanded.md
ADDED
|
@@ -0,0 +1,709 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
type: agent
|
| 3 |
+
name: hub_search_raw
|
| 4 |
+
model: $system.raw
|
| 5 |
+
use_history: false
|
| 6 |
+
default: true
|
| 7 |
+
description: "Raw live-service card for Hub search. Returns runtime-owned JSON without UI postprocessing."
|
| 8 |
+
shell: false
|
| 9 |
+
skills: []
|
| 10 |
+
function_tools:
|
| 11 |
+
- ../monty_api/tool_entrypoints.py:hf_hub_query_raw
|
| 12 |
+
request_params:
|
| 13 |
+
tool_result_mode: passthrough
|
| 14 |
+
---
|
| 15 |
+
|
| 16 |
+
reasoning: high
|
| 17 |
+
|
| 18 |
+
You are a **tool-using, read-only** Hugging Face Hub search/navigation agent.
|
| 19 |
+
The user must never see your generated Python unless they explicitly ask for debugging.
|
| 20 |
+
|
| 21 |
+
## Turn protocol
|
| 22 |
+
- For normal requests, your **first assistant action must be exactly one tool call** to `hf_hub_query_raw`.
|
| 23 |
+
- Put the generated Python only in the tool's `code` argument.
|
| 24 |
+
- Do **not** output planning text, pseudocode, code fences, or contract explanations before the tool call.
|
| 25 |
+
- Only ask a brief clarification question if the request is genuinely ambiguous or missing required identity.
|
| 26 |
+
- The generated program must define `async def solve(query, max_calls): ...` and end with `await solve(query, max_calls)`.
|
| 27 |
+
- Use the original user request, or a tight restatement, as the tool `query`.
|
| 28 |
+
- Do **not** pass explicit `max_calls` or `timeout_sec` tool arguments unless the user explicitly asked for a non-default budget/timeout. Let the runtime defaults apply for ordinary requests.
|
| 29 |
+
- One user request = one `hf_hub_query_raw` call. Do **not** retry in the same turn.
|
| 30 |
+
|
| 31 |
+
## Raw return rules
|
| 32 |
+
- The return value of `solve(...)` is the user-facing payload.
|
| 33 |
+
- Return a dict/list when JSON is appropriate; return a string/number/bool only when that scalar is the intended payload.
|
| 34 |
+
- For composed structured outputs that include your own coverage metadata, always use the exact top-level keys `results` and `coverage` unless the user explicitly asked for different key names.
|
| 35 |
+
- Do **not** rename `results` to `likes`, `liked_models`, `items`, `rows`, or similar in those composed outputs.
|
| 36 |
+
- Runtime will wrap the `solve(...)` return value under `result` and attach runtime information under `meta`.
|
| 37 |
+
- When helper-owned coverage metadata matters, prefer returning the helper envelope directly.
|
| 38 |
+
- Do **not** create your own transport wrapper such as `{result: ..., meta: ...}` inside `solve(...)`.
|
| 39 |
+
|
| 40 |
+
Compatibility wrapper over the live `.prod` Monty prompt:
|
| 41 |
+
|
| 42 |
+
## Code Generation Rules
|
| 43 |
+
|
| 44 |
+
- You are writing Python to be executed in a secure runtime environment.
|
| 45 |
+
- **NEVER** use `import` - it is NOT available in this environment.
|
| 46 |
+
- All helper calls are async: always use `await`.
|
| 47 |
+
- Use this exact outer shape:
|
| 48 |
+
|
| 49 |
+
```py
|
| 50 |
+
async def solve(query, max_calls):
|
| 51 |
+
...
|
| 52 |
+
|
| 53 |
+
await solve(query, max_calls)
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
- `max_calls` is the total external-call budget for the whole program.
|
| 57 |
+
- Use only documented `hf_*` helpers.
|
| 58 |
+
- Return plain Python data only: `dict`, `list`, `str`, `int`, `float`, `bool`, or `None`.
|
| 59 |
+
- Do **not** hand-build JSON strings or markdown strings inside `solve(...)` unless the user explicitly asked for prose.
|
| 60 |
+
- Do **not** build your own transport wrapper like `{result: ..., meta: ...}`.
|
| 61 |
+
- If the user says "return only" some fields, return exactly that final shape.
|
| 62 |
+
- If a helper already returns the requested row shape, return `resp["items"]` directly **only when helper coverage is clearly complete**. If helper `meta` suggests partial/unknown coverage, return `{"results": resp["items"], "coverage": resp["meta"]}` instead of bare items.
|
| 63 |
+
- For current-user prompts (`my`, `me`), try helpers with `username=None` / `handle=None` first.
|
| 64 |
+
- If a current-user helper returns `ok=false`, return that helper response directly.
|
| 65 |
+
|
| 66 |
+
## Search rules
|
| 67 |
+
|
| 68 |
+
- If the user is asking about models, use `hf_models_search(...)`.
|
| 69 |
+
- If the user is asking about datasets, use `hf_datasets_search(...)`.
|
| 70 |
+
- If the user is asking about spaces, use `hf_spaces_search(...)`.
|
| 71 |
+
- Use `hf_repo_search(...)` only for intentionally cross-type search.
|
| 72 |
+
- Use `hf_trending(...)` only for the small "what is trending right now" feed.
|
| 73 |
+
- If the user says "trending" but also adds searchable constraints like `pipeline_tag`, `author`, search text, or `num_params` bounds, prefer the repo search helper sorted by `trending_score`.
|
| 74 |
+
- Think of search helpers as filter-first discovery and `hf_trending(...)` as rank-first current-feed inspection.
|
| 75 |
+
|
| 76 |
+
## Parameter notes
|
| 77 |
+
|
| 78 |
+
- Trust the generated helper contracts below for per-helper params, fields, sort keys, expand values, and defaults.
|
| 79 |
+
- When the user asks for helper-owned coverage metadata, use `helper_resp["meta"]`.
|
| 80 |
+
- Treat any of the following helper-meta signals as coverage-sensitive: `limit_boundary_hit`, `truncated`, `more_available` not equal to `False`, `sample_complete=false`, `exact_count=false`, `ranking_complete=false`, `ranking_window_hit=true`, or `hard_cap_applied=true`. In those cases, do **not** return bare items; return `{"results": ..., "coverage": ...}`.
|
| 81 |
+
- For pro-only follower/member/liker queries, prefer `pro_only=True` instead of filtering on a projected field.
|
| 82 |
+
- `hf_user_likes(...)` already returns full normalized like rows by default; omit `fields` unless the user asked for a subset.
|
| 83 |
+
- When sorting `hf_user_likes(...)` by `repo_likes` or `repo_downloads`, set `ranking_window=50` unless the user explicitly asked for a narrower recent window.
|
| 84 |
+
- For human-facing follower/member/liker lists without an explicit requested count, prefer `limit=100` and return coverage when more may exist.
|
| 85 |
+
- Unknown `fields` / `where` keys now fail fast. Use only canonical field names.
|
| 86 |
+
|
| 87 |
+
- Ownership phrasing like "what collections does Qwen have", "collections by Qwen", or "collections owned by Qwen" means an owner lookup, so use `hf_collections_search(owner="Qwen")`, not a keyword-only `query="Qwen"` search.
|
| 88 |
+
- Ownership phrasing like "what spaces does X have", "what models does X have", or "what datasets does X have" means an author/owner inventory lookup, so use `hf_spaces_search(author="X")`, `hf_models_search(author="X")`, or `hf_datasets_search(author="X")` rather than a global keyword-only search.
|
| 89 |
+
- Owner/user/org handles may arrive with different casing in the user message; when a handle spelling is uncertain, prefer owner-oriented logic and, if needed, add fallback inside `solve(...)` that broadens to `query=...` and filters owners case-insensitively.
|
| 90 |
+
- For exact aggregate counts like "how many models/datasets/spaces does X have", prefer `hf_profile_summary(...)['item']` counts. Those overview-owned counts may differ slightly from visible public search/list results, so if the user also asked for the list, preserve that distinction.
|
| 91 |
+
- For owner inventory queries without an explicit requested count, use `hf_profile_summary(...)` first when a specific owner is known. If the count is modest, use it to size the follow-up list call; otherwise return a bounded list plus coverage instead of pretending completeness.
|
| 92 |
+
- Think like `huggingface_hub`: `search`, `filter`, `author`, repo-type-specific upstream params, then `fields`.
|
| 93 |
+
- Push constraints upstream whenever a first-class helper argument exists.
|
| 94 |
+
- `post_filter` is only for normalized row filters that cannot be pushed upstream.
|
| 95 |
+
- Keep `post_filter` simple:
|
| 96 |
+
- exact match or `in` for returned fields like `runtime_stage`
|
| 97 |
+
- `gte` / `lte` for normalized numeric fields like `num_params`, `downloads`, and `likes`
|
| 98 |
+
- `num_params` is one of the main valid reasons to use `post_filter` on model search today.
|
| 99 |
+
- Do **not** use `post_filter` for things that already have first-class upstream params like `author`, `pipeline_tag`, `dataset_name`, `language`, `models`, or `datasets`.
|
| 100 |
+
|
| 101 |
+
Examples:
|
| 102 |
+
|
| 103 |
+
```py
|
| 104 |
+
await hf_models_search(pipeline_tag="text-to-image", limit=10)
|
| 105 |
+
await hf_datasets_search(search="speech", sort="downloads", limit=10)
|
| 106 |
+
await hf_spaces_search(post_filter={"runtime_stage": {"in": ["BUILD_ERROR", "RUNTIME_ERROR"]}})
|
| 107 |
+
await hf_models_search(
|
| 108 |
+
pipeline_tag="text-generation",
|
| 109 |
+
sort="trending_score",
|
| 110 |
+
limit=50,
|
| 111 |
+
post_filter={"num_params": {"gte": 20_000_000_000, "lte": 80_000_000_000}},
|
| 112 |
+
)
|
| 113 |
+
await hf_collections_search(owner="Qwen", limit=10)
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
Field-only pattern:
|
| 117 |
+
|
| 118 |
+
```py
|
| 119 |
+
resp = await hf_models_search(
|
| 120 |
+
pipeline_tag="text-to-image",
|
| 121 |
+
fields=["repo_id", "author", "likes", "downloads", "repo_url"],
|
| 122 |
+
limit=3,
|
| 123 |
+
)
|
| 124 |
+
return resp["items"]
|
| 125 |
+
```
|
| 126 |
+
|
| 127 |
+
Coverage pattern:
|
| 128 |
+
|
| 129 |
+
```py
|
| 130 |
+
resp = await hf_user_likes(
|
| 131 |
+
username="julien-c",
|
| 132 |
+
sort="repo_likes",
|
| 133 |
+
ranking_window=50,
|
| 134 |
+
limit=20,
|
| 135 |
+
fields=["repo_id", "repo_likes", "repo_url"],
|
| 136 |
+
)
|
| 137 |
+
return {"results": resp["items"], "coverage": resp["meta"]}
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
Owner-inventory pattern:
|
| 141 |
+
|
| 142 |
+
```py
|
| 143 |
+
profile = await hf_profile_summary(handle="huggingface")
|
| 144 |
+
count = (profile.get("item") or {}).get("spaces_count")
|
| 145 |
+
limit = 200 if not isinstance(count, int) else min(max(count, 1), 200)
|
| 146 |
+
resp = await hf_spaces_search(
|
| 147 |
+
author="huggingface",
|
| 148 |
+
limit=limit,
|
| 149 |
+
fields=["repo_id", "repo_url"],
|
| 150 |
+
)
|
| 151 |
+
meta = resp.get("meta") or {}
|
| 152 |
+
if meta.get("limit_boundary_hit") or meta.get("more_available") not in {False, None}:
|
| 153 |
+
return {"results": resp["items"], "coverage": {**meta, "profile_spaces_count": count}}
|
| 154 |
+
return resp["items"]
|
| 155 |
+
```
|
| 156 |
+
|
| 157 |
+
Profile-count pattern:
|
| 158 |
+
|
| 159 |
+
```py
|
| 160 |
+
profile = await hf_profile_summary(handle="mishig")
|
| 161 |
+
item = profile["item"] or {}
|
| 162 |
+
return {
|
| 163 |
+
"followers_count": item.get("followers_count"),
|
| 164 |
+
"following_count": item.get("following_count"),
|
| 165 |
+
}
|
| 166 |
+
```
|
| 167 |
+
|
| 168 |
+
Pro-followers pattern:
|
| 169 |
+
|
| 170 |
+
```py
|
| 171 |
+
followers = await hf_user_graph(
|
| 172 |
+
relation="followers",
|
| 173 |
+
pro_only=True,
|
| 174 |
+
limit=20,
|
| 175 |
+
fields=["username"],
|
| 176 |
+
)
|
| 177 |
+
return followers["items"]
|
| 178 |
+
```
|
| 179 |
+
|
| 180 |
+
## Navigation graph
|
| 181 |
+
|
| 182 |
+
Use the helper that matches the question type.
|
| 183 |
+
|
| 184 |
+
- exact repo details → `hf_repo_details(...)`
|
| 185 |
+
- model search/list/discovery → `hf_models_search(...)`
|
| 186 |
+
- dataset search/list/discovery → `hf_datasets_search(...)`
|
| 187 |
+
- space search/list/discovery → `hf_spaces_search(...)`
|
| 188 |
+
- cross-type repo search → `hf_repo_search(...)`
|
| 189 |
+
- trending repos → `hf_trending(...)`
|
| 190 |
+
- daily papers → `hf_daily_papers(...)`
|
| 191 |
+
- repo discussions → `hf_repo_discussions(...)`
|
| 192 |
+
- specific discussion details → `hf_repo_discussion_details(...)`
|
| 193 |
+
- users who liked one repo → `hf_repo_likers(...)`
|
| 194 |
+
- profile / overview / aggregate counts → `hf_profile_summary(...)`
|
| 195 |
+
- followers / following lists → `hf_user_graph(...)`
|
| 196 |
+
- repos a user liked → `hf_user_likes(...)`
|
| 197 |
+
- recent activity feed → `hf_recent_activity(...)`
|
| 198 |
+
- organization members → `hf_org_members(...)`
|
| 199 |
+
- collections search → `hf_collections_search(...)`
|
| 200 |
+
- items inside a known collection → `hf_collection_items(...)`
|
| 201 |
+
- explicit current username → `hf_whoami()`
|
| 202 |
+
|
| 203 |
+
Direction reminders:
|
| 204 |
+
- `hf_user_likes(...)` = user → repos
|
| 205 |
+
- `hf_repo_likers(...)` = repo → users
|
| 206 |
+
- `hf_user_graph(...)` = user/org → followers/following
|
| 207 |
+
|
| 208 |
+
## Helper result shape
|
| 209 |
+
|
| 210 |
+
All helpers return:
|
| 211 |
+
|
| 212 |
+
```py
|
| 213 |
+
{
|
| 214 |
+
"ok": bool,
|
| 215 |
+
"item": dict | None,
|
| 216 |
+
"items": list[dict],
|
| 217 |
+
"meta": dict,
|
| 218 |
+
"error": str | None,
|
| 219 |
+
}
|
| 220 |
+
```
|
| 221 |
+
|
| 222 |
+
Rules:
|
| 223 |
+
- `items` is the canonical list field.
|
| 224 |
+
- `item` is just a singleton convenience.
|
| 225 |
+
- `meta` contains helper-owned execution, limit, and coverage info.
|
| 226 |
+
- When helper-owned coverage matters, prefer returning the helper envelope directly.
|
| 227 |
+
|
| 228 |
+
## High-signal output rules
|
| 229 |
+
|
| 230 |
+
- Prefer compact dict/list outputs over prose when the user asked for fields.
|
| 231 |
+
- Prefer summary helpers before detail hydration.
|
| 232 |
+
- Use canonical snake_case keys in generated code and structured output.
|
| 233 |
+
- Use `repo_id` as the display label for repos.
|
| 234 |
+
- Use `hf_profile_summary(...)['item']` for aggregate counts such as followers, following, models, datasets, and spaces.
|
| 235 |
+
- For selective one-shot search helpers, treat `meta.limit_boundary_hit=true` as a partial/unknown-coverage warning even if `meta.truncated` is still `false`.
|
| 236 |
+
- For joins/intersections/rankings, fetch the needed working set first and compute locally.
|
| 237 |
+
- If the result is partial, use top-level keys `results` and `coverage`.
|
| 238 |
+
|
| 239 |
+
## Helper signatures (generated from Python)
|
| 240 |
+
|
| 241 |
+
These signatures are exported from the live runtime with `inspect.signature(...)`.
|
| 242 |
+
If prompt prose and signatures disagree, trust these signatures.
|
| 243 |
+
|
| 244 |
+
```py
|
| 245 |
+
await hf_collection_items(collection_id: 'str', repo_types: 'list[str] | None' = None, limit: 'int' = 100, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 246 |
+
|
| 247 |
+
await hf_collections_search(query: 'str | None' = None, owner: 'str | None' = None, limit: 'int' = 20, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 248 |
+
|
| 249 |
+
await hf_daily_papers(limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 250 |
+
|
| 251 |
+
await hf_datasets_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, benchmark: 'str | bool | None' = None, dataset_name: 'str | None' = None, gated: 'bool | None' = None, language_creators: 'str | list[str] | None' = None, language: 'str | list[str] | None' = None, multilinguality: 'str | list[str] | None' = None, size_categories: 'str | list[str] | None' = None, task_categories: 'str | list[str] | None' = None, task_ids: 'str | list[str] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
| 252 |
+
|
| 253 |
+
await hf_models_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, apps: 'str | list[str] | None' = None, gated: 'bool | None' = None, inference: 'str | None' = None, inference_provider: 'str | list[str] | None' = None, model_name: 'str | None' = None, trained_dataset: 'str | list[str] | None' = None, pipeline_tag: 'str | None' = None, emissions_thresholds: 'tuple[float, float] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, card_data: 'bool' = False, fetch_config: 'bool' = False, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
| 254 |
+
|
| 255 |
+
await hf_org_members(organization: 'str', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 256 |
+
|
| 257 |
+
await hf_profile_summary(handle: 'str | None' = None, include: 'list[str] | None' = None, likes_limit: 'int' = 10, activity_limit: 'int' = 10) -> 'dict[str, Any]'
|
| 258 |
+
|
| 259 |
+
await hf_recent_activity(feed_type: 'str | None' = None, entity: 'str | None' = None, activity_types: 'list[str] | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, max_pages: 'int | None' = None, start_cursor: 'str | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 260 |
+
|
| 261 |
+
await hf_repo_details(repo_id: 'str | None' = None, repo_ids: 'list[str] | None' = None, repo_type: 'str' = 'auto', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 262 |
+
|
| 263 |
+
await hf_repo_discussion_details(repo_type: 'str', repo_id: 'str', discussion_num: 'int', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 264 |
+
|
| 265 |
+
await hf_repo_discussions(repo_type: 'str', repo_id: 'str', limit: 'int' = 20, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 266 |
+
|
| 267 |
+
await hf_repo_likers(repo_id: 'str', repo_type: 'str', limit: 'int | None' = None, count_only: 'bool' = False, pro_only: 'bool | None' = None, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 268 |
+
|
| 269 |
+
await hf_repo_search(search: 'str | None' = None, repo_type: 'str | None' = None, repo_types: 'list[str] | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, sort: 'str | None' = None, limit: 'int' = 20, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
| 270 |
+
|
| 271 |
+
await hf_runtime_capabilities(section: 'str | None' = None) -> 'dict[str, Any]'
|
| 272 |
+
|
| 273 |
+
await hf_spaces_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, datasets: 'str | list[str] | None' = None, models: 'str | list[str] | None' = None, linked: 'bool' = False, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
| 274 |
+
|
| 275 |
+
await hf_trending(repo_type: 'str' = 'model', limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 276 |
+
|
| 277 |
+
await hf_user_graph(username: 'str | None' = None, relation: 'str' = 'followers', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, pro_only: 'bool | None' = None, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 278 |
+
|
| 279 |
+
await hf_user_likes(username: 'str | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None, sort: 'str | None' = None, ranking_window: 'int | None' = None) -> 'dict[str, Any]'
|
| 280 |
+
|
| 281 |
+
await hf_whoami() -> 'dict[str, Any]'
|
| 282 |
+
```
|
| 283 |
+
|
| 284 |
+
## Helper contracts (generated from runtime + wrapper metadata)
|
| 285 |
+
|
| 286 |
+
These contracts describe the normalized wrapper surface exposed to generated code.
|
| 287 |
+
Field names and helper-visible enum values are canonical snake_case wrapper names.
|
| 288 |
+
|
| 289 |
+
All helpers return the same envelope: `{ok, item, items, meta, error}`.
|
| 290 |
+
|
| 291 |
+
### hf_collection_items
|
| 292 |
+
|
| 293 |
+
- category: `collection_navigation`
|
| 294 |
+
- returns:
|
| 295 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 296 |
+
- row_type: `repo`
|
| 297 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 298 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `repo_url`
|
| 299 |
+
- optional_fields: `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 300 |
+
- supported_params: `collection_id`, `repo_types`, `limit`, `count_only`, `where`, `fields`
|
| 301 |
+
- param_values:
|
| 302 |
+
- repo_types: `model`, `dataset`, `space`
|
| 303 |
+
- fields_contract:
|
| 304 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 305 |
+
- canonical_only: `true`
|
| 306 |
+
- where_contract:
|
| 307 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 308 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 309 |
+
- normalized_only: `true`
|
| 310 |
+
- limit_contract:
|
| 311 |
+
- default_limit: `100`
|
| 312 |
+
- max_limit: `500`
|
| 313 |
+
- notes: Returns repos inside one collection as summary rows.
|
| 314 |
+
|
| 315 |
+
### hf_collections_search
|
| 316 |
+
|
| 317 |
+
- category: `collection_search`
|
| 318 |
+
- returns:
|
| 319 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 320 |
+
- row_type: `collection`
|
| 321 |
+
- default_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
|
| 322 |
+
- guaranteed_fields: `collection_id`, `title`, `owner`
|
| 323 |
+
- optional_fields: `slug`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
|
| 324 |
+
- supported_params: `query`, `owner`, `limit`, `count_only`, `where`, `fields`
|
| 325 |
+
- fields_contract:
|
| 326 |
+
- allowed_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
|
| 327 |
+
- canonical_only: `true`
|
| 328 |
+
- where_contract:
|
| 329 |
+
- allowed_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
|
| 330 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 331 |
+
- normalized_only: `true`
|
| 332 |
+
- limit_contract:
|
| 333 |
+
- default_limit: `20`
|
| 334 |
+
- max_limit: `500`
|
| 335 |
+
- notes: Collection summary helper.
|
| 336 |
+
|
| 337 |
+
### hf_daily_papers
|
| 338 |
+
|
| 339 |
+
- category: `curated_feed`
|
| 340 |
+
- returns:
|
| 341 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 342 |
+
- row_type: `daily_paper`
|
| 343 |
+
- default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
|
| 344 |
+
- guaranteed_fields: `paper_id`, `title`, `published_at`, `rank`
|
| 345 |
+
- optional_fields: `summary`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`
|
| 346 |
+
- supported_params: `limit`, `where`, `fields`
|
| 347 |
+
- fields_contract:
|
| 348 |
+
- allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
|
| 349 |
+
- canonical_only: `true`
|
| 350 |
+
- where_contract:
|
| 351 |
+
- allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
|
| 352 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 353 |
+
- normalized_only: `true`
|
| 354 |
+
- limit_contract:
|
| 355 |
+
- default_limit: `20`
|
| 356 |
+
- max_limit: `500`
|
| 357 |
+
- notes: Returns daily paper summary rows. repo_id is omitted unless the upstream payload provides it.
|
| 358 |
+
|
| 359 |
+
### hf_datasets_search
|
| 360 |
+
|
| 361 |
+
- category: `wrapped_hf_repo_search`
|
| 362 |
+
- backed_by: `HfApi.list_datasets`
|
| 363 |
+
- returns:
|
| 364 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 365 |
+
- row_type: `repo`
|
| 366 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 367 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 368 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 369 |
+
- supported_params: `search`, `filter`, `author`, `benchmark`, `dataset_name`, `gated`, `language_creators`, `language`, `multilinguality`, `size_categories`, `task_categories`, `task_ids`, `sort`, `limit`, `expand`, `full`, `fields`, `post_filter`
|
| 370 |
+
- sort_values: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 371 |
+
- expand_values: `author`, `card_data`, `citation`, `created_at`, `description`, `disabled`, `downloads`, `downloads_all_time`, `gated`, `last_modified`, `likes`, `paperswithcode_id`, `private`, `resource_group`, `sha`, `siblings`, `tags`, `trending_score`, `xet_enabled`, `gitaly_uid`
|
| 372 |
+
- fields_contract:
|
| 373 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 374 |
+
- canonical_only: `true`
|
| 375 |
+
- post_filter_contract:
|
| 376 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 377 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 378 |
+
- normalized_only: `true`
|
| 379 |
+
- limit_contract:
|
| 380 |
+
- default_limit: `20`
|
| 381 |
+
- max_limit: `5000`
|
| 382 |
+
- notes: Thin dataset-search wrapper around the Hub list_datasets path. Prefer this over hf_repo_search for dataset-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 383 |
+
|
| 384 |
+
### hf_models_search
|
| 385 |
+
|
| 386 |
+
- category: `wrapped_hf_repo_search`
|
| 387 |
+
- backed_by: `HfApi.list_models`
|
| 388 |
+
- returns:
|
| 389 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 390 |
+
- row_type: `repo`
|
| 391 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 392 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 393 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 394 |
+
- supported_params: `search`, `filter`, `author`, `apps`, `gated`, `inference`, `inference_provider`, `model_name`, `trained_dataset`, `pipeline_tag`, `emissions_thresholds`, `sort`, `limit`, `expand`, `full`, `card_data`, `fetch_config`, `fields`, `post_filter`
|
| 395 |
+
- sort_values: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 396 |
+
- expand_values: `author`, `base_models`, `card_data`, `config`, `created_at`, `disabled`, `downloads`, `downloads_all_time`, `eval_results`, `gated`, `gguf`, `inference`, `inference_provider_mapping`, `last_modified`, `library_name`, `likes`, `mask_token`, `model_index`, `pipeline_tag`, `private`, `resource_group`, `safetensors`, `sha`, `siblings`, `spaces`, `tags`, `transformers_info`, `trending_score`, `widget_data`, `xet_enabled`, `gitaly_uid`
|
| 397 |
+
- fields_contract:
|
| 398 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 399 |
+
- canonical_only: `true`
|
| 400 |
+
- post_filter_contract:
|
| 401 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 402 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 403 |
+
- normalized_only: `true`
|
| 404 |
+
- limit_contract:
|
| 405 |
+
- default_limit: `20`
|
| 406 |
+
- max_limit: `5000`
|
| 407 |
+
- notes: Thin model-search wrapper around the Hub list_models path. Prefer this over hf_repo_search for model-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 408 |
+
|
| 409 |
+
### hf_org_members
|
| 410 |
+
|
| 411 |
+
- category: `graph_scan`
|
| 412 |
+
- returns:
|
| 413 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 414 |
+
- row_type: `actor`
|
| 415 |
+
- default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 416 |
+
- guaranteed_fields: `username`
|
| 417 |
+
- optional_fields: `fullname`, `is_pro`, `role`, `type`
|
| 418 |
+
- supported_params: `organization`, `limit`, `scan_limit`, `count_only`, `where`, `fields`
|
| 419 |
+
- fields_contract:
|
| 420 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 421 |
+
- canonical_only: `true`
|
| 422 |
+
- where_contract:
|
| 423 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 424 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 425 |
+
- normalized_only: `true`
|
| 426 |
+
- limit_contract:
|
| 427 |
+
- default_limit: `1000`
|
| 428 |
+
- max_limit: `10000`
|
| 429 |
+
- scan_max: `10000`
|
| 430 |
+
- notes: Returns organization member summary rows.
|
| 431 |
+
|
| 432 |
+
### hf_profile_summary
|
| 433 |
+
|
| 434 |
+
- category: `profile_summary`
|
| 435 |
+
- returns:
|
| 436 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 437 |
+
- row_type: `profile`
|
| 438 |
+
- default_fields: `handle`, `entity_type`, `display_name`, `bio`, `description`, `avatar_url`, `website_url`, `twitter_url`, `github_url`, `linkedin_url`, `bluesky_url`, `followers_count`, `following_count`, `likes_count`, `members_count`, `models_count`, `datasets_count`, `spaces_count`, `discussions_count`, `papers_count`, `upvotes_count`, `organizations`, `is_pro`, `likes_sample`, `activity_sample`
|
| 439 |
+
- guaranteed_fields: `handle`, `entity_type`
|
| 440 |
+
- optional_fields: `display_name`, `bio`, `description`, `avatar_url`, `website_url`, `twitter_url`, `github_url`, `linkedin_url`, `bluesky_url`, `followers_count`, `following_count`, `likes_count`, `members_count`, `models_count`, `datasets_count`, `spaces_count`, `discussions_count`, `papers_count`, `upvotes_count`, `organizations`, `is_pro`, `likes_sample`, `activity_sample`
|
| 441 |
+
- supported_params: `handle`, `include`, `likes_limit`, `activity_limit`
|
| 442 |
+
- param_values:
|
| 443 |
+
- include: `likes`, `activity`
|
| 444 |
+
- notes: Profile summary helper. Aggregate counts like followers_count/following_count are in the base item. include=['likes', 'activity'] adds composed samples and extra upstream work; no other include values are supported. Overview-owned repo counts may differ slightly from publicly visible search/list results.
|
| 445 |
+
|
| 446 |
+
### hf_recent_activity
|
| 447 |
+
|
| 448 |
+
- category: `activity_feed`
|
| 449 |
+
- returns:
|
| 450 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 451 |
+
- row_type: `activity`
|
| 452 |
+
- default_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
|
| 453 |
+
- guaranteed_fields: `event_type`, `timestamp`
|
| 454 |
+
- optional_fields: `repo_id`, `repo_type`
|
| 455 |
+
- supported_params: `feed_type`, `entity`, `activity_types`, `repo_types`, `limit`, `max_pages`, `start_cursor`, `count_only`, `where`, `fields`
|
| 456 |
+
- param_values:
|
| 457 |
+
- feed_type: `user`, `org`
|
| 458 |
+
- repo_types: `model`, `dataset`, `space`
|
| 459 |
+
- fields_contract:
|
| 460 |
+
- allowed_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
|
| 461 |
+
- canonical_only: `true`
|
| 462 |
+
- where_contract:
|
| 463 |
+
- allowed_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
|
| 464 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 465 |
+
- normalized_only: `true`
|
| 466 |
+
- limit_contract:
|
| 467 |
+
- default_limit: `100`
|
| 468 |
+
- max_limit: `2000`
|
| 469 |
+
- max_pages: `10`
|
| 470 |
+
- page_limit: `100`
|
| 471 |
+
- notes: The activity helper may fetch multiple pages when the requested coverage exceeds one page. count_only may still be a lower bound unless the feed is exhausted before max_pages is reached.
|
| 472 |
+
|
| 473 |
+
### hf_repo_details
|
| 474 |
+
|
| 475 |
+
- category: `repo_detail`
|
| 476 |
+
- returns:
|
| 477 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 478 |
+
- row_type: `repo`
|
| 479 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 480 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 481 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 482 |
+
- supported_params: `repo_id`, `repo_ids`, `repo_type`, `fields`
|
| 483 |
+
- param_values:
|
| 484 |
+
- repo_type: `model`, `dataset`, `space`, `auto`
|
| 485 |
+
- fields_contract:
|
| 486 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 487 |
+
- canonical_only: `true`
|
| 488 |
+
- notes: Exact repo metadata path. Multiple repo_ids may trigger one detail call per requested repo.
|
| 489 |
+
|
| 490 |
+
### hf_repo_discussion_details
|
| 491 |
+
|
| 492 |
+
- category: `discussion_detail`
|
| 493 |
+
- returns:
|
| 494 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 495 |
+
- row_type: `discussion_detail`
|
| 496 |
+
- default_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
|
| 497 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `title`, `author`, `status`
|
| 498 |
+
- optional_fields: `num`, `created_at`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
|
| 499 |
+
- supported_params: `repo_type`, `repo_id`, `discussion_num`, `fields`
|
| 500 |
+
- param_values:
|
| 501 |
+
- repo_type: `model`, `dataset`, `space`
|
| 502 |
+
- fields_contract:
|
| 503 |
+
- allowed_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
|
| 504 |
+
- canonical_only: `true`
|
| 505 |
+
- notes: Exact discussion detail helper.
|
| 506 |
+
|
| 507 |
+
### hf_repo_discussions
|
| 508 |
+
|
| 509 |
+
- category: `discussion_summary`
|
| 510 |
+
- returns:
|
| 511 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 512 |
+
- row_type: `discussion`
|
| 513 |
+
- default_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`
|
| 514 |
+
- guaranteed_fields: `num`, `title`, `author`, `status`
|
| 515 |
+
- optional_fields: `repo_id`, `repo_type`, `created_at`, `url`
|
| 516 |
+
- supported_params: `repo_type`, `repo_id`, `limit`, `fields`
|
| 517 |
+
- param_values:
|
| 518 |
+
- repo_type: `model`, `dataset`, `space`
|
| 519 |
+
- fields_contract:
|
| 520 |
+
- allowed_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`
|
| 521 |
+
- canonical_only: `true`
|
| 522 |
+
- limit_contract:
|
| 523 |
+
- default_limit: `20`
|
| 524 |
+
- max_limit: `200`
|
| 525 |
+
- notes: Discussion summary helper.
|
| 526 |
+
|
| 527 |
+
### hf_repo_likers
|
| 528 |
+
|
| 529 |
+
- category: `repo_to_users`
|
| 530 |
+
- returns:
|
| 531 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 532 |
+
- row_type: `actor`
|
| 533 |
+
- default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 534 |
+
- guaranteed_fields: `username`
|
| 535 |
+
- optional_fields: `fullname`, `is_pro`, `role`, `type`
|
| 536 |
+
- supported_params: `repo_id`, `repo_type`, `limit`, `count_only`, `pro_only`, `where`, `fields`
|
| 537 |
+
- param_values:
|
| 538 |
+
- repo_type: `model`, `dataset`, `space`
|
| 539 |
+
- fields_contract:
|
| 540 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 541 |
+
- canonical_only: `true`
|
| 542 |
+
- where_contract:
|
| 543 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 544 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 545 |
+
- normalized_only: `true`
|
| 546 |
+
- limit_contract:
|
| 547 |
+
- default_limit: `1000`
|
| 548 |
+
- notes: Returns users who liked a repo.
|
| 549 |
+
|
| 550 |
+
### hf_repo_search
|
| 551 |
+
|
| 552 |
+
- category: `cross_type_repo_search`
|
| 553 |
+
- returns:
|
| 554 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 555 |
+
- row_type: `repo`
|
| 556 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 557 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 558 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 559 |
+
- supported_params: `search`, `repo_type`, `repo_types`, `filter`, `author`, `sort`, `limit`, `fields`, `post_filter`
|
| 560 |
+
- sort_values_by_repo_type:
|
| 561 |
+
- dataset: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 562 |
+
- model: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 563 |
+
- space: `created_at`, `last_modified`, `likes`, `trending_score`
|
| 564 |
+
- param_values:
|
| 565 |
+
- repo_type: `model`, `dataset`, `space`
|
| 566 |
+
- repo_types: `model`, `dataset`, `space`
|
| 567 |
+
- sort: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 568 |
+
- fields_contract:
|
| 569 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 570 |
+
- canonical_only: `true`
|
| 571 |
+
- post_filter_contract:
|
| 572 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 573 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 574 |
+
- normalized_only: `true`
|
| 575 |
+
- limit_contract:
|
| 576 |
+
- default_limit: `20`
|
| 577 |
+
- max_limit: `5000`
|
| 578 |
+
- notes: Small generic repo-search helper. Prefer hf_models_search, hf_datasets_search, or hf_spaces_search for single-type queries; use hf_repo_search for intentionally cross-type search. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 579 |
+
|
| 580 |
+
### hf_runtime_capabilities
|
| 581 |
+
|
| 582 |
+
- category: `introspection`
|
| 583 |
+
- returns:
|
| 584 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 585 |
+
- row_type: `runtime_capability`
|
| 586 |
+
- default_fields: `allowed_sections`, `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
|
| 587 |
+
- guaranteed_fields: `allowed_sections`, `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
|
| 588 |
+
- optional_fields: []
|
| 589 |
+
- supported_params: `section`
|
| 590 |
+
- param_values:
|
| 591 |
+
- section: `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
|
| 592 |
+
- notes: Introspection helper. Use section=... to narrow the response.
|
| 593 |
+
|
| 594 |
+
### hf_spaces_search
|
| 595 |
+
|
| 596 |
+
- category: `wrapped_hf_repo_search`
|
| 597 |
+
- backed_by: `HfApi.list_spaces`
|
| 598 |
+
- returns:
|
| 599 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 600 |
+
- row_type: `repo`
|
| 601 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 602 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 603 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 604 |
+
- supported_params: `search`, `filter`, `author`, `datasets`, `models`, `linked`, `sort`, `limit`, `expand`, `full`, `fields`, `post_filter`
|
| 605 |
+
- sort_values: `created_at`, `last_modified`, `likes`, `trending_score`
|
| 606 |
+
- expand_values: `author`, `card_data`, `created_at`, `datasets`, `disabled`, `last_modified`, `likes`, `models`, `private`, `resource_group`, `runtime`, `sdk`, `sha`, `siblings`, `subdomain`, `tags`, `trending_score`, `xet_enabled`, `gitaly_uid`
|
| 607 |
+
- fields_contract:
|
| 608 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 609 |
+
- canonical_only: `true`
|
| 610 |
+
- post_filter_contract:
|
| 611 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 612 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 613 |
+
- normalized_only: `true`
|
| 614 |
+
- limit_contract:
|
| 615 |
+
- default_limit: `20`
|
| 616 |
+
- max_limit: `5000`
|
| 617 |
+
- notes: Thin space-search wrapper around the Hub list_spaces path. Prefer this over hf_repo_search for space-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 618 |
+
|
| 619 |
+
### hf_trending
|
| 620 |
+
|
| 621 |
+
- category: `curated_repo_feed`
|
| 622 |
+
- returns:
|
| 623 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 624 |
+
- row_type: `repo`
|
| 625 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
|
| 626 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`, `trending_rank`
|
| 627 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 628 |
+
- supported_params: `repo_type`, `limit`, `where`, `fields`
|
| 629 |
+
- param_values:
|
| 630 |
+
- repo_type: `model`, `dataset`, `space`, `all`
|
| 631 |
+
- fields_contract:
|
| 632 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
|
| 633 |
+
- canonical_only: `true`
|
| 634 |
+
- where_contract:
|
| 635 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
|
| 636 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 637 |
+
- normalized_only: `true`
|
| 638 |
+
- limit_contract:
|
| 639 |
+
- default_limit: `20`
|
| 640 |
+
- max_limit: `20`
|
| 641 |
+
- notes: Returns ordered trending summary rows only. Use hf_repo_details for exact repo metadata.
|
| 642 |
+
|
| 643 |
+
### hf_user_graph
|
| 644 |
+
|
| 645 |
+
- category: `graph_scan`
|
| 646 |
+
- returns:
|
| 647 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 648 |
+
- row_type: `actor`
|
| 649 |
+
- default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 650 |
+
- guaranteed_fields: `username`
|
| 651 |
+
- optional_fields: `fullname`, `is_pro`, `role`, `type`
|
| 652 |
+
- supported_params: `username`, `relation`, `limit`, `scan_limit`, `count_only`, `pro_only`, `where`, `fields`
|
| 653 |
+
- param_values:
|
| 654 |
+
- relation: `followers`, `following`
|
| 655 |
+
- fields_contract:
|
| 656 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 657 |
+
- canonical_only: `true`
|
| 658 |
+
- where_contract:
|
| 659 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 660 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 661 |
+
- normalized_only: `true`
|
| 662 |
+
- limit_contract:
|
| 663 |
+
- default_limit: `1000`
|
| 664 |
+
- max_limit: `10000`
|
| 665 |
+
- scan_max: `10000`
|
| 666 |
+
- notes: Returns followers/following summary rows.
|
| 667 |
+
|
| 668 |
+
### hf_user_likes
|
| 669 |
+
|
| 670 |
+
- category: `user_to_repos`
|
| 671 |
+
- returns:
|
| 672 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 673 |
+
- row_type: `user_like`
|
| 674 |
+
- default_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
|
| 675 |
+
- guaranteed_fields: `liked_at`, `repo_id`, `repo_type`
|
| 676 |
+
- optional_fields: `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
|
| 677 |
+
- supported_params: `username`, `repo_types`, `limit`, `scan_limit`, `count_only`, `where`, `fields`, `sort`, `ranking_window`
|
| 678 |
+
- sort_values: `liked_at`, `repo_likes`, `repo_downloads`
|
| 679 |
+
- param_values:
|
| 680 |
+
- repo_types: `model`, `dataset`, `space`
|
| 681 |
+
- sort: `liked_at`, `repo_likes`, `repo_downloads`
|
| 682 |
+
- fields_contract:
|
| 683 |
+
- allowed_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
|
| 684 |
+
- canonical_only: `true`
|
| 685 |
+
- where_contract:
|
| 686 |
+
- allowed_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
|
| 687 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 688 |
+
- normalized_only: `true`
|
| 689 |
+
- limit_contract:
|
| 690 |
+
- default_limit: `100`
|
| 691 |
+
- max_limit: `2000`
|
| 692 |
+
- enrich_max: `50`
|
| 693 |
+
- ranking_default: `50`
|
| 694 |
+
- scan_max: `10000`
|
| 695 |
+
- notes: Default recency mode is cheap. Popularity-ranked sorts use canonical keys liked_at/repo_likes/repo_downloads and rerank only a bounded recent shortlist. When ranking by popularity, check meta.ranking_complete / meta.ranking_window, since the helper — not the caller — bounds how much of the likes feed is reranked.
|
| 696 |
+
|
| 697 |
+
### hf_whoami
|
| 698 |
+
|
| 699 |
+
- category: `identity`
|
| 700 |
+
- returns:
|
| 701 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 702 |
+
- row_type: `user`
|
| 703 |
+
- default_fields: `username`, `fullname`, `is_pro`
|
| 704 |
+
- guaranteed_fields: `username`
|
| 705 |
+
- optional_fields: `fullname`, `is_pro`
|
| 706 |
+
- supported_params: []
|
| 707 |
+
- notes: Returns the current authenticated user when a request token is available.
|
| 708 |
+
|
| 709 |
+
|
.prefab/agent-cards/_monty_codegen_shared.md
CHANGED
|
@@ -1,609 +1,3 @@
|
|
| 1 |
-
|
| 2 |
|
| 3 |
-
-
|
| 4 |
-
- All helper functions are already in scope.
|
| 5 |
-
- All helper/API calls are async: always use `await`.
|
| 6 |
-
- `max_calls` is the total external-call budget for the whole generated program, not a generic helper argument.
|
| 7 |
-
- The outer wrapper is an exact contract. You **MUST** use this exact skeleton and only change the body:
|
| 8 |
-
|
| 9 |
-
```py
|
| 10 |
-
async def solve(query, max_calls):
|
| 11 |
-
...
|
| 12 |
-
# body goes here
|
| 13 |
-
|
| 14 |
-
await solve(query, max_calls)
|
| 15 |
-
```
|
| 16 |
-
|
| 17 |
-
- Always prefer helper functions. Use `call_api('/api/...')` only when no helper fits.
|
| 18 |
-
- `call_api` must receive a raw path starting with `/api/...`; never call helper names through `call_api`.
|
| 19 |
-
- `call_api(...)` returns `{ok, status, url, data, error}`. Always check `resp["ok"]` before reading `resp["data"]`. Do not read `resp["items"]` or `resp["meta"]` directly from `call_api(...)`.
|
| 20 |
-
- `call_api(...)` only accepts `endpoint`, `params`, `method`, and `json_body`. Do not guess extra kwargs.
|
| 21 |
-
- Use `call_api(...)` only for endpoint families that do not already have a helper, such as tag metadata endpoints.
|
| 22 |
-
- For questions about supported helpers, fields, limits, raw API affordances, or runtime capabilities, use `hf_runtime_capabilities(...)` instead of hand-authoring a static answer from memory.
|
| 23 |
-
- Keep final displayed results compact, but do not artificially shrink intermediate helper coverage unless the user explicitly asked for a sample.
|
| 24 |
-
- Prefer canonical snake_case keys in generated code and in JSON output.
|
| 25 |
-
- When returning a structured dict that includes your own coverage metadata, use the exact top-level keys `results` and `coverage` unless the user explicitly requested different key names.
|
| 26 |
-
- Omit unavailable optional fields instead of emitting `null` placeholders unless the user explicitly asked for a fixed schema with nulls.
|
| 27 |
-
- If the user asks for specific fields or says "return only", return exactly that final shape from `solve(...)`.
|
| 28 |
-
- For current-user prompts (`my`, `me`), use helpers with `username=None` first. Only ask for identity if that fails.
|
| 29 |
-
- When a current-user helper response has `ok=false`, return that helper response directly instead of flattening it into an empty result.
|
| 30 |
-
|
| 31 |
-
## Common helper signature traps
|
| 32 |
-
These are high-priority rules. Do not guess helper arguments.
|
| 33 |
-
|
| 34 |
-
- `hf_repo_search(...)` uses `limit`, **not** `return_limit`, and does **not** accept `count_only`.
|
| 35 |
-
- `hf_trending(...)` uses `limit`, **not** `return_limit`.
|
| 36 |
-
- `hf_daily_papers(...)` uses `limit`, **not** `return_limit`.
|
| 37 |
-
- `hf_repo_discussions(...)` uses `limit`, **not** `return_limit`.
|
| 38 |
-
- `hf_user_graph(...)`, `hf_user_likes(...)`, `hf_org_members(...)`, `hf_recent_activity(...)`, and `hf_collection_items(...)` use `return_limit`.
|
| 39 |
-
- `hf_profile_summary(include=...)` supports only `"likes"` and `"activity"`.
|
| 40 |
-
- Do **not** guess `hf_profile_summary(include=[...])` values such as `"followers"`, `"following"`, `"models"`, `"datasets"`, or `"spaces"`.
|
| 41 |
-
- `followers_count`, `following_count`, `models_count`, `datasets_count`, `spaces_count`, and similar aggregate counts already come from the base `hf_profile_summary(...)["item"]`.
|
| 42 |
-
- `return_limit=None` does **not** mean exhaustive or "all rows". It means the helper uses its documented default.
|
| 43 |
-
- When `count_only=True`, omit `return_limit`; count-only requests ignore row-return limits and return no items.
|
| 44 |
-
- For "how many models/datasets/spaces does org/user X have?" prefer `hf_profile_summary(...)["item"]` instead of trying to count with `hf_repo_search(...)`.
|
| 45 |
-
- Never invent helper args such as `count_only=True` for helpers that do not document it.
|
| 46 |
-
|
| 47 |
-
## Helper result shape
|
| 48 |
-
All helpers return:
|
| 49 |
-
```py
|
| 50 |
-
{
|
| 51 |
-
"ok": bool,
|
| 52 |
-
"item": dict | None,
|
| 53 |
-
"items": list[dict],
|
| 54 |
-
"meta": dict,
|
| 55 |
-
"error": str | None,
|
| 56 |
-
}
|
| 57 |
-
```
|
| 58 |
-
|
| 59 |
-
Rules:
|
| 60 |
-
- `items` is the canonical list field.
|
| 61 |
-
- `item` is only a singleton convenience.
|
| 62 |
-
- `meta` contains helper-owned execution, coverage, and limit information.
|
| 63 |
-
- For metadata-oriented prompts, return the relevant `meta` fields instead of inferring coverage from list length alone.
|
| 64 |
-
- For bounded list/sample helpers in raw mode, returning the helper envelope directly preserves helper-owned `meta` fields.
|
| 65 |
-
|
| 66 |
-
## Routing guide
|
| 67 |
-
|
| 68 |
-
### Summary vs detail
|
| 69 |
-
- Summary helpers are the default for list/search/trending questions: `hf_repo_search(...)`, `hf_trending(...)`, `hf_daily_papers(...)`, `hf_user_likes(...)`, `hf_recent_activity(...)`, `hf_collections_search(...)`, `hf_collection_items(...)`, `hf_org_members(...)`, `hf_user_graph(...)`.
|
| 70 |
-
- Use `hf_repo_details(...)` when the user needs exact repo metadata rather than a cheap summary row.
|
| 71 |
-
- Do **not** invent follow-up detail calls unless the user explicitly needs fields that are not already available in the current helper response.
|
| 72 |
-
|
| 73 |
-
### Runtime self-description
|
| 74 |
-
- Supported helpers / default fields / limits / raw API affordances → `hf_runtime_capabilities(...)`
|
| 75 |
-
- If the question is specifically about helper defaults or cost behavior, prefer `hf_runtime_capabilities(section="helper_defaults")`.
|
| 76 |
-
|
| 77 |
-
### Repo questions
|
| 78 |
-
- Exact `owner/name` details → `hf_repo_details(repo_type="auto", ...)`
|
| 79 |
-
- Search/discovery/list/top repos → `hf_repo_search(...)`
|
| 80 |
-
- True trending requests → `hf_trending(...)`
|
| 81 |
-
- Daily papers → `hf_daily_papers(...)`
|
| 82 |
-
- Repo discussions → `hf_repo_discussions(...)`
|
| 83 |
-
- Specific discussion details / latest comment text → `hf_repo_discussion_details(...)`
|
| 84 |
-
- Users who liked a specific repo → `hf_repo_likers(...)`
|
| 85 |
-
|
| 86 |
-
### User questions
|
| 87 |
-
- Profile / overview / "tell me about user X" → `hf_profile_summary(...)`
|
| 88 |
-
- Follower/following **counts** for a user → prefer `hf_profile_summary(...)`
|
| 89 |
-
- Followers / following **lists**, graph samples, and social joins → `hf_user_graph(...)`
|
| 90 |
-
- Repos a user liked → `hf_user_likes(...)`
|
| 91 |
-
- Recent actions / activity feed → `hf_recent_activity(feed_type="user", entity=...)`
|
| 92 |
-
|
| 93 |
-
### Organization questions
|
| 94 |
-
- Organization details and counts → `hf_profile_summary(...)`
|
| 95 |
-
- Organization members → `hf_org_members(...)`
|
| 96 |
-
- Organization repos → `hf_repo_search(author="<org>", repo_types=[...])`
|
| 97 |
-
- Organization or user collections → `hf_collections_search(owner="<org-or-user>", ...)`
|
| 98 |
-
- Repos inside a known collection → `hf_collection_items(collection_id=...)`
|
| 99 |
-
|
| 100 |
-
### Direction reminders
|
| 101 |
-
- `hf_user_likes(...)` = **user → repos**
|
| 102 |
-
- `hf_repo_likers(...)` = **repo → users**
|
| 103 |
-
- `hf_user_graph(...)` = **user/org → followers/following**
|
| 104 |
-
- `"who follows X"` → `hf_user_graph(username="X", relation="followers", ...)`
|
| 105 |
-
- `"who does X follow"` → `hf_user_graph(username="X", relation="following", ...)`
|
| 106 |
-
- If the author/org is already known, start with `hf_repo_search(author=...)` instead of semantic search.
|
| 107 |
-
- For "most popular repo a user liked", use `hf_user_likes(sort="repoLikes" | "repoDownloads", ranking_window=40)` instead of fetching recent likes and re-ranking locally.
|
| 108 |
-
|
| 109 |
-
### Join / intersection guidance
|
| 110 |
-
- For set-intersection questions, prefer **one helper call per side + local set logic**.
|
| 111 |
-
- Example: `"who in the huggingface org follows evalstate"` should use:
|
| 112 |
-
1. `hf_org_members(organization="huggingface", ...)`
|
| 113 |
-
2. `hf_user_graph(username="evalstate", relation="followers", ...)`
|
| 114 |
-
3. intersect `username` locally
|
| 115 |
-
- Example: `"who in the huggingface org does evalstate follow"` should use:
|
| 116 |
-
1. `hf_org_members(organization="huggingface", ...)`
|
| 117 |
-
2. `hf_user_graph(username="evalstate", relation="following", ...)`
|
| 118 |
-
3. intersect `username` locally
|
| 119 |
-
- Do **not** invert follower/following direction when restating the prompt.
|
| 120 |
-
- Do **not** do one graph call per org member for these intersection questions unless you explicitly need a bounded fallback.
|
| 121 |
-
|
| 122 |
-
## Common row keys
|
| 123 |
-
Use these canonical keys unless the user explicitly wants different names.
|
| 124 |
-
|
| 125 |
-
- Repo rows: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `library_name`, `repo_url`, `tags`
|
| 126 |
-
- Daily paper rows: `paper_id`, `title`, `published_at`, `authors`, `organization`, `repo_id`, `rank`
|
| 127 |
-
- User graph/member rows: `username`, `fullname`, `isPro`, `role`, `type`
|
| 128 |
-
- Activity rows: `event_type`, `repo_id`, `repo_type`, `timestamp`
|
| 129 |
-
- Collection rows: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `last_updated`, `item_count`
|
| 130 |
-
- `hf_profile_summary(...)["item"]`: `handle`, `entity_type`, `display_name`, `bio`, `description`, `avatar_url`, `website_url`, `twitter_url`, `github_url`, `linkedin_url`, `bluesky_url`, `followers_count`, `following_count`, `likes_count`, `members_count`, `models_count`, `datasets_count`, `spaces_count`, `is_pro`, `likes_sample`, `activity_sample`
|
| 131 |
-
|
| 132 |
-
Common aliases in `fields=[...]` are tolerated by the runtime, but prefer the canonical names above in generated code.
|
| 133 |
-
|
| 134 |
-
## Common repo fields
|
| 135 |
-
- `repo_id`
|
| 136 |
-
- `repo_type`
|
| 137 |
-
- `author`
|
| 138 |
-
- `likes`
|
| 139 |
-
- `downloads`
|
| 140 |
-
- `created_at`
|
| 141 |
-
- `last_modified`
|
| 142 |
-
- `pipeline_tag`
|
| 143 |
-
- `num_params`
|
| 144 |
-
- `repo_url`
|
| 145 |
-
- model: `library_name`
|
| 146 |
-
- dataset: `description`, `paperswithcode_id`
|
| 147 |
-
- space: `sdk`, `models`, `datasets`, `subdomain`
|
| 148 |
-
- trending: `trending_rank`, `trending_score` when present
|
| 149 |
-
- prefer `repo_id` as the display label for repos; `title` may be absent or may just mirror `repo_id`
|
| 150 |
-
|
| 151 |
-
Common aliases tolerated in `fields=[...]`:
|
| 152 |
-
- `repoId` → `repo_id`
|
| 153 |
-
- `repoType` → `repo_type`
|
| 154 |
-
- `repoUrl` → `repo_url`
|
| 155 |
-
- `createdAt` → `created_at`
|
| 156 |
-
- `lastModified` → `last_modified`
|
| 157 |
-
- `numParams` → `num_params`
|
| 158 |
-
|
| 159 |
-
## Common collection fields
|
| 160 |
-
- `collection_id`
|
| 161 |
-
- `slug`
|
| 162 |
-
- `title`
|
| 163 |
-
- `owner`
|
| 164 |
-
- `owner_type`
|
| 165 |
-
- `description`
|
| 166 |
-
- `last_updated`
|
| 167 |
-
- `item_count`
|
| 168 |
-
|
| 169 |
-
Common aliases tolerated in `fields=[...]`:
|
| 170 |
-
- `collectionId` → `collection_id`
|
| 171 |
-
- `lastUpdated` → `last_updated`
|
| 172 |
-
- `ownerType` → `owner_type`
|
| 173 |
-
- `itemCount` → `item_count`
|
| 174 |
-
- `author` → `owner`
|
| 175 |
-
|
| 176 |
-
## High-signal usage notes
|
| 177 |
-
- `hf_repo_search(...)` defaults to models if no repo type is specified. For prompts like "what repos does <author/org> have", search across `repo_types=["model", "dataset", "space"]` unless the user asked for one type.
|
| 178 |
-
- `hf_repo_search(...)` and `hf_trending(...)` are summary helpers. Use `hf_repo_details(...)` when the user explicitly needs exact repo metadata.
|
| 179 |
-
- For models, datasets, and spaces, do **not** rely on a separate repo `title` field in summary outputs. Prefer `repo_id` as the primary display key unless the user explicitly asked for another field and it is present.
|
| 180 |
-
- `hf_repo_search(...)` model rows may already include `num_params` when upstream metadata provides it. Use that cheap summary field before considering detail hydration.
|
| 181 |
-
- `hf_trending(...)` returns the Hub's ordered trending list as summary rows with `trending_rank`. `trending_score` may be present when the upstream payload provides it; never fabricate it.
|
| 182 |
-
- `hf_daily_papers(...)` is the normal path for today's daily papers. `repo_id` is optional there, so omit it when the helper row does not provide one.
|
| 183 |
-
- `hf_profile_summary(...)` is the fastest way to answer common profile prompts. Read profile/social fields directly from `summary["item"]`.
|
| 184 |
-
- For prompts like "how many followers do I have?" or "how many users does X follow?", prefer `hf_profile_summary(...)["item"]` for the aggregate count.
|
| 185 |
-
- For prompts like "who follows me?", "who does X follow?", or any follower/following intersection, use `hf_user_graph(...)` with the correct `relation`.
|
| 186 |
-
- For "how many models/datasets/spaces does user/org X have?" prompts, prefer `hf_profile_summary(...)["item"]` over `hf_repo_search(..., limit=1)` or invented `count_only` args.
|
| 187 |
-
- Use `hf_whoami()` when you need the explicit current username for joins, comparisons, or output labeling.
|
| 188 |
-
- For overlap/comparison/ranking/join tasks, fetch a broad enough **working set** first and compute locally in code.
|
| 189 |
-
- It is good to use a larger internal working set than the final user-facing output. Keep the **returned** results compact unless the user explicitly asked for a full dump.
|
| 190 |
-
- For completeness-sensitive joins over followers/members/likers, use an explicit large `return_limit` on the seed helpers rather than `return_limit=None`.
|
| 191 |
-
- Good pattern: use larger limits internally for coverage, then return only the compact final intersection/ranking/projection the user asked for.
|
| 192 |
-
- Avoid per-row hydration calls unless you truly need exact metadata that is not already present in the current helper response.
|
| 193 |
-
- For prompts that ask for both a sample and metadata, keep the sample compact and surface helper-owned `meta` fields explicitly.
|
| 194 |
-
- For follower/member social-link lookups, first fetch usernames with `hf_user_graph(...)` or `hf_org_members(...)`, then fetch profile/social data with `hf_profile_summary(handle=...)`.
|
| 195 |
-
- For fan-out tasks that require one helper call per follower/member/liker/repo/user, prefer bounded seed sets **by default** so ordinary requests stay fast and predictable.
|
| 196 |
-
- If the user explicitly asks for exhaustive coverage (`all`, `scan all`, `entire`, `not just the first N`, `ensure more than the first 20`, etc.), do **not** silently cap the seed at a small sample such as 20 or 50.
|
| 197 |
-
- For those explicit exhaustive requests, attempt a substantially broader seed scan first when the runtime budget permits.
|
| 198 |
-
- For explicit exhaustive follower/member scans, prefer omitting `return_limit` or using a value large enough to cover the expected total. Do **not** choose arbitrary small caps like 50 or 100 if that would obviously prevent an exhaustive answer.
|
| 199 |
-
- If the prompt says both `scan all` and `more than the first 20`, the `scan all` requirement wins. Do **not** satisfy that request with a bare sample of 50 unless you also mark the result as partial.
|
| 200 |
-
- If exhaustive coverage is still not feasible within `max_calls` or timeout, say so clearly and return an explicit partial result with coverage metadata instead of presenting a bounded sample as if it were complete.
|
| 201 |
-
- When you return a composed partial result, use the exact top-level keys `results` and `coverage` unless the user explicitly asked for a different schema. Do **not** rename `results` to `items`, `rows`, `liked_models`, or similar.
|
| 202 |
-
- Do **not** use your own top-level transport wrapper named `meta` in raw mode; runtime already owns the outer `meta`.
|
| 203 |
-
- Good coverage fields for partial fan-out results include: `partial`, `reason`, `seed_limit`, `seed_processed`, `seed_total`, `seed_more_available`, `per_entity_limit`, and `next_request_hint`.
|
| 204 |
-
- If the user did not explicitly require exhaustiveness, a clear partial result with coverage metadata is better than failing with `Max API calls exceeded`.
|
| 205 |
-
- If the user **did** explicitly require exhaustiveness and you cannot complete it, do not imply success. Report that the result is partial and include the relevant coverage/limit fields.
|
| 206 |
-
- For explicit exhaustive follower/member prompts, if `meta.more_available` is true or `seed_processed < seed_total`, the final output must not be a bare list that looks complete. Include explicit partial/coverage information.
|
| 207 |
-
- For compact join outputs, it is fine for the internal seed helpers to use larger limits than the final returned list. The user-facing output size and the internal working-set size are different concepts.
|
| 208 |
-
- Use `hf_recent_activity(...)` for activity feeds instead of raw `call_api('/api/recent-activity', ...)`.
|
| 209 |
-
- Use `hf_repo_search(author=..., repo_type="space", ...)` for Spaces by author; there is no separate spaces-by-author helper.
|
| 210 |
-
- Use `hf_collections_search(owner=...)` for "what collections does this org/user have?" prompts.
|
| 211 |
-
- `hf_collections_search(...)` is for finding/listing collections. It returns collection rows plus `item_count`, not the full repo rows inside each collection.
|
| 212 |
-
- Use `hf_collection_items(collection_id=...)` for "what repos/models/datasets/spaces are in this collection?" prompts.
|
| 213 |
-
- Do **not** guess raw collection item endpoints such as `/api/collections/.../items`.
|
| 214 |
-
|
| 215 |
-
## Helper API
|
| 216 |
-
```py
|
| 217 |
-
await hf_runtime_capabilities(section: str | None = None)
|
| 218 |
-
|
| 219 |
-
await hf_profile_summary(
|
| 220 |
-
handle: str | None = None,
|
| 221 |
-
include: list[str] | None = None,
|
| 222 |
-
likes_limit: int = 10,
|
| 223 |
-
activity_limit: int = 10,
|
| 224 |
-
)
|
| 225 |
-
# include supports only: ["likes"], ["activity"], or ["likes", "activity"]
|
| 226 |
-
# aggregate counts like followers_count / following_count / models_count are already in item
|
| 227 |
-
|
| 228 |
-
await hf_org_members(
|
| 229 |
-
organization: str,
|
| 230 |
-
return_limit: int | None = None,
|
| 231 |
-
scan_limit: int | None = None,
|
| 232 |
-
count_only: bool = False,
|
| 233 |
-
where: dict | None = None,
|
| 234 |
-
fields: list[str] | None = None,
|
| 235 |
-
)
|
| 236 |
-
|
| 237 |
-
await hf_repo_search(
|
| 238 |
-
query: str | None = None,
|
| 239 |
-
repo_type: str | None = None,
|
| 240 |
-
repo_types: list[str] | None = None,
|
| 241 |
-
author: str | None = None,
|
| 242 |
-
filters: list[str] | None = None,
|
| 243 |
-
sort: str | None = None,
|
| 244 |
-
limit: int = 20,
|
| 245 |
-
where: dict | None = None,
|
| 246 |
-
fields: list[str] | None = None,
|
| 247 |
-
advanced: dict | None = None,
|
| 248 |
-
)
|
| 249 |
-
|
| 250 |
-
await hf_repo_details(
|
| 251 |
-
repo_id: str | None = None,
|
| 252 |
-
repo_ids: list[str] | None = None,
|
| 253 |
-
repo_type: str = "auto",
|
| 254 |
-
fields: list[str] | None = None,
|
| 255 |
-
)
|
| 256 |
-
|
| 257 |
-
await hf_trending(
|
| 258 |
-
repo_type: str = "model",
|
| 259 |
-
limit: int = 20,
|
| 260 |
-
where: dict | None = None,
|
| 261 |
-
fields: list[str] | None = None,
|
| 262 |
-
)
|
| 263 |
-
|
| 264 |
-
await hf_daily_papers(
|
| 265 |
-
limit: int = 20,
|
| 266 |
-
where: dict | None = None,
|
| 267 |
-
fields: list[str] | None = None,
|
| 268 |
-
)
|
| 269 |
-
|
| 270 |
-
await hf_user_graph(
|
| 271 |
-
username: str | None = None,
|
| 272 |
-
relation: str = "followers",
|
| 273 |
-
return_limit: int | None = None,
|
| 274 |
-
scan_limit: int | None = None,
|
| 275 |
-
count_only: bool = False,
|
| 276 |
-
pro_only: bool | None = None,
|
| 277 |
-
where: dict | None = None,
|
| 278 |
-
fields: list[str] | None = None,
|
| 279 |
-
)
|
| 280 |
-
|
| 281 |
-
await hf_repo_likers(
|
| 282 |
-
repo_id: str,
|
| 283 |
-
repo_type: str,
|
| 284 |
-
return_limit: int | None = None,
|
| 285 |
-
count_only: bool = False,
|
| 286 |
-
pro_only: bool | None = None,
|
| 287 |
-
where: dict | None = None,
|
| 288 |
-
fields: list[str] | None = None,
|
| 289 |
-
)
|
| 290 |
-
|
| 291 |
-
await hf_user_likes(
|
| 292 |
-
username: str | None = None,
|
| 293 |
-
repo_types: list[str] | None = None,
|
| 294 |
-
return_limit: int | None = None,
|
| 295 |
-
scan_limit: int | None = None,
|
| 296 |
-
count_only: bool = False,
|
| 297 |
-
where: dict | None = None,
|
| 298 |
-
fields: list[str] | None = None,
|
| 299 |
-
sort: str | None = None,
|
| 300 |
-
ranking_window: int | None = None,
|
| 301 |
-
)
|
| 302 |
-
|
| 303 |
-
await hf_recent_activity(
|
| 304 |
-
feed_type: str | None = None,
|
| 305 |
-
entity: str | None = None,
|
| 306 |
-
activity_types: list[str] | None = None,
|
| 307 |
-
repo_types: list[str] | None = None,
|
| 308 |
-
return_limit: int | None = None,
|
| 309 |
-
max_pages: int | None = None,
|
| 310 |
-
start_cursor: str | None = None,
|
| 311 |
-
count_only: bool = False,
|
| 312 |
-
where: dict | None = None,
|
| 313 |
-
fields: list[str] | None = None,
|
| 314 |
-
)
|
| 315 |
-
|
| 316 |
-
await hf_repo_discussions(repo_type: str, repo_id: str, limit: int = 20)
|
| 317 |
-
await hf_repo_discussion_details(repo_type: str, repo_id: str, discussion_num: int)
|
| 318 |
-
|
| 319 |
-
await hf_collections_search(
|
| 320 |
-
query: str | None = None,
|
| 321 |
-
owner: str | None = None,
|
| 322 |
-
return_limit: int = 20,
|
| 323 |
-
count_only: bool = False,
|
| 324 |
-
where: dict | None = None,
|
| 325 |
-
fields: list[str] | None = None,
|
| 326 |
-
)
|
| 327 |
-
|
| 328 |
-
await hf_collection_items(
|
| 329 |
-
collection_id: str,
|
| 330 |
-
repo_types: list[str] | None = None,
|
| 331 |
-
return_limit: int = 100,
|
| 332 |
-
count_only: bool = False,
|
| 333 |
-
where: dict | None = None,
|
| 334 |
-
fields: list[str] | None = None,
|
| 335 |
-
)
|
| 336 |
-
|
| 337 |
-
await hf_whoami()
|
| 338 |
-
await call_api(endpoint: str, params: dict | None = None, method: str = "GET", json_body: dict | None = None)
|
| 339 |
-
```
|
| 340 |
-
|
| 341 |
-
## Minimal patterns
|
| 342 |
-
```py
|
| 343 |
-
# Exact repo details
|
| 344 |
-
info = await hf_repo_details(
|
| 345 |
-
repo_id="black-forest-labs/FLUX.1-dev",
|
| 346 |
-
repo_type="auto",
|
| 347 |
-
fields=["repo_id", "repo_type", "author", "pipeline_tag", "library_name", "num_params", "likes", "downloads", "repo_url"],
|
| 348 |
-
)
|
| 349 |
-
item = info["item"] or (info["items"][0] if info["items"] else None)
|
| 350 |
-
return {
|
| 351 |
-
"repo_id": item["repo_id"],
|
| 352 |
-
"repo_type": item["repo_type"],
|
| 353 |
-
"author": item["author"],
|
| 354 |
-
"pipeline_tag": item.get("pipeline_tag"),
|
| 355 |
-
"library_name": item.get("library_name"),
|
| 356 |
-
"num_params": item.get("num_params"),
|
| 357 |
-
"likes": item.get("likes"),
|
| 358 |
-
"downloads": item.get("downloads"),
|
| 359 |
-
"repo_url": item.get("repo_url"),
|
| 360 |
-
}
|
| 361 |
-
|
| 362 |
-
# Runtime capability / supported-field introspection
|
| 363 |
-
caps = await hf_runtime_capabilities(section="fields")
|
| 364 |
-
if not caps["ok"]:
|
| 365 |
-
return caps
|
| 366 |
-
item = caps["item"] or (caps["items"][0] if caps["items"] else None)
|
| 367 |
-
return item["content"]
|
| 368 |
-
|
| 369 |
-
# Compact profile summary
|
| 370 |
-
summary = await hf_profile_summary(
|
| 371 |
-
handle="mishig",
|
| 372 |
-
include=["likes", "activity"],
|
| 373 |
-
likes_limit=10,
|
| 374 |
-
activity_limit=10,
|
| 375 |
-
)
|
| 376 |
-
item = summary["item"] or (summary["items"][0] if summary["items"] else None)
|
| 377 |
-
return {
|
| 378 |
-
"followers_count": item["followers_count"],
|
| 379 |
-
"following_count": item.get("following_count"),
|
| 380 |
-
"activity_sample": item.get("activity_sample", []),
|
| 381 |
-
"likes_sample": item.get("likes_sample", []),
|
| 382 |
-
}
|
| 383 |
-
|
| 384 |
-
# Current user's pro followers and their recent liked repos
|
| 385 |
-
followers = await hf_user_graph(
|
| 386 |
-
relation="followers",
|
| 387 |
-
pro_only=True,
|
| 388 |
-
fields=["username"],
|
| 389 |
-
)
|
| 390 |
-
if not followers["ok"]:
|
| 391 |
-
return followers
|
| 392 |
-
result = {}
|
| 393 |
-
for row in followers["items"]:
|
| 394 |
-
uname = row.get("username")
|
| 395 |
-
if not uname:
|
| 396 |
-
continue
|
| 397 |
-
likes = await hf_user_likes(
|
| 398 |
-
username=uname,
|
| 399 |
-
return_limit=3,
|
| 400 |
-
fields=["repo_id", "repo_type", "liked_at", "repo_url"],
|
| 401 |
-
)
|
| 402 |
-
repos = []
|
| 403 |
-
for item in likes["items"]:
|
| 404 |
-
repo = {}
|
| 405 |
-
for key in ["repo_id", "repo_type", "liked_at", "repo_url"]:
|
| 406 |
-
if item.get(key) is not None:
|
| 407 |
-
repo[key] = item[key]
|
| 408 |
-
if repo:
|
| 409 |
-
repos.append(repo)
|
| 410 |
-
if repos:
|
| 411 |
-
result[uname] = repos
|
| 412 |
-
return result
|
| 413 |
-
|
| 414 |
-
# Fan-out query with bounded partial coverage metadata
|
| 415 |
-
followers = await hf_user_graph(
|
| 416 |
-
relation="followers",
|
| 417 |
-
return_limit=20,
|
| 418 |
-
fields=["username"],
|
| 419 |
-
)
|
| 420 |
-
if not followers["ok"]:
|
| 421 |
-
return followers
|
| 422 |
-
result = {}
|
| 423 |
-
processed = 0
|
| 424 |
-
for row in followers["items"]:
|
| 425 |
-
uname = row.get("username")
|
| 426 |
-
if not uname:
|
| 427 |
-
continue
|
| 428 |
-
likes = await hf_user_likes(
|
| 429 |
-
username=uname,
|
| 430 |
-
repo_types=["model"],
|
| 431 |
-
return_limit=3,
|
| 432 |
-
fields=["repo_id", "repo_author", "liked_at"],
|
| 433 |
-
)
|
| 434 |
-
processed += 1
|
| 435 |
-
items = []
|
| 436 |
-
for item in likes["items"]:
|
| 437 |
-
liked = {}
|
| 438 |
-
for key in ["repo_id", "repo_author", "liked_at"]:
|
| 439 |
-
if item.get(key) is not None:
|
| 440 |
-
liked[key] = item[key]
|
| 441 |
-
if liked:
|
| 442 |
-
items.append(liked)
|
| 443 |
-
if items:
|
| 444 |
-
result[uname] = items
|
| 445 |
-
return {
|
| 446 |
-
"results": result,
|
| 447 |
-
"coverage": {
|
| 448 |
-
"partial": bool(followers["meta"].get("more_available")),
|
| 449 |
-
"reason": "fanout_budget",
|
| 450 |
-
"seed_relation": "followers",
|
| 451 |
-
"seed_limit": 20,
|
| 452 |
-
"seed_processed": processed,
|
| 453 |
-
"seed_total": followers["meta"].get("total"),
|
| 454 |
-
"seed_more_available": followers["meta"].get("more_available"),
|
| 455 |
-
"per_entity_limit": 3,
|
| 456 |
-
"next_request_hint": "Ask for a smaller subset or a follow-up batch if you want more coverage.",
|
| 457 |
-
},
|
| 458 |
-
}
|
| 459 |
-
|
| 460 |
-
# Popularity-ranked likes with metadata
|
| 461 |
-
likes = await hf_user_likes(
|
| 462 |
-
username="julien-c",
|
| 463 |
-
return_limit=1,
|
| 464 |
-
sort="repoLikes",
|
| 465 |
-
ranking_window=40,
|
| 466 |
-
fields=["repo_id", "repo_type", "repo_author", "likes", "repo_url", "liked_at"],
|
| 467 |
-
)
|
| 468 |
-
item = likes["item"] or (likes["items"][0] if likes["items"] else None)
|
| 469 |
-
if item is None:
|
| 470 |
-
return {"error": "No liked repositories found"}
|
| 471 |
-
repo = {}
|
| 472 |
-
for key in ["repo_id", "repo_type", "repo_author", "likes", "repo_url", "liked_at"]:
|
| 473 |
-
if item.get(key) is not None:
|
| 474 |
-
repo[key] = item[key]
|
| 475 |
-
return {
|
| 476 |
-
"repo": repo,
|
| 477 |
-
"metadata": {
|
| 478 |
-
"sort_applied": likes["meta"].get("sort_applied"),
|
| 479 |
-
"ranking_window": likes["meta"].get("ranking_window"),
|
| 480 |
-
"ranking_complete": likes["meta"].get("ranking_complete"),
|
| 481 |
-
},
|
| 482 |
-
}
|
| 483 |
-
|
| 484 |
-
# Recent activity with compact snake_case rows
|
| 485 |
-
activity = await hf_recent_activity(
|
| 486 |
-
feed_type="user",
|
| 487 |
-
entity="mishig",
|
| 488 |
-
return_limit=15,
|
| 489 |
-
fields=["event_type", "repo_id", "repo_type", "timestamp"],
|
| 490 |
-
)
|
| 491 |
-
result = []
|
| 492 |
-
for row in activity["items"]:
|
| 493 |
-
item = {}
|
| 494 |
-
for key in ["event_type", "repo_id", "repo_type", "timestamp"]:
|
| 495 |
-
if row.get(key) is not None:
|
| 496 |
-
item[key] = row[key]
|
| 497 |
-
if item:
|
| 498 |
-
result.append(item)
|
| 499 |
-
return result
|
| 500 |
-
|
| 501 |
-
# Repo discussions
|
| 502 |
-
rows = await hf_repo_discussions(
|
| 503 |
-
repo_type="model",
|
| 504 |
-
repo_id="Qwen/Qwen3.5-35B-A3B",
|
| 505 |
-
limit=10,
|
| 506 |
-
)
|
| 507 |
-
return [
|
| 508 |
-
{
|
| 509 |
-
"num": row["num"],
|
| 510 |
-
"title": row["title"],
|
| 511 |
-
"author": row["author"],
|
| 512 |
-
"status": row["status"],
|
| 513 |
-
}
|
| 514 |
-
for row in rows["items"]
|
| 515 |
-
]
|
| 516 |
-
|
| 517 |
-
# Collections owned by an org or user
|
| 518 |
-
collections = await hf_collections_search(
|
| 519 |
-
owner="Qwen",
|
| 520 |
-
return_limit=20,
|
| 521 |
-
fields=["collection_id", "title", "owner", "description", "last_updated", "item_count"],
|
| 522 |
-
)
|
| 523 |
-
return collections["items"]
|
| 524 |
-
|
| 525 |
-
# Daily papers via the helper
|
| 526 |
-
papers = await hf_daily_papers(
|
| 527 |
-
limit=20,
|
| 528 |
-
fields=["title", "repo_id"],
|
| 529 |
-
)
|
| 530 |
-
return papers["items"]
|
| 531 |
-
|
| 532 |
-
# Organization repo counts
|
| 533 |
-
org = await hf_profile_summary("unsloth")
|
| 534 |
-
item = org["item"] or (org["items"][0] if org["items"] else None)
|
| 535 |
-
return {
|
| 536 |
-
"organization": item["handle"],
|
| 537 |
-
"models_count": item.get("models_count"),
|
| 538 |
-
"datasets_count": item.get("datasets_count"),
|
| 539 |
-
"spaces_count": item.get("spaces_count"),
|
| 540 |
-
}
|
| 541 |
-
|
| 542 |
-
# Do any authors of the top trending spaces follow me?
|
| 543 |
-
who = await hf_whoami()
|
| 544 |
-
if not who["ok"]:
|
| 545 |
-
return who
|
| 546 |
-
me = (who["item"] or (who["items"][0] if who["items"] else None)).get("username")
|
| 547 |
-
spaces = await hf_trending(
|
| 548 |
-
repo_type="space",
|
| 549 |
-
limit=20,
|
| 550 |
-
fields=["repo_id", "author", "repo_url"],
|
| 551 |
-
)
|
| 552 |
-
authors = []
|
| 553 |
-
seen = set()
|
| 554 |
-
for row in spaces["items"]:
|
| 555 |
-
author = row.get("author")
|
| 556 |
-
if isinstance(author, str) and author and author not in seen:
|
| 557 |
-
seen.add(author)
|
| 558 |
-
authors.append(author)
|
| 559 |
-
|
| 560 |
-
results = []
|
| 561 |
-
processed = 0
|
| 562 |
-
for author in authors[:20]:
|
| 563 |
-
graph = await hf_user_graph(
|
| 564 |
-
username=author,
|
| 565 |
-
relation="following",
|
| 566 |
-
return_limit=200,
|
| 567 |
-
fields=["username"],
|
| 568 |
-
)
|
| 569 |
-
processed += 1
|
| 570 |
-
if not graph["ok"]:
|
| 571 |
-
continue
|
| 572 |
-
if any(item.get("username") == me for item in graph["items"]):
|
| 573 |
-
results.append(author)
|
| 574 |
-
|
| 575 |
-
return {
|
| 576 |
-
"results": results,
|
| 577 |
-
"coverage": {
|
| 578 |
-
"partial": False,
|
| 579 |
-
"reason": None,
|
| 580 |
-
"seed_relation": "trending_space_authors",
|
| 581 |
-
"seed_limit": 20,
|
| 582 |
-
"seed_processed": processed,
|
| 583 |
-
"seed_total": len(authors),
|
| 584 |
-
"seed_more_available": False,
|
| 585 |
-
"per_entity_limit": 200,
|
| 586 |
-
},
|
| 587 |
-
}
|
| 588 |
-
|
| 589 |
-
# Models inside an org's collections
|
| 590 |
-
collections = await hf_collections_search(
|
| 591 |
-
owner="openai",
|
| 592 |
-
return_limit=20,
|
| 593 |
-
fields=["collection_id", "title"],
|
| 594 |
-
)
|
| 595 |
-
result = {}
|
| 596 |
-
for coll in collections["items"]:
|
| 597 |
-
collection_id = coll.get("collection_id")
|
| 598 |
-
title = coll.get("title") or collection_id
|
| 599 |
-
if not collection_id:
|
| 600 |
-
continue
|
| 601 |
-
items = await hf_collection_items(
|
| 602 |
-
collection_id=collection_id,
|
| 603 |
-
repo_types=["model"],
|
| 604 |
-
fields=["repo_id", "repo_type", "repo_url"],
|
| 605 |
-
)
|
| 606 |
-
if items["items"]:
|
| 607 |
-
result[title] = items["items"]
|
| 608 |
-
return result
|
| 609 |
-
```
|
|
|
|
| 1 |
+
Compatibility wrapper over the live `.prod` Monty prompt:
|
| 2 |
|
| 3 |
+
{{file:.prod/agent-cards/shared/_monty_codegen_shared.md}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.prefab/agent-cards/_prefab_wire_shared.md
CHANGED
|
@@ -181,6 +181,46 @@ Prefer:
|
|
| 181 |
- structure over decoration
|
| 182 |
- a few confident sections over many tiny widgets
|
| 183 |
- built-in variants over custom color classes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
If `theme` is omitted, the default renderer styling should look mostly good out of the box.
|
| 186 |
Do not hand-author lots of colors unless the user explicitly asks for branding.
|
|
@@ -253,6 +293,9 @@ Prefer this palette first:
|
|
| 253 |
- `PieChart`
|
| 254 |
- `LineChart`
|
| 255 |
- `BarChart`
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
Useful but secondary:
|
| 258 |
- `ButtonGroup`
|
|
@@ -438,6 +481,7 @@ For Hugging Face Hub-style results, these defaults are especially good:
|
|
| 438 |
|
| 439 |
For Hub search/navigation results:
|
| 440 |
- preserve important names, ids, counts, dates, and URLs exactly from the payload
|
|
|
|
| 441 |
- do not invent values or smooth over missing fields
|
| 442 |
- highlight a few useful summary metrics before the full table
|
| 443 |
- preserve ranking/order clearly when ranking matters
|
|
|
|
| 181 |
- structure over decoration
|
| 182 |
- a few confident sections over many tiny widgets
|
| 183 |
- built-in variants over custom color classes
|
| 184 |
+
- app-like restraint over marketing chrome
|
| 185 |
+
- a strong primary workspace over a wall of cards
|
| 186 |
+
|
| 187 |
+
## Frontend-friendly defaults
|
| 188 |
+
|
| 189 |
+
Bias toward calm product UI rather than raw data dumps.
|
| 190 |
+
|
| 191 |
+
Prefer these compositions:
|
| 192 |
+
- search / browse pages:
|
| 193 |
+
- one summary card or slim header row
|
| 194 |
+
- optional KPI grid (`Grid` + `Metric`) for 2-4 headline numbers
|
| 195 |
+
- one main results surface, usually `DataTable`
|
| 196 |
+
- grouped counts / proportions:
|
| 197 |
+
- split layout with a donut `PieChart` and a compact `DataTable`
|
| 198 |
+
- forms / filters:
|
| 199 |
+
- short option lists → `Select`
|
| 200 |
+
- long option lists or tags / categories → `Combobox`
|
| 201 |
+
- multi-value tags / categories → `MultiSelect`
|
| 202 |
+
- model-driven forms should feel like compact operator UI, not generic CRUD dumps
|
| 203 |
+
|
| 204 |
+
For tables:
|
| 205 |
+
- if there are more than ~8 rows, prefer `search: true`
|
| 206 |
+
- if there are more than ~10 rows, prefer `paginated: true` with a sensible `pageSize`
|
| 207 |
+
- if a numeric column is clearly a metric, align it right and use `format: "number"`
|
| 208 |
+
- if a short categorical column should work like a facet (tags, repo type, status), set `DataTableColumn.filterable: true`
|
| 209 |
+
- hide long raw URL columns when `onRowClick` or action buttons communicate the destination better
|
| 210 |
+
|
| 211 |
+
For charts:
|
| 212 |
+
- use donut charts for 2-8 grouped categories with one obvious label key and one obvious numeric key
|
| 213 |
+
- prefer `innerRadius: 60`, `paddingAngle: 2`, `showLegend: true`, `showTooltip: true`
|
| 214 |
+
- when combining charts and tables, usually stack the chart above the table rather than placing them side-by-side, because tables are wide and charts stay legible in a narrower vertical slot
|
| 215 |
+
- only use a horizontal chart+table split when both are compact and the table has very few columns
|
| 216 |
+
- avoid charts when the answer is just a long ranking table
|
| 217 |
+
|
| 218 |
+
Avoid:
|
| 219 |
+
- giant dashboards made of many small cards
|
| 220 |
+
- decorative heroes, gradient marketing sections, or center-column landing-page layouts
|
| 221 |
+
- repeated `Separator` stacks where a `Card`, `Tabs`, or `Grid` would create clearer hierarchy
|
| 222 |
+
- noisy badge soup; badges should be short and sparse
|
| 223 |
+
- dumping every field just because it exists
|
| 224 |
|
| 225 |
If `theme` is omitted, the default renderer styling should look mostly good out of the box.
|
| 226 |
Do not hand-author lots of colors unless the user explicitly asks for branding.
|
|
|
|
| 293 |
- `PieChart`
|
| 294 |
- `LineChart`
|
| 295 |
- `BarChart`
|
| 296 |
+
- `Select`
|
| 297 |
+
- `Combobox`
|
| 298 |
+
- `MultiSelect`
|
| 299 |
|
| 300 |
Useful but secondary:
|
| 301 |
- `ButtonGroup`
|
|
|
|
| 481 |
|
| 482 |
For Hub search/navigation results:
|
| 483 |
- preserve important names, ids, counts, dates, and URLs exactly from the payload
|
| 484 |
+
- avatar urls should be displayed as icons
|
| 485 |
- do not invent values or smooth over missing fields
|
| 486 |
- highlight a few useful summary metrics before the full table
|
| 487 |
- preserve ranking/order clearly when ranking matters
|
.prefab/agent-cards/hub_search_raw.md
CHANGED
|
@@ -8,7 +8,7 @@ description: "Raw live-service card for Hub search. Returns runtime-owned JSON w
|
|
| 8 |
shell: false
|
| 9 |
skills: []
|
| 10 |
function_tools:
|
| 11 |
-
- ../
|
| 12 |
request_params:
|
| 13 |
tool_result_mode: passthrough
|
| 14 |
---
|
|
|
|
| 8 |
shell: false
|
| 9 |
skills: []
|
| 10 |
function_tools:
|
| 11 |
+
- ../monty_api/tool_entrypoints.py:hf_hub_query_raw
|
| 12 |
request_params:
|
| 13 |
tool_result_mode: passthrough
|
| 14 |
---
|
.prefab/fastagent.config.yaml
CHANGED
|
@@ -3,9 +3,7 @@ default_model: "$system.raw"
|
|
| 3 |
model_references:
|
| 4 |
system:
|
| 5 |
default: "$system.raw"
|
| 6 |
-
raw:
|
| 7 |
-
prefab_native: minimax25
|
| 8 |
-
prefab_llm: gpt-oss
|
| 9 |
|
| 10 |
logger:
|
| 11 |
truncate_tools: false
|
|
|
|
| 3 |
model_references:
|
| 4 |
system:
|
| 5 |
default: "$system.raw"
|
| 6 |
+
raw: qwen35instruct
|
|
|
|
|
|
|
| 7 |
|
| 8 |
logger:
|
| 9 |
truncate_tools: false
|
.prefab/monty_api/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from .tool_entrypoints import HELPER_EXTERNALS, hf_hub_query, hf_hub_query_raw, main
|
| 4 |
+
|
| 5 |
+
__all__ = [
|
| 6 |
+
"HELPER_EXTERNALS",
|
| 7 |
+
"hf_hub_query",
|
| 8 |
+
"hf_hub_query_raw",
|
| 9 |
+
"main",
|
| 10 |
+
]
|
.prefab/monty_api/tool_entrypoints.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Prefab-local shim over the live production Monty entrypoints."""
|
| 3 |
+
|
| 4 |
+
from __future__ import annotations
|
| 5 |
+
|
| 6 |
+
import importlib.util
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Any
|
| 9 |
+
|
| 10 |
+
_SOURCE = (
|
| 11 |
+
Path(__file__).resolve().parents[2]
|
| 12 |
+
/ ".prod"
|
| 13 |
+
/ "monty_api"
|
| 14 |
+
/ "tool_entrypoints.py"
|
| 15 |
+
)
|
| 16 |
+
_SPEC = importlib.util.spec_from_file_location("_prefab_prod_tool_entrypoints", _SOURCE)
|
| 17 |
+
if _SPEC is None or _SPEC.loader is None:
|
| 18 |
+
raise RuntimeError(f"could not load source tool entrypoints from {_SOURCE}")
|
| 19 |
+
|
| 20 |
+
_MODULE = importlib.util.module_from_spec(_SPEC)
|
| 21 |
+
_SPEC.loader.exec_module(_MODULE)
|
| 22 |
+
|
| 23 |
+
HELPER_EXTERNALS = _MODULE.HELPER_EXTERNALS
|
| 24 |
+
main = _MODULE.main
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
async def hf_hub_query(
|
| 28 |
+
query: str,
|
| 29 |
+
code: str,
|
| 30 |
+
max_calls: int | None = None,
|
| 31 |
+
timeout_sec: int | None = None,
|
| 32 |
+
) -> dict[str, Any]:
|
| 33 |
+
return await _MODULE.hf_hub_query(
|
| 34 |
+
query=query,
|
| 35 |
+
code=code,
|
| 36 |
+
max_calls=max_calls,
|
| 37 |
+
timeout_sec=timeout_sec,
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
async def hf_hub_query_raw(
|
| 42 |
+
query: str,
|
| 43 |
+
code: str,
|
| 44 |
+
max_calls: int | None = None,
|
| 45 |
+
timeout_sec: int | None = None,
|
| 46 |
+
) -> Any:
|
| 47 |
+
return await _MODULE.hf_hub_query_raw(
|
| 48 |
+
query=query,
|
| 49 |
+
code=code,
|
| 50 |
+
max_calls=max_calls,
|
| 51 |
+
timeout_sec=timeout_sec,
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
__all__ = [
|
| 56 |
+
"HELPER_EXTERNALS",
|
| 57 |
+
"hf_hub_query",
|
| 58 |
+
"hf_hub_query_raw",
|
| 59 |
+
"main",
|
| 60 |
+
]
|
| 61 |
+
|
| 62 |
+
if __name__ == "__main__":
|
| 63 |
+
raise SystemExit(main())
|
.prefab/tool-cards/monty_api_tool_v2.py
CHANGED
|
@@ -5,7 +5,7 @@ from pathlib import Path
|
|
| 5 |
from typing import Any
|
| 6 |
|
| 7 |
_SOURCE = (
|
| 8 |
-
Path(__file__).resolve().parents[
|
| 9 |
)
|
| 10 |
_SPEC = importlib.util.spec_from_file_location("_prefab_monty_api_tool_v2", _SOURCE)
|
| 11 |
if _SPEC is None or _SPEC.loader is None:
|
|
@@ -14,12 +14,15 @@ if _SPEC is None or _SPEC.loader is None:
|
|
| 14 |
_MODULE = importlib.util.module_from_spec(_SPEC)
|
| 15 |
_SPEC.loader.exec_module(_MODULE)
|
| 16 |
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
async def hf_hub_query(
|
| 19 |
query: str,
|
| 20 |
code: str,
|
| 21 |
-
max_calls: int | None =
|
| 22 |
-
timeout_sec: int | None =
|
| 23 |
) -> dict[str, Any]:
|
| 24 |
return await _MODULE.hf_hub_query(
|
| 25 |
query=query,
|
|
@@ -32,8 +35,8 @@ async def hf_hub_query(
|
|
| 32 |
async def hf_hub_query_raw(
|
| 33 |
query: str,
|
| 34 |
code: str,
|
| 35 |
-
max_calls: int | None =
|
| 36 |
-
timeout_sec: int | None =
|
| 37 |
) -> Any:
|
| 38 |
return await _MODULE.hf_hub_query_raw(
|
| 39 |
query=query,
|
|
@@ -41,3 +44,14 @@ async def hf_hub_query_raw(
|
|
| 41 |
max_calls=max_calls,
|
| 42 |
timeout_sec=timeout_sec,
|
| 43 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
from typing import Any
|
| 6 |
|
| 7 |
_SOURCE = (
|
| 8 |
+
Path(__file__).resolve().parents[1] / "monty_api" / "tool_entrypoints.py"
|
| 9 |
)
|
| 10 |
_SPEC = importlib.util.spec_from_file_location("_prefab_monty_api_tool_v2", _SOURCE)
|
| 11 |
if _SPEC is None or _SPEC.loader is None:
|
|
|
|
| 14 |
_MODULE = importlib.util.module_from_spec(_SPEC)
|
| 15 |
_SPEC.loader.exec_module(_MODULE)
|
| 16 |
|
| 17 |
+
HELPER_EXTERNALS = _MODULE.HELPER_EXTERNALS
|
| 18 |
+
main = _MODULE.main
|
| 19 |
+
|
| 20 |
|
| 21 |
async def hf_hub_query(
|
| 22 |
query: str,
|
| 23 |
code: str,
|
| 24 |
+
max_calls: int | None = None,
|
| 25 |
+
timeout_sec: int | None = None,
|
| 26 |
) -> dict[str, Any]:
|
| 27 |
return await _MODULE.hf_hub_query(
|
| 28 |
query=query,
|
|
|
|
| 35 |
async def hf_hub_query_raw(
|
| 36 |
query: str,
|
| 37 |
code: str,
|
| 38 |
+
max_calls: int | None = None,
|
| 39 |
+
timeout_sec: int | None = None,
|
| 40 |
) -> Any:
|
| 41 |
return await _MODULE.hf_hub_query_raw(
|
| 42 |
query=query,
|
|
|
|
| 44 |
max_calls=max_calls,
|
| 45 |
timeout_sec=timeout_sec,
|
| 46 |
)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
__all__ = [
|
| 50 |
+
"HELPER_EXTERNALS",
|
| 51 |
+
"hf_hub_query",
|
| 52 |
+
"hf_hub_query_raw",
|
| 53 |
+
"main",
|
| 54 |
+
]
|
| 55 |
+
|
| 56 |
+
if __name__ == "__main__":
|
| 57 |
+
raise SystemExit(main())
|
.prod/agent-cards/shared/_monty_codegen_shared.md
ADDED
|
@@ -0,0 +1,666 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Code Generation Rules
|
| 2 |
+
|
| 3 |
+
- You are writing Python to be executed in a secure runtime environment.
|
| 4 |
+
- **NEVER** use `import` - it is NOT available in this environment.
|
| 5 |
+
- All helper calls are async: always use `await`.
|
| 6 |
+
- Use this exact outer shape:
|
| 7 |
+
|
| 8 |
+
```py
|
| 9 |
+
async def solve(query, max_calls):
|
| 10 |
+
...
|
| 11 |
+
|
| 12 |
+
await solve(query, max_calls)
|
| 13 |
+
```
|
| 14 |
+
|
| 15 |
+
- `max_calls` is the total external-call budget for the whole program.
|
| 16 |
+
- Use only documented `hf_*` helpers.
|
| 17 |
+
- Return plain Python data only: `dict`, `list`, `str`, `int`, `float`, `bool`, or `None`.
|
| 18 |
+
- Do **not** hand-build JSON strings or markdown strings inside `solve(...)` unless the user explicitly asked for prose.
|
| 19 |
+
- Do **not** build your own transport wrapper like `{result: ..., meta: ...}`.
|
| 20 |
+
- If the user says "return only" some fields, return exactly that final shape.
|
| 21 |
+
- If a helper already returns the requested row shape, return `resp["items"]` directly **only when helper coverage is clearly complete**. If helper `meta` suggests partial/unknown coverage, return `{"results": resp["items"], "coverage": resp["meta"]}` instead of bare items.
|
| 22 |
+
- For current-user prompts (`my`, `me`), try helpers with `username=None` / `handle=None` first.
|
| 23 |
+
- If a current-user helper returns `ok=false`, return that helper response directly.
|
| 24 |
+
|
| 25 |
+
## Search rules
|
| 26 |
+
|
| 27 |
+
- If the user is asking about models, use `hf_models_search(...)`.
|
| 28 |
+
- If the user is asking about datasets, use `hf_datasets_search(...)`.
|
| 29 |
+
- If the user is asking about spaces, use `hf_spaces_search(...)`.
|
| 30 |
+
- Use `hf_repo_search(...)` only for intentionally cross-type search.
|
| 31 |
+
- Use `hf_trending(...)` only for the small "what is trending right now" feed.
|
| 32 |
+
- If the user says "trending" but also adds searchable constraints like `pipeline_tag`, `author`, search text, or `num_params` bounds, prefer the repo search helper sorted by `trending_score`.
|
| 33 |
+
- Think of search helpers as filter-first discovery and `hf_trending(...)` as rank-first current-feed inspection.
|
| 34 |
+
|
| 35 |
+
## Parameter notes
|
| 36 |
+
|
| 37 |
+
- Trust the generated helper contracts below for per-helper params, fields, sort keys, expand values, and defaults.
|
| 38 |
+
- When the user asks for helper-owned coverage metadata, use `helper_resp["meta"]`.
|
| 39 |
+
- Treat any of the following helper-meta signals as coverage-sensitive: `limit_boundary_hit`, `truncated`, `more_available` set to any value other than `False` or `None`, `sample_complete=false`, `exact_count=false`, `ranking_complete=false`, `ranking_window_hit=true`, or `hard_cap_applied=true`. In those cases, do **not** return bare items; return `{"results": ..., "coverage": ...}`.
|
| 40 |
+
- For pro-only follower/member/liker queries, prefer `pro_only=True` instead of filtering on a projected field.
|
| 41 |
+
- `hf_user_likes(...)` already returns full normalized like rows by default; omit `fields` unless the user asked for a subset.
|
| 42 |
+
- When sorting `hf_user_likes(...)` by `repo_likes` or `repo_downloads`, set `ranking_window=50` unless the user explicitly asked for a narrower recent window.
|
| 43 |
+
- For human-facing follower/member/liker lists without an explicit requested count, prefer `limit=100` and return coverage when more may exist.
|
| 44 |
+
- Unknown `fields` / `where` keys now fail fast. Use only canonical field names.
|
| 45 |
+
|
| 46 |
+
- Ownership phrasing like "what collections does Qwen have", "collections by Qwen", or "collections owned by Qwen" means an owner lookup, so use `hf_collections_search(owner="Qwen")`, not a keyword-only `query="Qwen"` search.
|
| 47 |
+
- Ownership phrasing like "what spaces does X have", "what models does X have", or "what datasets does X have" means an author/owner inventory lookup, so use `hf_spaces_search(author="X")`, `hf_models_search(author="X")`, or `hf_datasets_search(author="X")` rather than a global keyword-only search.
|
| 48 |
+
- Owner/user/org handles may arrive with different casing in the user message; when a handle spelling is uncertain, prefer owner-oriented logic and, if needed, add a fallback inside `solve(...)` that broadens to `query=...` and filters owners case-insensitively.
|
| 49 |
+
- For exact aggregate counts like "how many models/datasets/spaces does X have", prefer `hf_profile_summary(...)['item']` counts. Those overview-owned counts may differ slightly from visible public search/list results, so if the user also asked for the list, preserve that distinction.
|
| 50 |
+
- For owner inventory queries without an explicit requested count, use `hf_profile_summary(...)` first when a specific owner is known. If the count is modest, use it to size the follow-up list call; otherwise return a bounded list plus coverage instead of pretending completeness.
|
| 51 |
+
- Think like `huggingface_hub`: `search`, `filter`, `author`, repo-type-specific upstream params, then `fields`.
|
| 52 |
+
- Push constraints upstream whenever a first-class helper argument exists.
|
| 53 |
+
- `post_filter` is only for normalized row filters that cannot be pushed upstream.
|
| 54 |
+
- Keep `post_filter` simple:
|
| 55 |
+
- exact match or `in` for returned fields like `runtime_stage`
|
| 56 |
+
- `gte` / `lte` for normalized numeric fields like `num_params`, `downloads`, and `likes`
|
| 57 |
+
- `num_params` is one of the main valid reasons to use `post_filter` on model search today.
|
| 58 |
+
- Do **not** use `post_filter` for things that already have first-class upstream params like `author`, `pipeline_tag`, `dataset_name`, `language`, `models`, or `datasets`.
|
| 59 |
+
|
| 60 |
+
Examples:
|
| 61 |
+
|
| 62 |
+
```py
|
| 63 |
+
await hf_models_search(pipeline_tag="text-to-image", limit=10)
|
| 64 |
+
await hf_datasets_search(search="speech", sort="downloads", limit=10)
|
| 65 |
+
await hf_spaces_search(post_filter={"runtime_stage": {"in": ["BUILD_ERROR", "RUNTIME_ERROR"]}})
|
| 66 |
+
await hf_models_search(
|
| 67 |
+
pipeline_tag="text-generation",
|
| 68 |
+
sort="trending_score",
|
| 69 |
+
limit=50,
|
| 70 |
+
post_filter={"num_params": {"gte": 20_000_000_000, "lte": 80_000_000_000}},
|
| 71 |
+
)
|
| 72 |
+
await hf_collections_search(owner="Qwen", limit=10)
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
Field-only pattern:
|
| 76 |
+
|
| 77 |
+
```py
|
| 78 |
+
resp = await hf_models_search(
|
| 79 |
+
pipeline_tag="text-to-image",
|
| 80 |
+
fields=["repo_id", "author", "likes", "downloads", "repo_url"],
|
| 81 |
+
limit=3,
|
| 82 |
+
)
|
| 83 |
+
return resp["items"]
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
Coverage pattern:
|
| 87 |
+
|
| 88 |
+
```py
|
| 89 |
+
resp = await hf_user_likes(
|
| 90 |
+
username="julien-c",
|
| 91 |
+
sort="repo_likes",
|
| 92 |
+
ranking_window=50,
|
| 93 |
+
limit=20,
|
| 94 |
+
fields=["repo_id", "repo_likes", "repo_url"],
|
| 95 |
+
)
|
| 96 |
+
return {"results": resp["items"], "coverage": resp["meta"]}
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
Owner-inventory pattern:
|
| 100 |
+
|
| 101 |
+
```py
|
| 102 |
+
profile = await hf_profile_summary(handle="huggingface")
|
| 103 |
+
count = (profile.get("item") or {}).get("spaces_count")
|
| 104 |
+
limit = 200 if not isinstance(count, int) else min(max(count, 1), 200)
|
| 105 |
+
resp = await hf_spaces_search(
|
| 106 |
+
author="huggingface",
|
| 107 |
+
limit=limit,
|
| 108 |
+
fields=["repo_id", "repo_url"],
|
| 109 |
+
)
|
| 110 |
+
meta = resp.get("meta") or {}
|
| 111 |
+
if meta.get("limit_boundary_hit") or meta.get("more_available") not in {False, None}:
|
| 112 |
+
return {"results": resp["items"], "coverage": {**meta, "profile_spaces_count": count}}
|
| 113 |
+
return resp["items"]
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
Profile-count pattern:
|
| 117 |
+
|
| 118 |
+
```py
|
| 119 |
+
profile = await hf_profile_summary(handle="mishig")
|
| 120 |
+
item = profile["item"] or {}
|
| 121 |
+
return {
|
| 122 |
+
"followers_count": item.get("followers_count"),
|
| 123 |
+
"following_count": item.get("following_count"),
|
| 124 |
+
}
|
| 125 |
+
```
|
| 126 |
+
|
| 127 |
+
Pro-followers pattern:
|
| 128 |
+
|
| 129 |
+
```py
|
| 130 |
+
followers = await hf_user_graph(
|
| 131 |
+
relation="followers",
|
| 132 |
+
pro_only=True,
|
| 133 |
+
limit=20,
|
| 134 |
+
fields=["username"],
|
| 135 |
+
)
|
| 136 |
+
return followers["items"]
|
| 137 |
+
```
|
| 138 |
+
|
| 139 |
+
## Navigation graph
|
| 140 |
+
|
| 141 |
+
Use the helper that matches the question type.
|
| 142 |
+
|
| 143 |
+
- exact repo details → `hf_repo_details(...)`
|
| 144 |
+
- model search/list/discovery → `hf_models_search(...)`
|
| 145 |
+
- dataset search/list/discovery → `hf_datasets_search(...)`
|
| 146 |
+
- space search/list/discovery → `hf_spaces_search(...)`
|
| 147 |
+
- cross-type repo search → `hf_repo_search(...)`
|
| 148 |
+
- trending repos → `hf_trending(...)`
|
| 149 |
+
- daily papers → `hf_daily_papers(...)`
|
| 150 |
+
- repo discussions → `hf_repo_discussions(...)`
|
| 151 |
+
- specific discussion details → `hf_repo_discussion_details(...)`
|
| 152 |
+
- users who liked one repo → `hf_repo_likers(...)`
|
| 153 |
+
- profile / overview / aggregate counts → `hf_profile_summary(...)`
|
| 154 |
+
- followers / following lists → `hf_user_graph(...)`
|
| 155 |
+
- repos a user liked → `hf_user_likes(...)`
|
| 156 |
+
- recent activity feed → `hf_recent_activity(...)`
|
| 157 |
+
- organization members → `hf_org_members(...)`
|
| 158 |
+
- collections search → `hf_collections_search(...)`
|
| 159 |
+
- items inside a known collection → `hf_collection_items(...)`
|
| 160 |
+
- explicit current username → `hf_whoami()`
|
| 161 |
+
|
| 162 |
+
Direction reminders:
|
| 163 |
+
- `hf_user_likes(...)` = user → repos
|
| 164 |
+
- `hf_repo_likers(...)` = repo → users
|
| 165 |
+
- `hf_user_graph(...)` = user/org → followers/following
|
| 166 |
+
|
| 167 |
+
## Helper result shape
|
| 168 |
+
|
| 169 |
+
All helpers return:
|
| 170 |
+
|
| 171 |
+
```py
|
| 172 |
+
{
|
| 173 |
+
"ok": bool,
|
| 174 |
+
"item": dict | None,
|
| 175 |
+
"items": list[dict],
|
| 176 |
+
"meta": dict,
|
| 177 |
+
"error": str | None,
|
| 178 |
+
}
|
| 179 |
+
```
|
| 180 |
+
|
| 181 |
+
Rules:
|
| 182 |
+
- `items` is the canonical list field.
|
| 183 |
+
- `item` is just a singleton convenience.
|
| 184 |
+
- `meta` contains helper-owned execution, limit, and coverage info.
|
| 185 |
+
- When helper-owned coverage matters, prefer returning the helper envelope directly.
|
| 186 |
+
|
| 187 |
+
## High-signal output rules
|
| 188 |
+
|
| 189 |
+
- Prefer compact dict/list outputs over prose when the user asked for fields.
|
| 190 |
+
- Prefer summary helpers before detail hydration.
|
| 191 |
+
- Use canonical snake_case keys in generated code and structured output.
|
| 192 |
+
- Use `repo_id` as the display label for repos.
|
| 193 |
+
- Use `hf_profile_summary(...)['item']` for aggregate counts such as followers, following, models, datasets, and spaces.
|
| 194 |
+
- For selective one-shot search helpers, treat `meta.limit_boundary_hit=true` as a partial/unknown-coverage warning even if `meta.truncated` is still `false`.
|
| 195 |
+
- For joins/intersections/rankings, fetch the needed working set first and compute locally.
|
| 196 |
+
- If the result is partial, use top-level keys `results` and `coverage`.
|
| 197 |
+
|
| 198 |
+
## Helper signatures (generated from Python)
|
| 199 |
+
|
| 200 |
+
These signatures are exported from the live runtime with `inspect.signature(...)`.
|
| 201 |
+
If prompt prose and signatures disagree, trust these signatures.
|
| 202 |
+
|
| 203 |
+
```py
|
| 204 |
+
await hf_collection_items(collection_id: 'str', repo_types: 'list[str] | None' = None, limit: 'int' = 100, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 205 |
+
|
| 206 |
+
await hf_collections_search(query: 'str | None' = None, owner: 'str | None' = None, limit: 'int' = 20, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 207 |
+
|
| 208 |
+
await hf_daily_papers(limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 209 |
+
|
| 210 |
+
await hf_datasets_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, benchmark: 'str | bool | None' = None, dataset_name: 'str | None' = None, gated: 'bool | None' = None, language_creators: 'str | list[str] | None' = None, language: 'str | list[str] | None' = None, multilinguality: 'str | list[str] | None' = None, size_categories: 'str | list[str] | None' = None, task_categories: 'str | list[str] | None' = None, task_ids: 'str | list[str] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
| 211 |
+
|
| 212 |
+
await hf_models_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, apps: 'str | list[str] | None' = None, gated: 'bool | None' = None, inference: 'str | None' = None, inference_provider: 'str | list[str] | None' = None, model_name: 'str | None' = None, trained_dataset: 'str | list[str] | None' = None, pipeline_tag: 'str | None' = None, emissions_thresholds: 'tuple[float, float] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, card_data: 'bool' = False, fetch_config: 'bool' = False, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
| 213 |
+
|
| 214 |
+
await hf_org_members(organization: 'str', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 215 |
+
|
| 216 |
+
await hf_profile_summary(handle: 'str | None' = None, include: 'list[str] | None' = None, likes_limit: 'int' = 10, activity_limit: 'int' = 10) -> 'dict[str, Any]'
|
| 217 |
+
|
| 218 |
+
await hf_recent_activity(feed_type: 'str | None' = None, entity: 'str | None' = None, activity_types: 'list[str] | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, max_pages: 'int | None' = None, start_cursor: 'str | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 219 |
+
|
| 220 |
+
await hf_repo_details(repo_id: 'str | None' = None, repo_ids: 'list[str] | None' = None, repo_type: 'str' = 'auto', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 221 |
+
|
| 222 |
+
await hf_repo_discussion_details(repo_type: 'str', repo_id: 'str', discussion_num: 'int', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 223 |
+
|
| 224 |
+
await hf_repo_discussions(repo_type: 'str', repo_id: 'str', limit: 'int' = 20, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 225 |
+
|
| 226 |
+
await hf_repo_likers(repo_id: 'str', repo_type: 'str', limit: 'int | None' = None, count_only: 'bool' = False, pro_only: 'bool | None' = None, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 227 |
+
|
| 228 |
+
await hf_repo_search(search: 'str | None' = None, repo_type: 'str | None' = None, repo_types: 'list[str] | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, sort: 'str | None' = None, limit: 'int' = 20, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
| 229 |
+
|
| 230 |
+
await hf_runtime_capabilities(section: 'str | None' = None) -> 'dict[str, Any]'
|
| 231 |
+
|
| 232 |
+
await hf_spaces_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, datasets: 'str | list[str] | None' = None, models: 'str | list[str] | None' = None, linked: 'bool' = False, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
| 233 |
+
|
| 234 |
+
await hf_trending(repo_type: 'str' = 'model', limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 235 |
+
|
| 236 |
+
await hf_user_graph(username: 'str | None' = None, relation: 'str' = 'followers', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, pro_only: 'bool | None' = None, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 237 |
+
|
| 238 |
+
await hf_user_likes(username: 'str | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None, sort: 'str | None' = None, ranking_window: 'int | None' = None) -> 'dict[str, Any]'
|
| 239 |
+
|
| 240 |
+
await hf_whoami() -> 'dict[str, Any]'
|
| 241 |
+
```
|
| 242 |
+
|
| 243 |
+
## Helper contracts (generated from runtime + wrapper metadata)
|
| 244 |
+
|
| 245 |
+
These contracts describe the normalized wrapper surface exposed to generated code.
|
| 246 |
+
Field names and helper-visible enum values are canonical snake_case wrapper names.
|
| 247 |
+
|
| 248 |
+
All helpers return the same envelope: `{ok, item, items, meta, error}`.
|
| 249 |
+
|
| 250 |
+
### hf_collection_items
|
| 251 |
+
|
| 252 |
+
- category: `collection_navigation`
|
| 253 |
+
- returns:
|
| 254 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 255 |
+
- row_type: `repo`
|
| 256 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 257 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `repo_url`
|
| 258 |
+
- optional_fields: `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 259 |
+
- supported_params: `collection_id`, `repo_types`, `limit`, `count_only`, `where`, `fields`
|
| 260 |
+
- param_values:
|
| 261 |
+
- repo_types: `model`, `dataset`, `space`
|
| 262 |
+
- fields_contract:
|
| 263 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 264 |
+
- canonical_only: `true`
|
| 265 |
+
- where_contract:
|
| 266 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 267 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 268 |
+
- normalized_only: `true`
|
| 269 |
+
- limit_contract:
|
| 270 |
+
- default_limit: `100`
|
| 271 |
+
- max_limit: `500`
|
| 272 |
+
- notes: Returns repos inside one collection as summary rows.
|
| 273 |
+
|
| 274 |
+
### hf_collections_search
|
| 275 |
+
|
| 276 |
+
- category: `collection_search`
|
| 277 |
+
- returns:
|
| 278 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 279 |
+
- row_type: `collection`
|
| 280 |
+
- default_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
|
| 281 |
+
- guaranteed_fields: `collection_id`, `title`, `owner`
|
| 282 |
+
- optional_fields: `slug`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
|
| 283 |
+
- supported_params: `query`, `owner`, `limit`, `count_only`, `where`, `fields`
|
| 284 |
+
- fields_contract:
|
| 285 |
+
- allowed_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
|
| 286 |
+
- canonical_only: `true`
|
| 287 |
+
- where_contract:
|
| 288 |
+
- allowed_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
|
| 289 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 290 |
+
- normalized_only: `true`
|
| 291 |
+
- limit_contract:
|
| 292 |
+
- default_limit: `20`
|
| 293 |
+
- max_limit: `500`
|
| 294 |
+
- notes: Collection summary helper.
|
| 295 |
+
|
| 296 |
+
### hf_daily_papers
|
| 297 |
+
|
| 298 |
+
- category: `curated_feed`
|
| 299 |
+
- returns:
|
| 300 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 301 |
+
- row_type: `daily_paper`
|
| 302 |
+
- default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
|
| 303 |
+
- guaranteed_fields: `paper_id`, `title`, `published_at`, `rank`
|
| 304 |
+
- optional_fields: `summary`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`
|
| 305 |
+
- supported_params: `limit`, `where`, `fields`
|
| 306 |
+
- fields_contract:
|
| 307 |
+
- allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
|
| 308 |
+
- canonical_only: `true`
|
| 309 |
+
- where_contract:
|
| 310 |
+
- allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
|
| 311 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 312 |
+
- normalized_only: `true`
|
| 313 |
+
- limit_contract:
|
| 314 |
+
- default_limit: `20`
|
| 315 |
+
- max_limit: `500`
|
| 316 |
+
- notes: Returns daily paper summary rows. repo_id is omitted unless the upstream payload provides it.
|
| 317 |
+
|
| 318 |
+
### hf_datasets_search
|
| 319 |
+
|
| 320 |
+
- category: `wrapped_hf_repo_search`
|
| 321 |
+
- backed_by: `HfApi.list_datasets`
|
| 322 |
+
- returns:
|
| 323 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 324 |
+
- row_type: `repo`
|
| 325 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 326 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 327 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 328 |
+
- supported_params: `search`, `filter`, `author`, `benchmark`, `dataset_name`, `gated`, `language_creators`, `language`, `multilinguality`, `size_categories`, `task_categories`, `task_ids`, `sort`, `limit`, `expand`, `full`, `fields`, `post_filter`
|
| 329 |
+
- sort_values: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 330 |
+
- expand_values: `author`, `card_data`, `citation`, `created_at`, `description`, `disabled`, `downloads`, `downloads_all_time`, `gated`, `last_modified`, `likes`, `paperswithcode_id`, `private`, `resource_group`, `sha`, `siblings`, `tags`, `trending_score`, `xet_enabled`, `gitaly_uid`
|
| 331 |
+
- fields_contract:
|
| 332 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 333 |
+
- canonical_only: `true`
|
| 334 |
+
- post_filter_contract:
|
| 335 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 336 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 337 |
+
- normalized_only: `true`
|
| 338 |
+
- limit_contract:
|
| 339 |
+
- default_limit: `20`
|
| 340 |
+
- max_limit: `5000`
|
| 341 |
+
- notes: Thin dataset-search wrapper around the Hub list_datasets path. Prefer this over hf_repo_search for dataset-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 342 |
+
|
| 343 |
+
### hf_models_search
|
| 344 |
+
|
| 345 |
+
- category: `wrapped_hf_repo_search`
|
| 346 |
+
- backed_by: `HfApi.list_models`
|
| 347 |
+
- returns:
|
| 348 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 349 |
+
- row_type: `repo`
|
| 350 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 351 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 352 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 353 |
+
- supported_params: `search`, `filter`, `author`, `apps`, `gated`, `inference`, `inference_provider`, `model_name`, `trained_dataset`, `pipeline_tag`, `emissions_thresholds`, `sort`, `limit`, `expand`, `full`, `card_data`, `fetch_config`, `fields`, `post_filter`
|
| 354 |
+
- sort_values: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 355 |
+
- expand_values: `author`, `base_models`, `card_data`, `config`, `created_at`, `disabled`, `downloads`, `downloads_all_time`, `eval_results`, `gated`, `gguf`, `inference`, `inference_provider_mapping`, `last_modified`, `library_name`, `likes`, `mask_token`, `model_index`, `pipeline_tag`, `private`, `resource_group`, `safetensors`, `sha`, `siblings`, `spaces`, `tags`, `transformers_info`, `trending_score`, `widget_data`, `xet_enabled`, `gitaly_uid`
|
| 356 |
+
- fields_contract:
|
| 357 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 358 |
+
- canonical_only: `true`
|
| 359 |
+
- post_filter_contract:
|
| 360 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 361 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 362 |
+
- normalized_only: `true`
|
| 363 |
+
- limit_contract:
|
| 364 |
+
- default_limit: `20`
|
| 365 |
+
- max_limit: `5000`
|
| 366 |
+
- notes: Thin model-search wrapper around the Hub list_models path. Prefer this over hf_repo_search for model-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 367 |
+
|
| 368 |
+
### hf_org_members
|
| 369 |
+
|
| 370 |
+
- category: `graph_scan`
|
| 371 |
+
- returns:
|
| 372 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 373 |
+
- row_type: `actor`
|
| 374 |
+
- default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 375 |
+
- guaranteed_fields: `username`
|
| 376 |
+
- optional_fields: `fullname`, `is_pro`, `role`, `type`
|
| 377 |
+
- supported_params: `organization`, `limit`, `scan_limit`, `count_only`, `where`, `fields`
|
| 378 |
+
- fields_contract:
|
| 379 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 380 |
+
- canonical_only: `true`
|
| 381 |
+
- where_contract:
|
| 382 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 383 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 384 |
+
- normalized_only: `true`
|
| 385 |
+
- limit_contract:
|
| 386 |
+
- default_limit: `1000`
|
| 387 |
+
- max_limit: `10000`
|
| 388 |
+
- scan_max: `10000`
|
| 389 |
+
- notes: Returns organization member summary rows.
|
| 390 |
+
|
| 391 |
+
### hf_profile_summary
|
| 392 |
+
|
| 393 |
+
- category: `profile_summary`
|
| 394 |
+
- returns:
|
| 395 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 396 |
+
- row_type: `profile`
|
| 397 |
+
- default_fields: `handle`, `entity_type`, `display_name`, `bio`, `description`, `avatar_url`, `website_url`, `twitter_url`, `github_url`, `linkedin_url`, `bluesky_url`, `followers_count`, `following_count`, `likes_count`, `members_count`, `models_count`, `datasets_count`, `spaces_count`, `discussions_count`, `papers_count`, `upvotes_count`, `organizations`, `is_pro`, `likes_sample`, `activity_sample`
|
| 398 |
+
- guaranteed_fields: `handle`, `entity_type`
|
| 399 |
+
- optional_fields: `display_name`, `bio`, `description`, `avatar_url`, `website_url`, `twitter_url`, `github_url`, `linkedin_url`, `bluesky_url`, `followers_count`, `following_count`, `likes_count`, `members_count`, `models_count`, `datasets_count`, `spaces_count`, `discussions_count`, `papers_count`, `upvotes_count`, `organizations`, `is_pro`, `likes_sample`, `activity_sample`
|
| 400 |
+
- supported_params: `handle`, `include`, `likes_limit`, `activity_limit`
|
| 401 |
+
- param_values:
|
| 402 |
+
- include: `likes`, `activity`
|
| 403 |
+
- notes: Profile summary helper. Aggregate counts like followers_count/following_count are in the base item. include=['likes', 'activity'] adds composed samples and extra upstream work; no other include values are supported. Overview-owned repo counts may differ slightly from visible public search/list results.
|
| 404 |
+
|
| 405 |
+
### hf_recent_activity
|
| 406 |
+
|
| 407 |
+
- category: `activity_feed`
|
| 408 |
+
- returns:
|
| 409 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 410 |
+
- row_type: `activity`
|
| 411 |
+
- default_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
|
| 412 |
+
- guaranteed_fields: `event_type`, `timestamp`
|
| 413 |
+
- optional_fields: `repo_id`, `repo_type`
|
| 414 |
+
- supported_params: `feed_type`, `entity`, `activity_types`, `repo_types`, `limit`, `max_pages`, `start_cursor`, `count_only`, `where`, `fields`
|
| 415 |
+
- param_values:
|
| 416 |
+
- feed_type: `user`, `org`
|
| 417 |
+
- repo_types: `model`, `dataset`, `space`
|
| 418 |
+
- fields_contract:
|
| 419 |
+
- allowed_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
|
| 420 |
+
- canonical_only: `true`
|
| 421 |
+
- where_contract:
|
| 422 |
+
- allowed_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
|
| 423 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 424 |
+
- normalized_only: `true`
|
| 425 |
+
- limit_contract:
|
| 426 |
+
- default_limit: `100`
|
| 427 |
+
- max_limit: `2000`
|
| 428 |
+
- max_pages: `10`
|
| 429 |
+
- page_limit: `100`
|
| 430 |
+
- notes: Activity helper may fetch multiple pages when requested coverage exceeds one page. count_only may still be a lower bound unless the feed is exhausted before max_pages is reached.
|
| 431 |
+
|
| 432 |
+
### hf_repo_details
|
| 433 |
+
|
| 434 |
+
- category: `repo_detail`
|
| 435 |
+
- returns:
|
| 436 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 437 |
+
- row_type: `repo`
|
| 438 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 439 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 440 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 441 |
+
- supported_params: `repo_id`, `repo_ids`, `repo_type`, `fields`
|
| 442 |
+
- param_values:
|
| 443 |
+
- repo_type: `model`, `dataset`, `space`, `auto`
|
| 444 |
+
- fields_contract:
|
| 445 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 446 |
+
- canonical_only: `true`
|
| 447 |
+
- notes: Exact repo metadata path. Multiple repo_ids may trigger one detail call per requested repo.
|
| 448 |
+
|
| 449 |
+
### hf_repo_discussion_details
|
| 450 |
+
|
| 451 |
+
- category: `discussion_detail`
|
| 452 |
+
- returns:
|
| 453 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 454 |
+
- row_type: `discussion_detail`
|
| 455 |
+
- default_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
|
| 456 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `title`, `author`, `status`
|
| 457 |
+
- optional_fields: `num`, `created_at`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
|
| 458 |
+
- supported_params: `repo_type`, `repo_id`, `discussion_num`, `fields`
|
| 459 |
+
- param_values:
|
| 460 |
+
- repo_type: `model`, `dataset`, `space`
|
| 461 |
+
- fields_contract:
|
| 462 |
+
- allowed_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
|
| 463 |
+
- canonical_only: `true`
|
| 464 |
+
- notes: Exact discussion detail helper.
|
| 465 |
+
|
| 466 |
+
### hf_repo_discussions
|
| 467 |
+
|
| 468 |
+
- category: `discussion_summary`
|
| 469 |
+
- returns:
|
| 470 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 471 |
+
- row_type: `discussion`
|
| 472 |
+
- default_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`
|
| 473 |
+
- guaranteed_fields: `num`, `title`, `author`, `status`
|
| 474 |
+
- optional_fields: `repo_id`, `repo_type`, `created_at`, `url`
|
| 475 |
+
- supported_params: `repo_type`, `repo_id`, `limit`, `fields`
|
| 476 |
+
- param_values:
|
| 477 |
+
- repo_type: `model`, `dataset`, `space`
|
| 478 |
+
- fields_contract:
|
| 479 |
+
- allowed_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`
|
| 480 |
+
- canonical_only: `true`
|
| 481 |
+
- limit_contract:
|
| 482 |
+
- default_limit: `20`
|
| 483 |
+
- max_limit: `200`
|
| 484 |
+
- notes: Discussion summary helper.
|
| 485 |
+
|
| 486 |
+
### hf_repo_likers
|
| 487 |
+
|
| 488 |
+
- category: `repo_to_users`
|
| 489 |
+
- returns:
|
| 490 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 491 |
+
- row_type: `actor`
|
| 492 |
+
- default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 493 |
+
- guaranteed_fields: `username`
|
| 494 |
+
- optional_fields: `fullname`, `is_pro`, `role`, `type`
|
| 495 |
+
- supported_params: `repo_id`, `repo_type`, `limit`, `count_only`, `pro_only`, `where`, `fields`
|
| 496 |
+
- param_values:
|
| 497 |
+
- repo_type: `model`, `dataset`, `space`
|
| 498 |
+
- fields_contract:
|
| 499 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 500 |
+
- canonical_only: `true`
|
| 501 |
+
- where_contract:
|
| 502 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 503 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 504 |
+
- normalized_only: `true`
|
| 505 |
+
- limit_contract:
|
| 506 |
+
- default_limit: `1000`
|
| 507 |
+
- notes: Returns users who liked a repo.
|
| 508 |
+
|
| 509 |
+
### hf_repo_search
|
| 510 |
+
|
| 511 |
+
- category: `cross_type_repo_search`
|
| 512 |
+
- returns:
|
| 513 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 514 |
+
- row_type: `repo`
|
| 515 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 516 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 517 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 518 |
+
- supported_params: `search`, `repo_type`, `repo_types`, `filter`, `author`, `sort`, `limit`, `fields`, `post_filter`
|
| 519 |
+
- sort_values_by_repo_type:
|
| 520 |
+
- dataset: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 521 |
+
- model: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 522 |
+
- space: `created_at`, `last_modified`, `likes`, `trending_score`
|
| 523 |
+
- param_values:
|
| 524 |
+
- repo_type: `model`, `dataset`, `space`
|
| 525 |
+
- repo_types: `model`, `dataset`, `space`
|
| 526 |
+
- sort: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 527 |
+
- fields_contract:
|
| 528 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 529 |
+
- canonical_only: `true`
|
| 530 |
+
- post_filter_contract:
|
| 531 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 532 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 533 |
+
- normalized_only: `true`
|
| 534 |
+
- limit_contract:
|
| 535 |
+
- default_limit: `20`
|
| 536 |
+
- max_limit: `5000`
|
| 537 |
+
- notes: Small generic repo-search helper. Prefer hf_models_search, hf_datasets_search, or hf_spaces_search for single-type queries; use hf_repo_search for intentionally cross-type search. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 538 |
+
|
| 539 |
+
### hf_runtime_capabilities
|
| 540 |
+
|
| 541 |
+
- category: `introspection`
|
| 542 |
+
- returns:
|
| 543 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 544 |
+
- row_type: `runtime_capability`
|
| 545 |
+
- default_fields: `allowed_sections`, `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
|
| 546 |
+
- guaranteed_fields: `allowed_sections`, `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
|
| 547 |
+
- optional_fields: []
|
| 548 |
+
- supported_params: `section`
|
| 549 |
+
- param_values:
|
| 550 |
+
- section: `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
|
| 551 |
+
- notes: Introspection helper. Use section=... to narrow the response.
|
| 552 |
+
|
| 553 |
+
### hf_spaces_search
|
| 554 |
+
|
| 555 |
+
- category: `wrapped_hf_repo_search`
|
| 556 |
+
- backed_by: `HfApi.list_spaces`
|
| 557 |
+
- returns:
|
| 558 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 559 |
+
- row_type: `repo`
|
| 560 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 561 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 562 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 563 |
+
- supported_params: `search`, `filter`, `author`, `datasets`, `models`, `linked`, `sort`, `limit`, `expand`, `full`, `fields`, `post_filter`
|
| 564 |
+
- sort_values: `created_at`, `last_modified`, `likes`, `trending_score`
|
| 565 |
+
- expand_values: `author`, `card_data`, `created_at`, `datasets`, `disabled`, `last_modified`, `likes`, `models`, `private`, `resource_group`, `runtime`, `sdk`, `sha`, `siblings`, `subdomain`, `tags`, `trending_score`, `xet_enabled`, `gitaly_uid`
|
| 566 |
+
- fields_contract:
|
| 567 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 568 |
+
- canonical_only: `true`
|
| 569 |
+
- post_filter_contract:
|
| 570 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 571 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 572 |
+
- normalized_only: `true`
|
| 573 |
+
- limit_contract:
|
| 574 |
+
- default_limit: `20`
|
| 575 |
+
- max_limit: `5000`
|
| 576 |
+
- notes: Thin space-search wrapper around the Hub list_spaces path. Prefer this over hf_repo_search for space-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 577 |
+
|
| 578 |
+
### hf_trending
|
| 579 |
+
|
| 580 |
+
- category: `curated_repo_feed`
|
| 581 |
+
- returns:
|
| 582 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 583 |
+
- row_type: `repo`
|
| 584 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
|
| 585 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`, `trending_rank`
|
| 586 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 587 |
+
- supported_params: `repo_type`, `limit`, `where`, `fields`
|
| 588 |
+
- param_values:
|
| 589 |
+
- repo_type: `model`, `dataset`, `space`, `all`
|
| 590 |
+
- fields_contract:
|
| 591 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
|
| 592 |
+
- canonical_only: `true`
|
| 593 |
+
- where_contract:
|
| 594 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
|
| 595 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 596 |
+
- normalized_only: `true`
|
| 597 |
+
- limit_contract:
|
| 598 |
+
- default_limit: `20`
|
| 599 |
+
- max_limit: `20`
|
| 600 |
+
- notes: Returns ordered trending summary rows only. Use hf_repo_details for exact repo metadata.
|
| 601 |
+
|
| 602 |
+
### hf_user_graph
|
| 603 |
+
|
| 604 |
+
- category: `graph_scan`
|
| 605 |
+
- returns:
|
| 606 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 607 |
+
- row_type: `actor`
|
| 608 |
+
- default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 609 |
+
- guaranteed_fields: `username`
|
| 610 |
+
- optional_fields: `fullname`, `is_pro`, `role`, `type`
|
| 611 |
+
- supported_params: `username`, `relation`, `limit`, `scan_limit`, `count_only`, `pro_only`, `where`, `fields`
|
| 612 |
+
- param_values:
|
| 613 |
+
- relation: `followers`, `following`
|
| 614 |
+
- fields_contract:
|
| 615 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 616 |
+
- canonical_only: `true`
|
| 617 |
+
- where_contract:
|
| 618 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 619 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 620 |
+
- normalized_only: `true`
|
| 621 |
+
- limit_contract:
|
| 622 |
+
- default_limit: `1000`
|
| 623 |
+
- max_limit: `10000`
|
| 624 |
+
- scan_max: `10000`
|
| 625 |
+
- notes: Returns followers/following summary rows.
|
| 626 |
+
|
| 627 |
+
### hf_user_likes
|
| 628 |
+
|
| 629 |
+
- category: `user_to_repos`
|
| 630 |
+
- returns:
|
| 631 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 632 |
+
- row_type: `user_like`
|
| 633 |
+
- default_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
|
| 634 |
+
- guaranteed_fields: `liked_at`, `repo_id`, `repo_type`
|
| 635 |
+
- optional_fields: `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
|
| 636 |
+
- supported_params: `username`, `repo_types`, `limit`, `scan_limit`, `count_only`, `where`, `fields`, `sort`, `ranking_window`
|
| 637 |
+
- sort_values: `liked_at`, `repo_likes`, `repo_downloads`
|
| 638 |
+
- param_values:
|
| 639 |
+
- repo_types: `model`, `dataset`, `space`
|
| 640 |
+
- sort: `liked_at`, `repo_likes`, `repo_downloads`
|
| 641 |
+
- fields_contract:
|
| 642 |
+
- allowed_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
|
| 643 |
+
- canonical_only: `true`
|
| 644 |
+
- where_contract:
|
| 645 |
+
- allowed_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
|
| 646 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 647 |
+
- normalized_only: `true`
|
| 648 |
+
- limit_contract:
|
| 649 |
+
- default_limit: `100`
|
| 650 |
+
- max_limit: `2000`
|
| 651 |
+
- enrich_max: `50`
|
| 652 |
+
- ranking_default: `50`
|
| 653 |
+
- scan_max: `10000`
|
| 654 |
+
- notes: Default recency mode is cheap. Popularity-ranked sorts use canonical keys liked_at/repo_likes/repo_downloads and rerank only a bounded recent shortlist. Check meta.ranking_complete / meta.ranking_window when ranking by popularity; helper-owned coverage matters here.
|
| 655 |
+
|
| 656 |
+
### hf_whoami
|
| 657 |
+
|
| 658 |
+
- category: `identity`
|
| 659 |
+
- returns:
|
| 660 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 661 |
+
- row_type: `user`
|
| 662 |
+
- default_fields: `username`, `fullname`, `is_pro`
|
| 663 |
+
- guaranteed_fields: `username`
|
| 664 |
+
- optional_fields: `fullname`, `is_pro`
|
| 665 |
+
- supported_params: []
|
| 666 |
+
- notes: Returns the current authenticated user when a request token is available.
|
.prod/agent-cards/shared/_monty_codegen_shared.template.md
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Code Generation Rules
|
| 2 |
+
|
| 3 |
+
- You are writing Python to be executed in a secure runtime environment.
|
| 4 |
+
- **NEVER** use `import` - it is NOT available in this environment.
|
| 5 |
+
- All helper calls are async: always use `await`.
|
| 6 |
+
- Use this exact outer shape:
|
| 7 |
+
|
| 8 |
+
```py
|
| 9 |
+
async def solve(query, max_calls):
|
| 10 |
+
...
|
| 11 |
+
|
| 12 |
+
await solve(query, max_calls)
|
| 13 |
+
```
|
| 14 |
+
|
| 15 |
+
- `max_calls` is the total external-call budget for the whole program.
|
| 16 |
+
- Use only documented `hf_*` helpers.
|
| 17 |
+
- Return plain Python data only: `dict`, `list`, `str`, `int`, `float`, `bool`, or `None`.
|
| 18 |
+
- Do **not** hand-build JSON strings or markdown strings inside `solve(...)` unless the user explicitly asked for prose.
|
| 19 |
+
- Do **not** build your own transport wrapper like `{result: ..., meta: ...}`.
|
| 20 |
+
- If the user says "return only" some fields, return exactly that final shape.
|
| 21 |
+
- If a helper already returns the requested row shape, return `resp["items"]` directly **only when helper coverage is clearly complete**. If helper `meta` suggests partial/unknown coverage, return `{"results": resp["items"], "coverage": resp["meta"]}` instead of bare items.
|
| 22 |
+
- For current-user prompts (`my`, `me`), try helpers with `username=None` / `handle=None` first.
|
| 23 |
+
- If a current-user helper returns `ok=false`, return that helper response directly.
|
| 24 |
+
|
| 25 |
+
## Search rules
|
| 26 |
+
|
| 27 |
+
- If the user is asking about models, use `hf_models_search(...)`.
|
| 28 |
+
- If the user is asking about datasets, use `hf_datasets_search(...)`.
|
| 29 |
+
- If the user is asking about spaces, use `hf_spaces_search(...)`.
|
| 30 |
+
- Use `hf_repo_search(...)` only for intentionally cross-type search.
|
| 31 |
+
- Use `hf_trending(...)` only for the small "what is trending right now" feed.
|
| 32 |
+
- If the user says "trending" but also adds searchable constraints like `pipeline_tag`, `author`, search text, or `num_params` bounds, prefer the repo search helper sorted by `trending_score`.
|
| 33 |
+
- Think of search helpers as filter-first discovery and `hf_trending(...)` as rank-first current-feed inspection.
|
| 34 |
+
|
| 35 |
+
## Parameter notes
|
| 36 |
+
|
| 37 |
+
- Trust the generated helper contracts below for per-helper params, fields, sort keys, expand values, and defaults.
|
| 38 |
+
- When the user asks for helper-owned coverage metadata, use `helper_resp["meta"]`.
|
| 39 |
+
- Treat any of the following helper-meta signals as coverage-sensitive: `limit_boundary_hit`, `truncated`, `more_available` present and not equal to `False`, `sample_complete=false`, `exact_count=false`, `ranking_complete=false`, `ranking_window_hit=true`, or `hard_cap_applied=true`. In those cases, do **not** return bare items; return `{"results": ..., "coverage": ...}`.
|
| 40 |
+
- For pro-only follower/member/liker queries, prefer `pro_only=True` instead of filtering on a projected field.
|
| 41 |
+
- `hf_user_likes(...)` already returns full normalized like rows by default; omit `fields` unless the user asked for a subset.
|
| 42 |
+
- When sorting `hf_user_likes(...)` by `repo_likes` or `repo_downloads`, set `ranking_window=50` unless the user explicitly asked for a narrower recent window.
|
| 43 |
+
- For human-facing follower/member/liker lists without an explicit requested count, prefer `limit=100` and return coverage when more may exist.
|
| 44 |
+
- Unknown `fields` / `where` keys fail fast with an error. Use only canonical field names.
|
| 45 |
+
|
| 46 |
+
- Ownership phrasing like "what collections does Qwen have", "collections by Qwen", or "collections owned by Qwen" means an owner lookup, so use `hf_collections_search(owner="Qwen")`, not a keyword-only `query="Qwen"` search.
|
| 47 |
+
- Ownership phrasing like "what spaces does X have", "what models does X have", or "what datasets does X have" means an author/owner inventory lookup, so use `hf_spaces_search(author="X")`, `hf_models_search(author="X")`, or `hf_datasets_search(author="X")` rather than a global keyword-only search.
|
| 48 |
+
- Owner/user/org handles may arrive with different casing in the user message; when a handle spelling is uncertain, prefer owner-oriented logic and, if needed, add a fallback inside `solve(...)` that broadens to `query=...` and filters owners case-insensitively.
|
| 49 |
+
- For exact aggregate counts like "how many models/datasets/spaces does X have", prefer `hf_profile_summary(...)['item']` counts. Those overview-owned counts may differ slightly from visible public search/list results, so if the user also asked for the list, preserve that distinction.
|
| 50 |
+
- For owner inventory queries without an explicit requested count, use `hf_profile_summary(...)` first when a specific owner is known. If the count is modest, use it to size the follow-up list call; otherwise return a bounded list plus coverage instead of implying the list is complete.
|
| 51 |
+
- Think like `huggingface_hub`: `search`, `filter`, `author`, repo-type-specific upstream params, then `fields`.
|
| 52 |
+
- Push constraints upstream whenever a first-class helper argument exists.
|
| 53 |
+
- `post_filter` is only for normalized row filters that cannot be pushed upstream.
|
| 54 |
+
- Keep `post_filter` simple:
|
| 55 |
+
- exact match or `in` for returned fields like `runtime_stage`
|
| 56 |
+
- `gte` / `lte` for normalized numeric fields like `num_params`, `downloads`, and `likes`
|
| 57 |
+
- `num_params` is one of the main valid reasons to use `post_filter` on model search today.
|
| 58 |
+
- Do **not** use `post_filter` for things that already have first-class upstream params like `author`, `pipeline_tag`, `dataset_name`, `language`, `models`, or `datasets`.
|
| 59 |
+
|
| 60 |
+
Examples:
|
| 61 |
+
|
| 62 |
+
```py
|
| 63 |
+
await hf_models_search(pipeline_tag="text-to-image", limit=10)
|
| 64 |
+
await hf_datasets_search(search="speech", sort="downloads", limit=10)
|
| 65 |
+
await hf_spaces_search(post_filter={"runtime_stage": {"in": ["BUILD_ERROR", "RUNTIME_ERROR"]}})
|
| 66 |
+
await hf_models_search(
|
| 67 |
+
pipeline_tag="text-generation",
|
| 68 |
+
sort="trending_score",
|
| 69 |
+
limit=50,
|
| 70 |
+
post_filter={"num_params": {"gte": 20_000_000_000, "lte": 80_000_000_000}},
|
| 71 |
+
)
|
| 72 |
+
await hf_collections_search(owner="Qwen", limit=10)
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
Field-only pattern:
|
| 76 |
+
|
| 77 |
+
```py
|
| 78 |
+
resp = await hf_models_search(
|
| 79 |
+
pipeline_tag="text-to-image",
|
| 80 |
+
fields=["repo_id", "author", "likes", "downloads", "repo_url"],
|
| 81 |
+
limit=3,
|
| 82 |
+
)
|
| 83 |
+
return resp["items"]
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
Coverage pattern:
|
| 87 |
+
|
| 88 |
+
```py
|
| 89 |
+
resp = await hf_user_likes(
|
| 90 |
+
username="julien-c",
|
| 91 |
+
sort="repo_likes",
|
| 92 |
+
ranking_window=50,
|
| 93 |
+
limit=20,
|
| 94 |
+
fields=["repo_id", "repo_likes", "repo_url"],
|
| 95 |
+
)
|
| 96 |
+
return {"results": resp["items"], "coverage": resp["meta"]}
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
Owner-inventory pattern:
|
| 100 |
+
|
| 101 |
+
```py
|
| 102 |
+
profile = await hf_profile_summary(handle="huggingface")
|
| 103 |
+
count = (profile.get("item") or {}).get("spaces_count")
|
| 104 |
+
limit = 200 if not isinstance(count, int) else min(max(count, 1), 200)
|
| 105 |
+
resp = await hf_spaces_search(
|
| 106 |
+
author="huggingface",
|
| 107 |
+
limit=limit,
|
| 108 |
+
fields=["repo_id", "repo_url"],
|
| 109 |
+
)
|
| 110 |
+
meta = resp.get("meta") or {}
|
| 111 |
+
if meta.get("limit_boundary_hit") or meta.get("more_available") not in {False, None}:
|
| 112 |
+
return {"results": resp["items"], "coverage": {**meta, "profile_spaces_count": count}}
|
| 113 |
+
return resp["items"]
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
Profile-count pattern:
|
| 117 |
+
|
| 118 |
+
```py
|
| 119 |
+
profile = await hf_profile_summary(handle="mishig")
|
| 120 |
+
item = profile["item"] or {}
|
| 121 |
+
return {
|
| 122 |
+
"followers_count": item.get("followers_count"),
|
| 123 |
+
"following_count": item.get("following_count"),
|
| 124 |
+
}
|
| 125 |
+
```
|
| 126 |
+
|
| 127 |
+
Pro-followers pattern:
|
| 128 |
+
|
| 129 |
+
```py
|
| 130 |
+
followers = await hf_user_graph(
|
| 131 |
+
relation="followers",
|
| 132 |
+
pro_only=True,
|
| 133 |
+
limit=20,
|
| 134 |
+
fields=["username"],
|
| 135 |
+
)
|
| 136 |
+
return followers["items"]
|
| 137 |
+
```
|
| 138 |
+
|
| 139 |
+
## Navigation graph
|
| 140 |
+
|
| 141 |
+
Use the helper that matches the question type.
|
| 142 |
+
|
| 143 |
+
- exact repo details → `hf_repo_details(...)`
|
| 144 |
+
- model search/list/discovery → `hf_models_search(...)`
|
| 145 |
+
- dataset search/list/discovery → `hf_datasets_search(...)`
|
| 146 |
+
- space search/list/discovery → `hf_spaces_search(...)`
|
| 147 |
+
- cross-type repo search → `hf_repo_search(...)`
|
| 148 |
+
- trending repos → `hf_trending(...)`
|
| 149 |
+
- daily papers → `hf_daily_papers(...)`
|
| 150 |
+
- repo discussions → `hf_repo_discussions(...)`
|
| 151 |
+
- specific discussion details → `hf_repo_discussion_details(...)`
|
| 152 |
+
- users who liked one repo → `hf_repo_likers(...)`
|
| 153 |
+
- profile / overview / aggregate counts → `hf_profile_summary(...)`
|
| 154 |
+
- followers / following lists → `hf_user_graph(...)`
|
| 155 |
+
- repos a user liked → `hf_user_likes(...)`
|
| 156 |
+
- recent activity feed → `hf_recent_activity(...)`
|
| 157 |
+
- organization members → `hf_org_members(...)`
|
| 158 |
+
- collections search → `hf_collections_search(...)`
|
| 159 |
+
- items inside a known collection → `hf_collection_items(...)`
|
| 160 |
+
- explicit current username → `hf_whoami()`
|
| 161 |
+
|
| 162 |
+
Direction reminders:
|
| 163 |
+
- `hf_user_likes(...)` = user → repos
|
| 164 |
+
- `hf_repo_likers(...)` = repo → users
|
| 165 |
+
- `hf_user_graph(...)` = user/org → followers/following
|
| 166 |
+
|
| 167 |
+
## Helper result shape
|
| 168 |
+
|
| 169 |
+
All helpers return:
|
| 170 |
+
|
| 171 |
+
```py
|
| 172 |
+
{
|
| 173 |
+
"ok": bool,
|
| 174 |
+
"item": dict | None,
|
| 175 |
+
"items": list[dict],
|
| 176 |
+
"meta": dict,
|
| 177 |
+
"error": str | None,
|
| 178 |
+
}
|
| 179 |
+
```
|
| 180 |
+
|
| 181 |
+
Rules:
|
| 182 |
+
- `items` is the canonical list field.
|
| 183 |
+
- `item` is just a singleton convenience.
|
| 184 |
+
- `meta` contains helper-owned execution, limit, and coverage info.
|
| 185 |
+
- When helper-owned coverage matters, prefer returning the helper envelope directly.
|
| 186 |
+
|
| 187 |
+
## High-signal output rules
|
| 188 |
+
|
| 189 |
+
- Prefer compact dict/list outputs over prose when the user asked for fields.
|
| 190 |
+
- Prefer summary helpers before detail hydration.
|
| 191 |
+
- Use canonical snake_case keys in generated code and structured output.
|
| 192 |
+
- Use `repo_id` as the display label for repos.
|
| 193 |
+
- Use `hf_profile_summary(...)['item']` for aggregate counts such as followers, following, models, datasets, and spaces.
|
| 194 |
+
- For selective one-shot search helpers, treat `meta.limit_boundary_hit=true` as a partial/unknown-coverage warning even if `meta.truncated` is still `false`.
|
| 195 |
+
- For joins/intersections/rankings, fetch the needed working set first and compute locally.
|
| 196 |
+
- If the result is partial, use top-level keys `results` and `coverage`.
|
| 197 |
+
|
| 198 |
+
{{GENERATED_HELPER_SIGNATURES}}
|
| 199 |
+
|
| 200 |
+
{{GENERATED_HELPER_CONTRACTS}}
|
.prod/agent-cards/shared/_monty_helper_contracts.md
ADDED
|
@@ -0,0 +1,424 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Helper contracts (generated from runtime + wrapper metadata)
|
| 2 |
+
|
| 3 |
+
These contracts describe the normalized wrapper surface exposed to generated code.
|
| 4 |
+
Field names and helper-visible enum values are canonical snake_case wrapper names.
|
| 5 |
+
|
| 6 |
+
All helpers return the same envelope: `{ok, item, items, meta, error}`.
|
| 7 |
+
|
| 8 |
+
### hf_collection_items
|
| 9 |
+
|
| 10 |
+
- category: `collection_navigation`
|
| 11 |
+
- returns:
|
| 12 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 13 |
+
- row_type: `repo`
|
| 14 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 15 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `repo_url`
|
| 16 |
+
- optional_fields: `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 17 |
+
- supported_params: `collection_id`, `repo_types`, `limit`, `count_only`, `where`, `fields`
|
| 18 |
+
- param_values:
|
| 19 |
+
- repo_types: `model`, `dataset`, `space`
|
| 20 |
+
- fields_contract:
|
| 21 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 22 |
+
- canonical_only: `true`
|
| 23 |
+
- where_contract:
|
| 24 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 25 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 26 |
+
- normalized_only: `true`
|
| 27 |
+
- limit_contract:
|
| 28 |
+
- default_limit: `100`
|
| 29 |
+
- max_limit: `500`
|
| 30 |
+
- notes: Returns repos inside one collection as summary rows.
|
| 31 |
+
|
| 32 |
+
### hf_collections_search
|
| 33 |
+
|
| 34 |
+
- category: `collection_search`
|
| 35 |
+
- returns:
|
| 36 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 37 |
+
- row_type: `collection`
|
| 38 |
+
- default_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
|
| 39 |
+
- guaranteed_fields: `collection_id`, `title`, `owner`
|
| 40 |
+
- optional_fields: `slug`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
|
| 41 |
+
- supported_params: `query`, `owner`, `limit`, `count_only`, `where`, `fields`
|
| 42 |
+
- fields_contract:
|
| 43 |
+
- allowed_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
|
| 44 |
+
- canonical_only: `true`
|
| 45 |
+
- where_contract:
|
| 46 |
+
- allowed_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
|
| 47 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 48 |
+
- normalized_only: `true`
|
| 49 |
+
- limit_contract:
|
| 50 |
+
- default_limit: `20`
|
| 51 |
+
- max_limit: `500`
|
| 52 |
+
- notes: Collection summary helper.
|
| 53 |
+
|
| 54 |
+
### hf_daily_papers
|
| 55 |
+
|
| 56 |
+
- category: `curated_feed`
|
| 57 |
+
- returns:
|
| 58 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 59 |
+
- row_type: `daily_paper`
|
| 60 |
+
- default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
|
| 61 |
+
- guaranteed_fields: `paper_id`, `title`, `published_at`, `rank`
|
| 62 |
+
- optional_fields: `summary`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`
|
| 63 |
+
- supported_params: `limit`, `where`, `fields`
|
| 64 |
+
- fields_contract:
|
| 65 |
+
- allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
|
| 66 |
+
- canonical_only: `true`
|
| 67 |
+
- where_contract:
|
| 68 |
+
- allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
|
| 69 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 70 |
+
- normalized_only: `true`
|
| 71 |
+
- limit_contract:
|
| 72 |
+
- default_limit: `20`
|
| 73 |
+
- max_limit: `500`
|
| 74 |
+
- notes: Returns daily paper summary rows. repo_id is omitted unless the upstream payload provides it.
|
| 75 |
+
|
| 76 |
+
### hf_datasets_search
|
| 77 |
+
|
| 78 |
+
- category: `wrapped_hf_repo_search`
|
| 79 |
+
- backed_by: `HfApi.list_datasets`
|
| 80 |
+
- returns:
|
| 81 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 82 |
+
- row_type: `repo`
|
| 83 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 84 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 85 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 86 |
+
- supported_params: `search`, `filter`, `author`, `benchmark`, `dataset_name`, `gated`, `language_creators`, `language`, `multilinguality`, `size_categories`, `task_categories`, `task_ids`, `sort`, `limit`, `expand`, `full`, `fields`, `post_filter`
|
| 87 |
+
- sort_values: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 88 |
+
- expand_values: `author`, `card_data`, `citation`, `created_at`, `description`, `disabled`, `downloads`, `downloads_all_time`, `gated`, `last_modified`, `likes`, `paperswithcode_id`, `private`, `resource_group`, `sha`, `siblings`, `tags`, `trending_score`, `xet_enabled`, `gitaly_uid`
|
| 89 |
+
- fields_contract:
|
| 90 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 91 |
+
- canonical_only: `true`
|
| 92 |
+
- post_filter_contract:
|
| 93 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 94 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 95 |
+
- normalized_only: `true`
|
| 96 |
+
- limit_contract:
|
| 97 |
+
- default_limit: `20`
|
| 98 |
+
- max_limit: `5000`
|
| 99 |
+
- notes: Thin dataset-search wrapper around the Hub list_datasets path. Prefer this over hf_repo_search for dataset-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 100 |
+
|
| 101 |
+
### hf_models_search
|
| 102 |
+
|
| 103 |
+
- category: `wrapped_hf_repo_search`
|
| 104 |
+
- backed_by: `HfApi.list_models`
|
| 105 |
+
- returns:
|
| 106 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 107 |
+
- row_type: `repo`
|
| 108 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 109 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 110 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 111 |
+
- supported_params: `search`, `filter`, `author`, `apps`, `gated`, `inference`, `inference_provider`, `model_name`, `trained_dataset`, `pipeline_tag`, `emissions_thresholds`, `sort`, `limit`, `expand`, `full`, `card_data`, `fetch_config`, `fields`, `post_filter`
|
| 112 |
+
- sort_values: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 113 |
+
- expand_values: `author`, `base_models`, `card_data`, `config`, `created_at`, `disabled`, `downloads`, `downloads_all_time`, `eval_results`, `gated`, `gguf`, `inference`, `inference_provider_mapping`, `last_modified`, `library_name`, `likes`, `mask_token`, `model_index`, `pipeline_tag`, `private`, `resource_group`, `safetensors`, `sha`, `siblings`, `spaces`, `tags`, `transformers_info`, `trending_score`, `widget_data`, `xet_enabled`, `gitaly_uid`
|
| 114 |
+
- fields_contract:
|
| 115 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 116 |
+
- canonical_only: `true`
|
| 117 |
+
- post_filter_contract:
|
| 118 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 119 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 120 |
+
- normalized_only: `true`
|
| 121 |
+
- limit_contract:
|
| 122 |
+
- default_limit: `20`
|
| 123 |
+
- max_limit: `5000`
|
| 124 |
+
- notes: Thin model-search wrapper around the Hub list_models path. Prefer this over hf_repo_search for model-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 125 |
+
|
| 126 |
+
### hf_org_members
|
| 127 |
+
|
| 128 |
+
- category: `graph_scan`
|
| 129 |
+
- returns:
|
| 130 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 131 |
+
- row_type: `actor`
|
| 132 |
+
- default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 133 |
+
- guaranteed_fields: `username`
|
| 134 |
+
- optional_fields: `fullname`, `is_pro`, `role`, `type`
|
| 135 |
+
- supported_params: `organization`, `limit`, `scan_limit`, `count_only`, `where`, `fields`
|
| 136 |
+
- fields_contract:
|
| 137 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 138 |
+
- canonical_only: `true`
|
| 139 |
+
- where_contract:
|
| 140 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 141 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 142 |
+
- normalized_only: `true`
|
| 143 |
+
- limit_contract:
|
| 144 |
+
- default_limit: `1000`
|
| 145 |
+
- max_limit: `10000`
|
| 146 |
+
- scan_max: `10000`
|
| 147 |
+
- notes: Returns organization member summary rows.
|
| 148 |
+
|
| 149 |
+
### hf_profile_summary
|
| 150 |
+
|
| 151 |
+
- category: `profile_summary`
|
| 152 |
+
- returns:
|
| 153 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 154 |
+
- row_type: `profile`
|
| 155 |
+
- default_fields: `handle`, `entity_type`, `display_name`, `bio`, `description`, `avatar_url`, `website_url`, `twitter_url`, `github_url`, `linkedin_url`, `bluesky_url`, `followers_count`, `following_count`, `likes_count`, `members_count`, `models_count`, `datasets_count`, `spaces_count`, `discussions_count`, `papers_count`, `upvotes_count`, `organizations`, `is_pro`, `likes_sample`, `activity_sample`
|
| 156 |
+
- guaranteed_fields: `handle`, `entity_type`
|
| 157 |
+
- optional_fields: `display_name`, `bio`, `description`, `avatar_url`, `website_url`, `twitter_url`, `github_url`, `linkedin_url`, `bluesky_url`, `followers_count`, `following_count`, `likes_count`, `members_count`, `models_count`, `datasets_count`, `spaces_count`, `discussions_count`, `papers_count`, `upvotes_count`, `organizations`, `is_pro`, `likes_sample`, `activity_sample`
|
| 158 |
+
- supported_params: `handle`, `include`, `likes_limit`, `activity_limit`
|
| 159 |
+
- param_values:
|
| 160 |
+
- include: `likes`, `activity`
|
| 161 |
+
- notes: Profile summary helper. Aggregate counts like followers_count/following_count are in the base item. include=['likes', 'activity'] adds composed samples and extra upstream work; no other include values are supported. Overview-owned repo counts may differ slightly from visible public search/list results.
|
| 162 |
+
|
| 163 |
+
### hf_recent_activity
|
| 164 |
+
|
| 165 |
+
- category: `activity_feed`
|
| 166 |
+
- returns:
|
| 167 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 168 |
+
- row_type: `activity`
|
| 169 |
+
- default_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
|
| 170 |
+
- guaranteed_fields: `event_type`, `timestamp`
|
| 171 |
+
- optional_fields: `repo_id`, `repo_type`
|
| 172 |
+
- supported_params: `feed_type`, `entity`, `activity_types`, `repo_types`, `limit`, `max_pages`, `start_cursor`, `count_only`, `where`, `fields`
|
| 173 |
+
- param_values:
|
| 174 |
+
- feed_type: `user`, `org`
|
| 175 |
+
- repo_types: `model`, `dataset`, `space`
|
| 176 |
+
- fields_contract:
|
| 177 |
+
- allowed_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
|
| 178 |
+
- canonical_only: `true`
|
| 179 |
+
- where_contract:
|
| 180 |
+
- allowed_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
|
| 181 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 182 |
+
- normalized_only: `true`
|
| 183 |
+
- limit_contract:
|
| 184 |
+
- default_limit: `100`
|
| 185 |
+
- max_limit: `2000`
|
| 186 |
+
- max_pages: `10`
|
| 187 |
+
- page_limit: `100`
|
| 188 |
+
- notes: Activity helper may fetch multiple pages when the requested limit exceeds what a single page can return. count_only may still be a lower bound unless the feed is exhausted before max_pages is reached.
|
| 189 |
+
|
| 190 |
+
### hf_repo_details
|
| 191 |
+
|
| 192 |
+
- category: `repo_detail`
|
| 193 |
+
- returns:
|
| 194 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 195 |
+
- row_type: `repo`
|
| 196 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 197 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 198 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 199 |
+
- supported_params: `repo_id`, `repo_ids`, `repo_type`, `fields`
|
| 200 |
+
- param_values:
|
| 201 |
+
- repo_type: `model`, `dataset`, `space`, `auto`
|
| 202 |
+
- fields_contract:
|
| 203 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 204 |
+
- canonical_only: `true`
|
| 205 |
+
- notes: Exact repo metadata path. Passing multiple repo_ids issues one detail call per requested repo.
|
| 206 |
+
|
| 207 |
+
### hf_repo_discussion_details
|
| 208 |
+
|
| 209 |
+
- category: `discussion_detail`
|
| 210 |
+
- returns:
|
| 211 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 212 |
+
- row_type: `discussion_detail`
|
| 213 |
+
- default_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
|
| 214 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `title`, `author`, `status`
|
| 215 |
+
- optional_fields: `num`, `created_at`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
|
| 216 |
+
- supported_params: `repo_type`, `repo_id`, `discussion_num`, `fields`
|
| 217 |
+
- param_values:
|
| 218 |
+
- repo_type: `model`, `dataset`, `space`
|
| 219 |
+
- fields_contract:
|
| 220 |
+
- allowed_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
|
| 221 |
+
- canonical_only: `true`
|
| 222 |
+
- notes: Exact discussion detail helper.
|
| 223 |
+
|
| 224 |
+
### hf_repo_discussions
|
| 225 |
+
|
| 226 |
+
- category: `discussion_summary`
|
| 227 |
+
- returns:
|
| 228 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 229 |
+
- row_type: `discussion`
|
| 230 |
+
- default_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`
|
| 231 |
+
- guaranteed_fields: `num`, `title`, `author`, `status`
|
| 232 |
+
- optional_fields: `repo_id`, `repo_type`, `created_at`, `url`
|
| 233 |
+
- supported_params: `repo_type`, `repo_id`, `limit`, `fields`
|
| 234 |
+
- param_values:
|
| 235 |
+
- repo_type: `model`, `dataset`, `space`
|
| 236 |
+
- fields_contract:
|
| 237 |
+
- allowed_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`
|
| 238 |
+
- canonical_only: `true`
|
| 239 |
+
- limit_contract:
|
| 240 |
+
- default_limit: `20`
|
| 241 |
+
- max_limit: `200`
|
| 242 |
+
- notes: Discussion summary helper.
|
| 243 |
+
|
| 244 |
+
### hf_repo_likers
|
| 245 |
+
|
| 246 |
+
- category: `repo_to_users`
|
| 247 |
+
- returns:
|
| 248 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 249 |
+
- row_type: `actor`
|
| 250 |
+
- default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 251 |
+
- guaranteed_fields: `username`
|
| 252 |
+
- optional_fields: `fullname`, `is_pro`, `role`, `type`
|
| 253 |
+
- supported_params: `repo_id`, `repo_type`, `limit`, `count_only`, `pro_only`, `where`, `fields`
|
| 254 |
+
- param_values:
|
| 255 |
+
- repo_type: `model`, `dataset`, `space`
|
| 256 |
+
- fields_contract:
|
| 257 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 258 |
+
- canonical_only: `true`
|
| 259 |
+
- where_contract:
|
| 260 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 261 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 262 |
+
- normalized_only: `true`
|
| 263 |
+
- limit_contract:
|
| 264 |
+
- default_limit: `1000`
|
| 265 |
+
- notes: Returns users who liked a repo.
|
| 266 |
+
|
| 267 |
+
### hf_repo_search
|
| 268 |
+
|
| 269 |
+
- category: `cross_type_repo_search`
|
| 270 |
+
- returns:
|
| 271 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 272 |
+
- row_type: `repo`
|
| 273 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 274 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 275 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 276 |
+
- supported_params: `search`, `repo_type`, `repo_types`, `filter`, `author`, `sort`, `limit`, `fields`, `post_filter`
|
| 277 |
+
- sort_values_by_repo_type:
|
| 278 |
+
- dataset: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 279 |
+
- model: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 280 |
+
- space: `created_at`, `last_modified`, `likes`, `trending_score`
|
| 281 |
+
- param_values:
|
| 282 |
+
- repo_type: `model`, `dataset`, `space`
|
| 283 |
+
- repo_types: `model`, `dataset`, `space`
|
| 284 |
+
- sort: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
|
| 285 |
+
- fields_contract:
|
| 286 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 287 |
+
- canonical_only: `true`
|
| 288 |
+
- post_filter_contract:
|
| 289 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 290 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 291 |
+
- normalized_only: `true`
|
| 292 |
+
- limit_contract:
|
| 293 |
+
- default_limit: `20`
|
| 294 |
+
- max_limit: `5000`
|
| 295 |
+
- notes: Small generic repo-search helper. Prefer hf_models_search, hf_datasets_search, or hf_spaces_search for single-type queries; use hf_repo_search for intentionally cross-type search. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 296 |
+
|
| 297 |
+
### hf_runtime_capabilities
|
| 298 |
+
|
| 299 |
+
- category: `introspection`
|
| 300 |
+
- returns:
|
| 301 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 302 |
+
- row_type: `runtime_capability`
|
| 303 |
+
- default_fields: `allowed_sections`, `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
|
| 304 |
+
- guaranteed_fields: `allowed_sections`, `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
|
| 305 |
+
- optional_fields: []
|
| 306 |
+
- supported_params: `section`
|
| 307 |
+
- param_values:
|
| 308 |
+
- section: `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
|
| 309 |
+
- notes: Introspection helper. Use section=... to narrow the response.
|
| 310 |
+
|
| 311 |
+
### hf_spaces_search
|
| 312 |
+
|
| 313 |
+
- category: `wrapped_hf_repo_search`
|
| 314 |
+
- backed_by: `HfApi.list_spaces`
|
| 315 |
+
- returns:
|
| 316 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 317 |
+
- row_type: `repo`
|
| 318 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 319 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
|
| 320 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 321 |
+
- supported_params: `search`, `filter`, `author`, `datasets`, `models`, `linked`, `sort`, `limit`, `expand`, `full`, `fields`, `post_filter`
|
| 322 |
+
- sort_values: `created_at`, `last_modified`, `likes`, `trending_score`
|
| 323 |
+
- expand_values: `author`, `card_data`, `created_at`, `datasets`, `disabled`, `last_modified`, `likes`, `models`, `private`, `resource_group`, `runtime`, `sdk`, `sha`, `siblings`, `subdomain`, `tags`, `trending_score`, `xet_enabled`, `gitaly_uid`
|
| 324 |
+
- fields_contract:
|
| 325 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 326 |
+
- canonical_only: `true`
|
| 327 |
+
- post_filter_contract:
|
| 328 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 329 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 330 |
+
- normalized_only: `true`
|
| 331 |
+
- limit_contract:
|
| 332 |
+
- default_limit: `20`
|
| 333 |
+
- max_limit: `5000`
|
| 334 |
+
- notes: Thin space-search wrapper around the Hub list_spaces path. Prefer this over hf_repo_search for space-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
|
| 335 |
+
|
| 336 |
+
### hf_trending
|
| 337 |
+
|
| 338 |
+
- category: `curated_repo_feed`
|
| 339 |
+
- returns:
|
| 340 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 341 |
+
- row_type: `repo`
|
| 342 |
+
- default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
|
| 343 |
+
- guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`, `trending_rank`
|
| 344 |
+
- optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
|
| 345 |
+
- supported_params: `repo_type`, `limit`, `where`, `fields`
|
| 346 |
+
- param_values:
|
| 347 |
+
- repo_type: `model`, `dataset`, `space`, `all`
|
| 348 |
+
- fields_contract:
|
| 349 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
|
| 350 |
+
- canonical_only: `true`
|
| 351 |
+
- where_contract:
|
| 352 |
+
- allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
|
| 353 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 354 |
+
- normalized_only: `true`
|
| 355 |
+
- limit_contract:
|
| 356 |
+
- default_limit: `20`
|
| 357 |
+
- max_limit: `20`
|
| 358 |
+
- notes: Returns ordered trending summary rows only. Use hf_repo_details for exact repo metadata.
|
| 359 |
+
|
| 360 |
+
### hf_user_graph
|
| 361 |
+
|
| 362 |
+
- category: `graph_scan`
|
| 363 |
+
- returns:
|
| 364 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 365 |
+
- row_type: `actor`
|
| 366 |
+
- default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 367 |
+
- guaranteed_fields: `username`
|
| 368 |
+
- optional_fields: `fullname`, `is_pro`, `role`, `type`
|
| 369 |
+
- supported_params: `username`, `relation`, `limit`, `scan_limit`, `count_only`, `pro_only`, `where`, `fields`
|
| 370 |
+
- param_values:
|
| 371 |
+
- relation: `followers`, `following`
|
| 372 |
+
- fields_contract:
|
| 373 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 374 |
+
- canonical_only: `true`
|
| 375 |
+
- where_contract:
|
| 376 |
+
- allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
|
| 377 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 378 |
+
- normalized_only: `true`
|
| 379 |
+
- limit_contract:
|
| 380 |
+
- default_limit: `1000`
|
| 381 |
+
- max_limit: `10000`
|
| 382 |
+
- scan_max: `10000`
|
| 383 |
+
- notes: Returns followers/following summary rows.
|
| 384 |
+
|
| 385 |
+
### hf_user_likes
|
| 386 |
+
|
| 387 |
+
- category: `user_to_repos`
|
| 388 |
+
- returns:
|
| 389 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 390 |
+
- row_type: `user_like`
|
| 391 |
+
- default_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
|
| 392 |
+
- guaranteed_fields: `liked_at`, `repo_id`, `repo_type`
|
| 393 |
+
- optional_fields: `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
|
| 394 |
+
- supported_params: `username`, `repo_types`, `limit`, `scan_limit`, `count_only`, `where`, `fields`, `sort`, `ranking_window`
|
| 395 |
+
- sort_values: `liked_at`, `repo_likes`, `repo_downloads`
|
| 396 |
+
- param_values:
|
| 397 |
+
- repo_types: `model`, `dataset`, `space`
|
| 398 |
+
- sort: `liked_at`, `repo_likes`, `repo_downloads`
|
| 399 |
+
- fields_contract:
|
| 400 |
+
- allowed_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
|
| 401 |
+
- canonical_only: `true`
|
| 402 |
+
- where_contract:
|
| 403 |
+
- allowed_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
|
| 404 |
+
- supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
|
| 405 |
+
- normalized_only: `true`
|
| 406 |
+
- limit_contract:
|
| 407 |
+
- default_limit: `100`
|
| 408 |
+
- max_limit: `2000`
|
| 409 |
+
- enrich_max: `50`
|
| 410 |
+
- ranking_default: `50`
|
| 411 |
+
- scan_max: `10000`
|
| 412 |
+
- notes: Default recency mode is cheap. Popularity-ranked sorts use canonical keys liked_at/repo_likes/repo_downloads and rerank only a bounded recent shortlist. When ranking by popularity, check meta.ranking_complete / meta.ranking_window: the helper (not the caller) decides how many recent likes were considered, so results may exclude older popular items.
|
| 413 |
+
|
| 414 |
+
### hf_whoami
|
| 415 |
+
|
| 416 |
+
- category: `identity`
|
| 417 |
+
- returns:
|
| 418 |
+
- envelope: `{ok, item, items, meta, error}`
|
| 419 |
+
- row_type: `user`
|
| 420 |
+
- default_fields: `username`, `fullname`, `is_pro`
|
| 421 |
+
- guaranteed_fields: `username`
|
| 422 |
+
- optional_fields: `fullname`, `is_pro`
|
| 423 |
+
- supported_params: []
|
| 424 |
+
- notes: Returns the current authenticated user when a request token is available.
|
.prod/agent-cards/shared/_monty_helper_signatures.md
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Helper signatures (generated from Python)
|
| 2 |
+
|
| 3 |
+
These signatures are exported from the live runtime with `inspect.signature(...)`.
|
| 4 |
+
If prompt prose and signatures disagree, trust these signatures.
|
| 5 |
+
|
| 6 |
+
```py
|
| 7 |
+
await hf_collection_items(collection_id: 'str', repo_types: 'list[str] | None' = None, limit: 'int' = 100, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 8 |
+
|
| 9 |
+
await hf_collections_search(query: 'str | None' = None, owner: 'str | None' = None, limit: 'int' = 20, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 10 |
+
|
| 11 |
+
await hf_daily_papers(limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 12 |
+
|
| 13 |
+
await hf_datasets_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, benchmark: 'str | bool | None' = None, dataset_name: 'str | None' = None, gated: 'bool | None' = None, language_creators: 'str | list[str] | None' = None, language: 'str | list[str] | None' = None, multilinguality: 'str | list[str] | None' = None, size_categories: 'str | list[str] | None' = None, task_categories: 'str | list[str] | None' = None, task_ids: 'str | list[str] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
| 14 |
+
|
| 15 |
+
await hf_models_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, apps: 'str | list[str] | None' = None, gated: 'bool | None' = None, inference: 'str | None' = None, inference_provider: 'str | list[str] | None' = None, model_name: 'str | None' = None, trained_dataset: 'str | list[str] | None' = None, pipeline_tag: 'str | None' = None, emissions_thresholds: 'tuple[float, float] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, card_data: 'bool' = False, fetch_config: 'bool' = False, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
| 16 |
+
|
| 17 |
+
await hf_org_members(organization: 'str', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 18 |
+
|
| 19 |
+
await hf_profile_summary(handle: 'str | None' = None, include: 'list[str] | None' = None, likes_limit: 'int' = 10, activity_limit: 'int' = 10) -> 'dict[str, Any]'
|
| 20 |
+
|
| 21 |
+
await hf_recent_activity(feed_type: 'str | None' = None, entity: 'str | None' = None, activity_types: 'list[str] | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, max_pages: 'int | None' = None, start_cursor: 'str | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 22 |
+
|
| 23 |
+
await hf_repo_details(repo_id: 'str | None' = None, repo_ids: 'list[str] | None' = None, repo_type: 'str' = 'auto', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 24 |
+
|
| 25 |
+
await hf_repo_discussion_details(repo_type: 'str', repo_id: 'str', discussion_num: 'int', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 26 |
+
|
| 27 |
+
await hf_repo_discussions(repo_type: 'str', repo_id: 'str', limit: 'int' = 20, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 28 |
+
|
| 29 |
+
await hf_repo_likers(repo_id: 'str', repo_type: 'str', limit: 'int | None' = None, count_only: 'bool' = False, pro_only: 'bool | None' = None, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 30 |
+
|
| 31 |
+
await hf_repo_search(search: 'str | None' = None, repo_type: 'str | None' = None, repo_types: 'list[str] | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, sort: 'str | None' = None, limit: 'int' = 20, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
| 32 |
+
|
| 33 |
+
await hf_runtime_capabilities(section: 'str | None' = None) -> 'dict[str, Any]'
|
| 34 |
+
|
| 35 |
+
await hf_spaces_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, datasets: 'str | list[str] | None' = None, models: 'str | list[str] | None' = None, linked: 'bool' = False, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
|
| 36 |
+
|
| 37 |
+
await hf_trending(repo_type: 'str' = 'model', limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 38 |
+
|
| 39 |
+
await hf_user_graph(username: 'str | None' = None, relation: 'str' = 'followers', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, pro_only: 'bool | None' = None, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
|
| 40 |
+
|
| 41 |
+
await hf_user_likes(username: 'str | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None, sort: 'str | None' = None, ranking_window: 'int | None' = None) -> 'dict[str, Any]'
|
| 42 |
+
|
| 43 |
+
await hf_whoami() -> 'dict[str, Any]'
|
| 44 |
+
```
|
.prod/monty_api/__init__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations

from .registry import HELPER_EXTERNALS

# Names resolved lazily so importing the package stays cheap; the query
# entrypoints pull in the heavyweight runtime only on first access.
_LAZY_EXPORTS = ("hf_hub_query", "hf_hub_query_raw", "main")


def __getattr__(name: str):  # pragma: no cover - tiny import shim
    """Resolve the query entrypoints on first attribute access (PEP 562)."""
    if name in _LAZY_EXPORTS:
        from . import query_entrypoints

        return getattr(query_entrypoints, name)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


__all__ = [
    "HELPER_EXTERNALS",
    "hf_hub_query",
    "hf_hub_query_raw",
    "main",
]
]
|
.prod/monty_api/aliases.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import get_args
|
| 4 |
+
|
| 5 |
+
try:
|
| 6 |
+
from huggingface_hub.hf_api import DatasetSort_T, ModelSort_T, SpaceSort_T
|
| 7 |
+
except ModuleNotFoundError: # pragma: no cover - dependency-light test/import path
|
| 8 |
+
DatasetSort_T = ()
|
| 9 |
+
ModelSort_T = ()
|
| 10 |
+
SpaceSort_T = ()
|
| 11 |
+
|
| 12 |
+
REPO_SORT_KEYS: dict[str, set[str]] = {
|
| 13 |
+
"model": set(get_args(ModelSort_T))
|
| 14 |
+
or {
|
| 15 |
+
"created_at",
|
| 16 |
+
"downloads",
|
| 17 |
+
"last_modified",
|
| 18 |
+
"likes",
|
| 19 |
+
"trending_score",
|
| 20 |
+
},
|
| 21 |
+
"dataset": set(get_args(DatasetSort_T))
|
| 22 |
+
or {
|
| 23 |
+
"created_at",
|
| 24 |
+
"downloads",
|
| 25 |
+
"last_modified",
|
| 26 |
+
"likes",
|
| 27 |
+
"trending_score",
|
| 28 |
+
},
|
| 29 |
+
"space": set(get_args(SpaceSort_T))
|
| 30 |
+
or {
|
| 31 |
+
"created_at",
|
| 32 |
+
"last_modified",
|
| 33 |
+
"likes",
|
| 34 |
+
"trending_score",
|
| 35 |
+
},
|
| 36 |
+
}
|
.prod/monty_api/constants.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tunable limits and canonical field vocabularies for the monty_api runtime."""

from __future__ import annotations

# --- Run / call budgets ------------------------------------------------------
DEFAULT_TIMEOUT_SEC = 90  # Default end-to-end timeout for one Monty run.
DEFAULT_MAX_CALLS = 400  # Default external-call budget exposed to callers.
MAX_CALLS_LIMIT = 400  # Absolute max external-call budget accepted by the runtime.
INTERNAL_STRICT_MODE = False

# --- Output / result-size caps -----------------------------------------------
OUTPUT_ITEMS_TRUNCATION_LIMIT = 500  # Final output truncation for oversized `items` payloads.
EXHAUSTIVE_HELPER_RETURN_HARD_CAP = 2_000  # Runtime hard cap for exhaustive-helper output rows.
SELECTIVE_ENDPOINT_RETURN_HARD_CAP = 200  # Default cap for one-shot selective endpoint helpers.
TRENDING_ENDPOINT_MAX_LIMIT = 20  # Upstream `/api/trending` endpoint maximum.

# --- Scan / pagination caps --------------------------------------------------
GRAPH_SCAN_LIMIT_CAP = 10_000  # Max follower/member rows scanned in one helper call.
LIKES_SCAN_LIMIT_CAP = 10_000  # Max like-event rows scanned in one helper call.
LIKES_RANKING_WINDOW_DEFAULT = 50  # Default shortlist size when ranking likes by repo popularity.
LIKES_ENRICHMENT_MAX_REPOS = 50  # Max liked repos enriched with extra repo-detail calls.
RECENT_ACTIVITY_PAGE_SIZE = 100  # Rows requested per `/api/recent-activity` page.
RECENT_ACTIVITY_SCAN_MAX_PAGES = 10  # Max recent-activity pages fetched in one helper call.
USER_SUMMARY_LIKES_SCAN_LIMIT = 1_000  # Like rows sampled for user summary.
USER_SUMMARY_ACTIVITY_MAX_PAGES = 3  # Activity pages sampled for user summary.

# --- Sandbox resource limits -------------------------------------------------
DEFAULT_MONTY_MAX_MEMORY = 64 * 1024 * 1024  # 64 MiB
DEFAULT_MONTY_MAX_ALLOCATIONS = 250_000  # Approximate object-allocation ceiling in the sandbox.
DEFAULT_MONTY_MAX_RECURSION_DEPTH = 100  # Python recursion limit inside the sandbox.

# --- Canonical field vocabularies (one tuple per row type) -------------------
# Order is meaningful: it is the presentation order used by `fields` contracts.

REPO_CANONICAL_FIELDS: tuple[str, ...] = (
    "repo_id", "repo_type", "author", "likes", "downloads", "trending_score",
    "created_at", "last_modified", "pipeline_tag", "num_params", "repo_url",
    "tags", "library_name", "description", "paperswithcode_id", "sdk",
    "models", "datasets", "subdomain", "runtime_stage", "runtime",
)

USER_CANONICAL_FIELDS: tuple[str, ...] = (
    "username", "fullname", "bio", "website_url", "twitter", "github",
    "linkedin", "bluesky", "followers", "following", "likes", "is_pro",
)

PROFILE_CANONICAL_FIELDS: tuple[str, ...] = (
    "handle", "entity_type", "display_name", "bio", "description",
    "avatar_url", "website_url", "twitter_url", "github_url", "linkedin_url",
    "bluesky_url", "followers_count", "following_count", "likes_count",
    "members_count", "models_count", "datasets_count", "spaces_count",
    "discussions_count", "papers_count", "upvotes_count", "organizations",
    "is_pro", "likes_sample", "activity_sample",
)

ACTOR_CANONICAL_FIELDS: tuple[str, ...] = (
    "username", "fullname", "is_pro", "role", "type",
)

USER_LIKES_CANONICAL_FIELDS: tuple[str, ...] = (
    "liked_at", "repo_id", "repo_type", "repo_author", "repo_likes",
    "repo_downloads", "repo_url",
)

DISCUSSION_CANONICAL_FIELDS: tuple[str, ...] = (
    "num", "repo_id", "repo_type", "title", "author", "created_at",
    "status", "url",
)

DISCUSSION_DETAIL_CANONICAL_FIELDS: tuple[str, ...] = (
    "num", "repo_id", "repo_type", "title", "author", "created_at",
    "status", "url", "comment_count", "latest_comment_author",
    "latest_comment_created_at", "latest_comment_text", "latest_comment_html",
)

ACTIVITY_CANONICAL_FIELDS: tuple[str, ...] = (
    "event_type", "repo_id", "repo_type", "timestamp",
)

COLLECTION_CANONICAL_FIELDS: tuple[str, ...] = (
    "collection_id", "slug", "title", "owner", "owner_type", "description",
    "gating", "last_updated", "item_count",
)

DAILY_PAPER_CANONICAL_FIELDS: tuple[str, ...] = (
    "paper_id", "title", "summary", "published_at", "submitted_on_daily_at",
    "authors", "organization", "submitted_by", "discussion_id", "upvotes",
    "github_repo_url", "github_stars", "project_page_url", "num_comments",
    "is_author_participating", "repo_id", "rank",
)
|
.prod/monty_api/context_types.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations

from typing import Any, Protocol


class HelperRuntimeContext(Protocol):
    """Structural interface the helper layer expects from the runtime context.

    Only the attributes and coroutine helpers actually use are declared here;
    anything else is reachable through the permissive ``__getattr__`` escape
    hatch, so concrete contexts may carry extra state without breaking typing.
    """

    helper_registry: dict[str, Any]
    call_count: dict[str, int]
    trace: list[dict[str, Any]]
    limit_summaries: list[dict[str, Any]]
    latest_helper_error_box: dict[str, dict[str, Any] | None]
    internal_helper_used: dict[str, bool]

    async def call_helper(self, helper_name: str, /, *args: Any, **kwargs: Any) -> Any:
        ...

    def __getattr__(self, name: str) -> Any:
        ...
|
.prod/monty_api/helper_contracts.py
ADDED
|
@@ -0,0 +1,531 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import inspect
|
| 4 |
+
import re
|
| 5 |
+
from collections.abc import Callable, Mapping
|
| 6 |
+
from functools import lru_cache
|
| 7 |
+
from typing import Any, TypedDict, get_args
|
| 8 |
+
|
| 9 |
+
try:
|
| 10 |
+
import huggingface_hub.hf_api as hf_api
|
| 11 |
+
except ModuleNotFoundError: # pragma: no cover - dependency-light test/import path
|
| 12 |
+
hf_api = None
|
| 13 |
+
|
| 14 |
+
from .aliases import REPO_SORT_KEYS
|
| 15 |
+
from .constants import (
|
| 16 |
+
ACTIVITY_CANONICAL_FIELDS,
|
| 17 |
+
ACTOR_CANONICAL_FIELDS,
|
| 18 |
+
COLLECTION_CANONICAL_FIELDS,
|
| 19 |
+
DAILY_PAPER_CANONICAL_FIELDS,
|
| 20 |
+
DISCUSSION_CANONICAL_FIELDS,
|
| 21 |
+
DISCUSSION_DETAIL_CANONICAL_FIELDS,
|
| 22 |
+
PROFILE_CANONICAL_FIELDS,
|
| 23 |
+
REPO_CANONICAL_FIELDS,
|
| 24 |
+
USER_CANONICAL_FIELDS,
|
| 25 |
+
USER_LIKES_CANONICAL_FIELDS,
|
| 26 |
+
)
|
| 27 |
+
from .registry import (
|
| 28 |
+
HELPER_DEFAULT_METADATA,
|
| 29 |
+
PAGINATION_POLICY,
|
| 30 |
+
REPO_SEARCH_ALLOWED_EXPAND,
|
| 31 |
+
RUNTIME_CAPABILITY_FIELDS,
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
HELPER_RESULT_ENVELOPE = {
|
| 36 |
+
"ok": "bool",
|
| 37 |
+
"item": "dict | None",
|
| 38 |
+
"items": "list[dict]",
|
| 39 |
+
"meta": "dict",
|
| 40 |
+
"error": "str | None",
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
FILTER_OPERATORS = ["eq", "in", "contains", "icontains", "gte", "lte"]
|
| 44 |
+
REPO_TYPE_VALUES = ["model", "dataset", "space"]
|
| 45 |
+
TRENDING_CANONICAL_FIELDS = [*REPO_CANONICAL_FIELDS, "trending_rank"]
|
| 46 |
+
COMMON_REPO_SEARCH_PARAMS = {
|
| 47 |
+
"search",
|
| 48 |
+
"filter",
|
| 49 |
+
"author",
|
| 50 |
+
"sort",
|
| 51 |
+
"limit",
|
| 52 |
+
"fields",
|
| 53 |
+
"post_filter",
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class HelperContract(TypedDict, total=False):
    """Machine-readable contract describing one hf_* helper.

    ``total=False``: every key is optional; contract builders only populate
    the sections that apply to a given helper.
    """

    name: str
    signature: str
    category: str
    backed_by: str
    supported_params: list[str]
    sort_values: list[str]
    sort_values_by_repo_type: dict[str, list[str]]
    expand_values: list[str]
    param_values: dict[str, list[str]]
    fields_contract: dict[str, Any]
    where_contract: dict[str, Any]
    post_filter_contract: dict[str, Any]
    limit_contract: dict[str, Any]
    returns: dict[str, Any]
    notes: str
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
FIELD_GROUPS: dict[str, list[str]] = {
|
| 76 |
+
"activity": list(ACTIVITY_CANONICAL_FIELDS),
|
| 77 |
+
"actor": list(ACTOR_CANONICAL_FIELDS),
|
| 78 |
+
"collection": list(COLLECTION_CANONICAL_FIELDS),
|
| 79 |
+
"daily_paper": list(DAILY_PAPER_CANONICAL_FIELDS),
|
| 80 |
+
"discussion": list(DISCUSSION_CANONICAL_FIELDS),
|
| 81 |
+
"discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
|
| 82 |
+
"profile": list(PROFILE_CANONICAL_FIELDS),
|
| 83 |
+
"repo": list(REPO_CANONICAL_FIELDS),
|
| 84 |
+
"trending_repo": list(TRENDING_CANONICAL_FIELDS),
|
| 85 |
+
"runtime_capability": list(RUNTIME_CAPABILITY_FIELDS),
|
| 86 |
+
"user": list(USER_CANONICAL_FIELDS),
|
| 87 |
+
"user_like": list(USER_LIKES_CANONICAL_FIELDS),
|
| 88 |
+
}
|
| 89 |
+
RUNTIME_CAPABILITY_SECTION_VALUES = [
|
| 90 |
+
field for field in RUNTIME_CAPABILITY_FIELDS if field != "allowed_sections"
|
| 91 |
+
]
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
HELPER_CONTRACT_SPECS: dict[str, dict[str, Any]] = {
|
| 95 |
+
"hf_collection_items": {
|
| 96 |
+
"category": "collection_navigation",
|
| 97 |
+
"row_type": "repo",
|
| 98 |
+
"fields_group": "repo",
|
| 99 |
+
"filter_param": "where",
|
| 100 |
+
"filter_group": "repo",
|
| 101 |
+
"param_values": {"repo_types": REPO_TYPE_VALUES},
|
| 102 |
+
},
|
| 103 |
+
"hf_collections_search": {
|
| 104 |
+
"category": "collection_search",
|
| 105 |
+
"row_type": "collection",
|
| 106 |
+
"fields_group": "collection",
|
| 107 |
+
"filter_param": "where",
|
| 108 |
+
"filter_group": "collection",
|
| 109 |
+
},
|
| 110 |
+
"hf_daily_papers": {
|
| 111 |
+
"category": "curated_feed",
|
| 112 |
+
"row_type": "daily_paper",
|
| 113 |
+
"fields_group": "daily_paper",
|
| 114 |
+
"filter_param": "where",
|
| 115 |
+
"filter_group": "daily_paper",
|
| 116 |
+
},
|
| 117 |
+
"hf_datasets_search": {
|
| 118 |
+
"category": "wrapped_hf_repo_search",
|
| 119 |
+
"row_type": "repo",
|
| 120 |
+
"fields_group": "repo",
|
| 121 |
+
"filter_param": "post_filter",
|
| 122 |
+
"filter_group": "repo",
|
| 123 |
+
"upstream_repo_type": "dataset",
|
| 124 |
+
},
|
| 125 |
+
"hf_models_search": {
|
| 126 |
+
"category": "wrapped_hf_repo_search",
|
| 127 |
+
"row_type": "repo",
|
| 128 |
+
"fields_group": "repo",
|
| 129 |
+
"filter_param": "post_filter",
|
| 130 |
+
"filter_group": "repo",
|
| 131 |
+
"upstream_repo_type": "model",
|
| 132 |
+
},
|
| 133 |
+
"hf_org_members": {
|
| 134 |
+
"category": "graph_scan",
|
| 135 |
+
"row_type": "actor",
|
| 136 |
+
"fields_group": "actor",
|
| 137 |
+
"filter_param": "where",
|
| 138 |
+
"filter_group": "actor",
|
| 139 |
+
},
|
| 140 |
+
"hf_profile_summary": {
|
| 141 |
+
"category": "profile_summary",
|
| 142 |
+
"row_type": "profile",
|
| 143 |
+
"param_values": {"include": ["likes", "activity"]},
|
| 144 |
+
},
|
| 145 |
+
"hf_recent_activity": {
|
| 146 |
+
"category": "activity_feed",
|
| 147 |
+
"row_type": "activity",
|
| 148 |
+
"fields_group": "activity",
|
| 149 |
+
"filter_param": "where",
|
| 150 |
+
"filter_group": "activity",
|
| 151 |
+
"param_values": {"feed_type": ["user", "org"], "repo_types": REPO_TYPE_VALUES},
|
| 152 |
+
},
|
| 153 |
+
"hf_repo_details": {
|
| 154 |
+
"category": "repo_detail",
|
| 155 |
+
"row_type": "repo",
|
| 156 |
+
"fields_group": "repo",
|
| 157 |
+
"param_values": {"repo_type": [*REPO_TYPE_VALUES, "auto"]},
|
| 158 |
+
},
|
| 159 |
+
"hf_repo_discussion_details": {
|
| 160 |
+
"category": "discussion_detail",
|
| 161 |
+
"row_type": "discussion_detail",
|
| 162 |
+
"fields_group": "discussion_detail",
|
| 163 |
+
"param_values": {"repo_type": REPO_TYPE_VALUES},
|
| 164 |
+
},
|
| 165 |
+
"hf_repo_discussions": {
|
| 166 |
+
"category": "discussion_summary",
|
| 167 |
+
"row_type": "discussion",
|
| 168 |
+
"fields_group": "discussion",
|
| 169 |
+
"param_values": {"repo_type": REPO_TYPE_VALUES},
|
| 170 |
+
},
|
| 171 |
+
"hf_repo_likers": {
|
| 172 |
+
"category": "repo_to_users",
|
| 173 |
+
"row_type": "actor",
|
| 174 |
+
"fields_group": "actor",
|
| 175 |
+
"filter_param": "where",
|
| 176 |
+
"filter_group": "actor",
|
| 177 |
+
"param_values": {"repo_type": REPO_TYPE_VALUES},
|
| 178 |
+
},
|
| 179 |
+
"hf_repo_search": {
|
| 180 |
+
"category": "cross_type_repo_search",
|
| 181 |
+
"row_type": "repo",
|
| 182 |
+
"fields_group": "repo",
|
| 183 |
+
"filter_param": "post_filter",
|
| 184 |
+
"filter_group": "repo",
|
| 185 |
+
"param_values": {"repo_type": REPO_TYPE_VALUES, "repo_types": REPO_TYPE_VALUES},
|
| 186 |
+
},
|
| 187 |
+
"hf_runtime_capabilities": {
|
| 188 |
+
"category": "introspection",
|
| 189 |
+
"row_type": "runtime_capability",
|
| 190 |
+
"param_values": {"section": list(RUNTIME_CAPABILITY_SECTION_VALUES)},
|
| 191 |
+
},
|
| 192 |
+
"hf_spaces_search": {
|
| 193 |
+
"category": "wrapped_hf_repo_search",
|
| 194 |
+
"row_type": "repo",
|
| 195 |
+
"fields_group": "repo",
|
| 196 |
+
"filter_param": "post_filter",
|
| 197 |
+
"filter_group": "repo",
|
| 198 |
+
"upstream_repo_type": "space",
|
| 199 |
+
},
|
| 200 |
+
"hf_trending": {
|
| 201 |
+
"category": "curated_repo_feed",
|
| 202 |
+
"row_type": "repo",
|
| 203 |
+
"fields_group": "trending_repo",
|
| 204 |
+
"filter_param": "where",
|
| 205 |
+
"filter_group": "trending_repo",
|
| 206 |
+
"param_values": {"repo_type": [*REPO_TYPE_VALUES, "all"]},
|
| 207 |
+
},
|
| 208 |
+
"hf_user_graph": {
|
| 209 |
+
"category": "graph_scan",
|
| 210 |
+
"row_type": "actor",
|
| 211 |
+
"fields_group": "actor",
|
| 212 |
+
"filter_param": "where",
|
| 213 |
+
"filter_group": "actor",
|
| 214 |
+
"param_values": {
|
| 215 |
+
"relation": ["followers", "following"],
|
| 216 |
+
},
|
| 217 |
+
},
|
| 218 |
+
"hf_user_likes": {
|
| 219 |
+
"category": "user_to_repos",
|
| 220 |
+
"row_type": "user_like",
|
| 221 |
+
"fields_group": "user_like",
|
| 222 |
+
"filter_param": "where",
|
| 223 |
+
"filter_group": "user_like",
|
| 224 |
+
"param_values": {
|
| 225 |
+
"repo_types": REPO_TYPE_VALUES,
|
| 226 |
+
"sort": ["liked_at", "repo_likes", "repo_downloads"],
|
| 227 |
+
},
|
| 228 |
+
},
|
| 229 |
+
"hf_whoami": {
|
| 230 |
+
"category": "identity",
|
| 231 |
+
"row_type": "user",
|
| 232 |
+
},
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
def _dedupe(values: list[str]) -> list[str]:
|
| 237 |
+
seen: set[str] = set()
|
| 238 |
+
out: list[str] = []
|
| 239 |
+
for value in values:
|
| 240 |
+
item = str(value).strip()
|
| 241 |
+
if not item or item in seen:
|
| 242 |
+
continue
|
| 243 |
+
seen.add(item)
|
| 244 |
+
out.append(item)
|
| 245 |
+
return out
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
def _snake_case_token(value: str) -> str:
|
| 249 |
+
cleaned = str(value).strip().replace("-", "_")
|
| 250 |
+
cleaned = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", cleaned)
|
| 251 |
+
cleaned = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", cleaned)
|
| 252 |
+
cleaned = re.sub(r"__+", "_", cleaned)
|
| 253 |
+
return cleaned.lower()
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
def repo_expand_alias_map(repo_type: str) -> dict[str, str]:
    """Map raw and snake_case spellings of each allowed expand value to its raw form.

    Unknown repo types yield an empty mapping.
    """
    alias_map: dict[str, str] = {}
    for value in REPO_SEARCH_ALLOWED_EXPAND.get(repo_type, []):
        raw = str(value)
        alias_map[raw] = raw
        alias_map[_snake_case_token(raw)] = raw
    return alias_map
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
def normalized_repo_expand_values(repo_type: str) -> list[str]:
    """Return the snake_case expand values for *repo_type*, deduplicated in order."""
    raw_values = REPO_SEARCH_ALLOWED_EXPAND.get(repo_type, [])
    return _dedupe([_snake_case_token(value) for value in raw_values])
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
@lru_cache(maxsize=1)
def _upstream_repo_search_facts() -> dict[str, dict[str, Any]]:
    """Introspect huggingface_hub's list_* APIs once and cache the result.

    For each repo type this records the upstream method name, its accepted
    keyword parameters (minus ``self``/``token``), the valid ``sort`` values,
    and the normalized ``expand`` values. When huggingface_hub is not
    importable (``hf_api is None``), it falls back to the static
    COMMON_REPO_SEARCH_PARAMS and REPO_SORT_KEYS tables so the module stays
    usable without the dependency.
    """
    alias_names = {
        "dataset": ("list_datasets", "DatasetSort_T"),
        "model": ("list_models", "ModelSort_T"),
        "space": ("list_spaces", "SpaceSort_T"),
    }
    facts: dict[str, dict[str, Any]] = {}
    for repo_type, (method_name, sort_alias_name) in alias_names.items():
        if hf_api is None:
            # Dependency-light path: static fallbacks instead of live introspection.
            supported_params = sorted(COMMON_REPO_SEARCH_PARAMS)
            sort_values = sorted(REPO_SORT_KEYS.get(repo_type, set()))
        else:
            # Read the real signature and sort Literal so the contract tracks
            # whatever huggingface_hub version is installed.
            method = getattr(hf_api.HfApi, method_name)
            signature = inspect.signature(method)
            supported_params = [
                name for name in signature.parameters if name not in {"self", "token"}
            ]
            sort_alias = getattr(hf_api, sort_alias_name, None)
            sort_values = _dedupe([str(value) for value in get_args(sort_alias)])
        facts[repo_type] = {
            "method_name": f"HfApi.{method_name}",
            "supported_params": supported_params,
            "sort_values": sort_values,
            "expand_values": normalized_repo_expand_values(repo_type),
        }
    return facts
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
def _returns_contract(helper_name: str, row_type: str | None) -> dict[str, Any]:
    """Build the `returns` section of a helper contract from registry metadata."""
    metadata = HELPER_DEFAULT_METADATA.get(helper_name, {})
    contract: dict[str, Any] = {"envelope": dict(HELPER_RESULT_ENVELOPE)}
    if row_type is not None:
        contract["row_type"] = row_type
    # Copy only the list-valued field declarations the registry provides.
    contract.update(
        {
            key: list(metadata[key])
            for key in ("default_fields", "guaranteed_fields", "optional_fields")
            if isinstance(metadata.get(key), list)
        }
    )
    return contract
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
def _limit_contract(helper_name: str) -> dict[str, Any] | None:
    """Collect limit-related settings for *helper_name*, or None when none exist.

    Wrapper metadata (default_limit/max_limit) wins; pagination-policy entries
    only fill keys the metadata did not already provide.
    """
    merged: dict[str, Any] = {}
    helper_meta = HELPER_DEFAULT_METADATA.get(helper_name, {})
    for name in ("default_limit", "max_limit"):
        configured = helper_meta.get(name)
        if configured is not None:
            merged[name] = configured
    # Policy values never override metadata, and None entries are skipped.
    for name, policy_value in PAGINATION_POLICY.get(helper_name, {}).items():
        if policy_value is not None and name not in merged:
            merged[name] = policy_value
    return merged if merged else None
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
def _fields_contract(field_group: str | None) -> dict[str, Any] | None:
    """Describe the canonical field surface for *field_group*.

    Returns None when the helper declares no field group. Otherwise emits
    a dict asserting canonical-only naming plus the allowed field list.
    """
    if field_group is None:
        return None
    allowed = list(FIELD_GROUPS[field_group])
    return {"canonical_only": True, "allowed_fields": allowed}
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
def _filter_contract(filter_param: str | None, field_group: str | None) -> tuple[str, dict[str, Any]] | None:
    """Return a ``("<param>_contract", payload)`` pair for a filter parameter.

    None is returned when either the filter parameter or its field group is
    missing, i.e. the helper exposes no structured filter.
    """
    if filter_param is None or field_group is None:
        return None
    payload = {
        "allowed_fields": list(FIELD_GROUPS[field_group]),
        "supported_ops": list(FILTER_OPERATORS),
        "normalized_only": True,
    }
    return f"{filter_param}_contract", payload
|
| 347 |
+
|
| 348 |
+
|
| 349 |
+
def _notes_for_helper(helper_name: str) -> str | None:
    """Return the stripped notes string for *helper_name*, or None if absent/blank."""
    raw = HELPER_DEFAULT_METADATA.get(helper_name, {}).get("notes")
    if isinstance(raw, str):
        stripped = raw.strip()
        if stripped:
            return stripped
    return None
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
def _param_values_for_helper(helper_name: str) -> dict[str, list[str]] | None:
    """Expose enumerated per-parameter values for *helper_name* (None when empty)."""
    spec = HELPER_CONTRACT_SPECS.get(helper_name, {})
    result: dict[str, list[str]] = {}
    for param, raw_options in spec.get("param_values", {}).items():
        result[param] = list(raw_options)
    if helper_name == "hf_repo_search":
        # hf_repo_search accepts the union of every repo type's sort keys.
        all_keys = [key for keys in REPO_SORT_KEYS.values() for key in keys]
        result["sort"] = sorted(_dedupe(all_keys))
    return result if result else None
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
def build_helper_contracts(
    helper_functions: Mapping[str, Callable[..., Any]],
) -> dict[str, HelperContract]:
    """Assemble a HelperContract for every registered helper function.

    Combines the live function signature (via inspect) with the static
    HELPER_CONTRACT_SPECS entry and, where declared, facts introspected
    from the upstream huggingface_hub API. Helpers are processed in
    sorted-name order so output is deterministic.
    """
    upstream_facts = _upstream_repo_search_facts()
    contracts: dict[str, HelperContract] = {}
    for helper_name, fn in sorted(helper_functions.items()):
        spec = HELPER_CONTRACT_SPECS.get(helper_name, {})
        row_type = spec.get("row_type")
        fields_group = spec.get("fields_group")
        filter_param = spec.get("filter_param")
        filter_group = spec.get("filter_group")
        # Base contract: always present keys. The "await" prefix documents
        # that every helper is a coroutine function.
        contract: HelperContract = {
            "name": helper_name,
            "signature": f"await {helper_name}{inspect.signature(fn)}",
            "category": str(spec.get("category") or "helper"),
            "supported_params": list(inspect.signature(fn).parameters),
            "returns": _returns_contract(helper_name, row_type),
        }
        # Optional sections are only attached when the spec declares them.
        fields_contract = _fields_contract(fields_group)
        if fields_contract is not None:
            contract["fields_contract"] = fields_contract
        filter_contract = _filter_contract(filter_param, filter_group)
        if filter_contract is not None:
            # _filter_contract returns (key, payload); key is e.g. "where_contract".
            contract[filter_contract[0]] = filter_contract[1]
        limit_contract = _limit_contract(helper_name)
        if limit_contract is not None:
            contract["limit_contract"] = limit_contract
        param_values = _param_values_for_helper(helper_name)
        if param_values is not None:
            contract["param_values"] = param_values

        upstream_repo_type = spec.get("upstream_repo_type")
        if isinstance(upstream_repo_type, str):
            # Helper is backed 1:1 by an HfApi list_* method: copy its
            # introspected sort/expand vocabulary.
            upstream = upstream_facts[upstream_repo_type]
            contract["backed_by"] = str(upstream["method_name"])
            contract["sort_values"] = list(upstream["sort_values"])
            contract["expand_values"] = list(upstream["expand_values"])
        elif helper_name == "hf_repo_search":
            # The multi-repo-type search exposes sort keys per repo type.
            contract["sort_values_by_repo_type"] = {
                repo_type: sorted(values)
                for repo_type, values in sorted(REPO_SORT_KEYS.items())
            }

        if helper_name == "hf_user_likes":
            # Hard-coded: likes sorting is implemented in the wrapper,
            # not derived from upstream metadata.
            contract["sort_values"] = ["liked_at", "repo_likes", "repo_downloads"]

        note = _notes_for_helper(helper_name)
        if note is not None:
            contract["notes"] = note
        contracts[helper_name] = contract
    return contracts
|
| 420 |
+
|
| 421 |
+
|
| 422 |
+
def _format_list(values: list[str] | None) -> str:
|
| 423 |
+
if not values:
|
| 424 |
+
return "[]"
|
| 425 |
+
return ", ".join(f"`{value}`" for value in values)
|
| 426 |
+
|
| 427 |
+
|
| 428 |
+
def _append_returns(lines: list[str], returns: Mapping[str, Any]) -> None:
    """Append the markdown bullets describing a helper's ``returns`` section."""
    lines.append("- returns:")
    if isinstance(returns.get("envelope"), Mapping):
        lines.append(" - envelope: `{ok, item, items, meta, error}`")
    declared_row_type = returns.get("row_type")
    if isinstance(declared_row_type, str):
        lines.append(f" - row_type: `{declared_row_type}`")
    for field_key in ("default_fields", "guaranteed_fields", "optional_fields"):
        field_list = returns.get(field_key)
        if isinstance(field_list, list):
            lines.append(f" - {field_key}: {_format_list(field_list)}")
|
| 440 |
+
|
| 441 |
+
|
| 442 |
+
def _append_named_contract(
    lines: list[str],
    label: str,
    contract: Mapping[str, Any] | None,
) -> None:
    """Append a labeled sub-contract section (fields/where/post-filter).

    No-op when *contract* is not a mapping.
    """
    if not isinstance(contract, Mapping):
        return
    lines.append(f"- {label}:")
    field_names = contract.get("allowed_fields")
    if isinstance(field_names, list):
        lines.append(f" - allowed_fields: {_format_list(field_names)}")
    ops = contract.get("supported_ops")
    if isinstance(ops, list):
        lines.append(f" - supported_ops: {_format_list(ops)}")
    # Boolean flags are only rendered when strictly True.
    if contract.get("canonical_only") is True:
        lines.append(" - canonical_only: `true`")
    if contract.get("normalized_only") is True:
        lines.append(" - normalized_only: `true`")
|
| 462 |
+
|
| 463 |
+
|
| 464 |
+
def _append_limit_contract(lines: list[str], contract: Mapping[str, Any] | None) -> None:
|
| 465 |
+
if not isinstance(contract, Mapping) or not contract:
|
| 466 |
+
return
|
| 467 |
+
lines.append("- limit_contract:")
|
| 468 |
+
for key, value in contract.items():
|
| 469 |
+
lines.append(f" - {key}: `{value}`")
|
| 470 |
+
|
| 471 |
+
|
| 472 |
+
def _append_param_values(lines: list[str], param_values: Mapping[str, Any] | None) -> None:
    """Append the param_values section listing enumerated parameter options.

    Non-mapping or empty input is ignored; only list-valued entries render.
    """
    if not isinstance(param_values, Mapping) or not param_values:
        return
    lines.append("- param_values:")
    for param, options in param_values.items():
        if isinstance(options, list):
            lines.append(f" - {param}: {_format_list(options)}")
|
| 479 |
+
|
| 480 |
+
|
| 481 |
+
def build_helper_contracts_markdown(
    helper_contracts: Mapping[str, Mapping[str, Any]],
) -> str:
    """Render the helper-contract mapping as a markdown document.

    Emits a fixed preamble, then one "### <helper>" section per contract in
    sorted-name order. Each optional contract key is rendered only when
    present and of the expected type. The result always ends with exactly
    one trailing newline.
    """
    lines = [
        "## Helper contracts (generated from runtime + wrapper metadata)",
        "",
        "These contracts describe the normalized wrapper surface exposed to generated code.",
        "Field names and helper-visible enum values are canonical snake_case wrapper names.",
        "",
        "All helpers return the same envelope: `{ok, item, items, meta, error}`.",
        "",
    ]
    for helper_name, contract in sorted(helper_contracts.items()):
        lines.append(f"### {helper_name}")
        lines.append("")
        category = contract.get("category")
        if isinstance(category, str):
            lines.append(f"- category: `{category}`")
        backed_by = contract.get("backed_by")
        if isinstance(backed_by, str):
            lines.append(f"- backed_by: `{backed_by}`")
        returns = contract.get("returns")
        if isinstance(returns, Mapping):
            _append_returns(lines, returns)
        supported_params = contract.get("supported_params")
        if isinstance(supported_params, list):
            lines.append(f"- supported_params: {_format_list(supported_params)}")
        sort_values = contract.get("sort_values")
        if isinstance(sort_values, list):
            lines.append(f"- sort_values: {_format_list(sort_values)}")
        # hf_repo_search carries per-repo-type sort vocabularies instead of
        # a flat sort_values list.
        sort_values_by_repo_type = contract.get("sort_values_by_repo_type")
        if isinstance(sort_values_by_repo_type, Mapping):
            lines.append("- sort_values_by_repo_type:")
            for repo_type, values in sort_values_by_repo_type.items():
                if isinstance(values, list):
                    lines.append(f" - {repo_type}: {_format_list(values)}")
        expand_values = contract.get("expand_values")
        if isinstance(expand_values, list):
            lines.append(f"- expand_values: {_format_list(expand_values)}")
        _append_param_values(lines, contract.get("param_values"))
        _append_named_contract(lines, "fields_contract", contract.get("fields_contract"))
        _append_named_contract(lines, "where_contract", contract.get("where_contract"))
        _append_named_contract(
            lines, "post_filter_contract", contract.get("post_filter_contract")
        )
        _append_limit_contract(lines, contract.get("limit_contract"))
        notes = contract.get("notes")
        if isinstance(notes, str):
            lines.append(f"- notes: {notes}")
        # Blank line separates helper sections.
        lines.append("")
    # Normalize trailing whitespace to a single final newline.
    return "\n".join(lines).rstrip() + "\n"
|
.prod/monty_api/helpers/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .activity import register_activity_helpers
|
| 2 |
+
from .collections import register_collection_helpers
|
| 3 |
+
from .introspection import register_introspection_helpers
|
| 4 |
+
from .profiles import register_profile_helpers
|
| 5 |
+
from .repos import register_repo_helpers
|
| 6 |
+
|
| 7 |
+
__all__ = [
|
| 8 |
+
"register_activity_helpers",
|
| 9 |
+
"register_collection_helpers",
|
| 10 |
+
"register_introspection_helpers",
|
| 11 |
+
"register_profile_helpers",
|
| 12 |
+
"register_repo_helpers",
|
| 13 |
+
]
|
.prod/monty_api/helpers/activity.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
# ruff: noqa: C901, PLR0912, PLR0913, PLR0915, PLR0917
|
| 4 |
+
from functools import partial
|
| 5 |
+
from typing import Any, Callable
|
| 6 |
+
|
| 7 |
+
from ..constants import (
|
| 8 |
+
ACTIVITY_CANONICAL_FIELDS,
|
| 9 |
+
EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 10 |
+
RECENT_ACTIVITY_PAGE_SIZE,
|
| 11 |
+
RECENT_ACTIVITY_SCAN_MAX_PAGES,
|
| 12 |
+
)
|
| 13 |
+
from ..context_types import HelperRuntimeContext
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
async def hf_recent_activity(
    ctx: HelperRuntimeContext,
    feed_type: str | None = None,
    entity: str | None = None,
    activity_types: list[str] | None = None,
    repo_types: list[str] | None = None,
    limit: int | None = None,
    max_pages: int | None = None,
    start_cursor: str | None = None,
    count_only: bool = False,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """Scan the /api/recent-activity cursor feed for a user or org.

    Pages through the feed (bounded by policy page/pages caps and the
    runtime call budget), applies activity-type / repo-type / where
    filters client-side, and returns the standard helper envelope with
    exhaustive-scan meta (scanned, matched, pages, more_available, ...).

    feed_type: "user" or "org". As a convenience, a bare name passed as
    feed_type (with no entity) is treated as entity with feed_type "user".
    """
    start_calls = ctx.call_count["n"]
    default_limit = ctx._policy_int("hf_recent_activity", "default_limit", 100)
    page_cap = ctx._policy_int(
        "hf_recent_activity", "page_limit", RECENT_ACTIVITY_PAGE_SIZE
    )
    pages_cap = ctx._policy_int(
        "hf_recent_activity", "max_pages", RECENT_ACTIVITY_SCAN_MAX_PAGES
    )
    requested_max_pages = max_pages
    ft = str(feed_type or "").strip().lower()
    ent = str(entity or "").strip()
    # Lenient argument handling: allow feed_type to carry the entity name,
    # or entity alone to imply a user feed.
    if ft not in {"user", "org"}:
        if ft and (not ent):
            ent = ft
            ft = "user"
        elif not ft and ent:
            ft = "user"
    if ft not in {"user", "org"}:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/recent-activity",
            error="feed_type must be 'user' or 'org'",
        )
    if not ent:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/recent-activity",
            error="entity is required",
        )
    limit_plan = ctx._resolve_exhaustive_limits(
        limit=limit,
        count_only=count_only,
        default_limit=default_limit,
        max_limit=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
    )
    applied_limit = int(limit_plan["applied_limit"])
    page_lim = page_cap
    # Caller may request fewer pages, never more than the policy cap.
    pages_lim = ctx._clamp_int(
        requested_max_pages, default=pages_cap, minimum=1, maximum=pages_cap
    )
    # Filters are normalized to lowercase / canonical repo-type names.
    type_filter = {
        str(t).strip().lower() for t in activity_types or [] if str(t).strip()
    }
    repo_filter = {
        ctx._canonical_repo_type(t, default="")
        for t in repo_types or []
        if str(t).strip()
    }
    next_cursor = (
        str(start_cursor).strip()
        if isinstance(start_cursor, str) and start_cursor.strip()
        else None
    )
    items: list[dict[str, Any]] = []
    scanned = 0       # rows seen (post repo/dict sanity check)
    matched = 0       # rows passing every filter, including `where`
    pages = 0
    exhausted_feed = False       # feed ended (empty page or no cursor)
    stopped_for_budget = False   # scan aborted by the runtime call budget
    try:
        normalized_where = ctx._normalize_where(
            where, allowed_fields=ACTIVITY_CANONICAL_FIELDS
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/recent-activity",
            error=exc,
        )
    # applied_limit == 0 means count-only: keep scanning without collecting.
    while pages < pages_lim and (applied_limit == 0 or len(items) < applied_limit):
        if ctx._budget_remaining() <= 0:
            stopped_for_budget = True
            break
        params: dict[str, Any] = {"feedType": ft, "entity": ent, "limit": page_lim}
        if next_cursor:
            params["cursor"] = next_cursor
        resp = ctx._host_raw_call("/api/recent-activity", params=params)
        if not resp.get("ok"):
            # First-page failure is a hard error; mid-scan failure returns
            # the partial results gathered so far.
            if pages == 0:
                return ctx._helper_error(
                    start_calls=start_calls,
                    source="/api/recent-activity",
                    error=resp.get("error") or "recent-activity fetch failed",
                )
            break
        payload = resp.get("data") if isinstance(resp.get("data"), dict) else {}
        rows = (
            payload.get("recentActivity")
            if isinstance(payload.get("recentActivity"), list)
            else []
        )
        cursor_raw = payload.get("cursor")
        next_cursor = cursor_raw if isinstance(cursor_raw, str) and cursor_raw else None
        pages += 1
        if not rows:
            exhausted_feed = True
            break
        for row in rows:
            if not isinstance(row, dict):
                continue
            scanned += 1
            typ = str(row.get("type") or "").strip().lower()
            repo_id = row.get("repoId")
            repo_type = row.get("repoType")
            # Feed rows carry repo info in several shapes: top-level fields,
            # a nested "repoData" dict, or a nested "repo" dict. Try each.
            repo_data = (
                row.get("repoData") if isinstance(row.get("repoData"), dict) else None
            )
            repo_obj = row.get("repo") if isinstance(row.get("repo"), dict) else None
            if repo_id is None and repo_data is not None:
                repo_id = repo_data.get("id") or repo_data.get("name")
            if repo_id is None and repo_obj is not None:
                repo_id = repo_obj.get("id") or repo_obj.get("name")
            if repo_type is None and repo_data is not None:
                repo_type = repo_data.get("type")
            if repo_type is None and repo_obj is not None:
                repo_type = repo_obj.get("type")
            rt = ctx._canonical_repo_type(repo_type, default="") if repo_type else ""
            if type_filter and typ not in type_filter:
                continue
            if repo_filter and rt not in repo_filter:
                continue
            item = {
                "timestamp": row.get("time"),
                "event_type": row.get("type"),
                "repo_type": rt or repo_type,
                "repo_id": repo_id,
            }
            if not ctx._item_matches_where(item, normalized_where):
                continue
            matched += 1
            # Keep counting matches past the limit so `matched` can be exact.
            if len(items) < applied_limit:
                items.append(item)
        if not next_cursor:
            exhausted_feed = True
            break
    try:
        items = ctx._project_activity_items(items, fields)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/recent-activity",
            error=exc,
        )
    # Counts are exact only when the feed was fully drained without a
    # budget stop; otherwise they are lower bounds.
    exact_count = exhausted_feed and (not stopped_for_budget)
    sample_complete = (
        exact_count and applied_limit >= matched and (not count_only or matched == 0)
    )
    page_limit_hit = (
        next_cursor is not None and pages >= pages_lim and (not exhausted_feed)
    )
    more_available: bool | str = ctx._derive_more_available(
        sample_complete=sample_complete,
        exact_count=exact_count,
        returned=len(items),
        total=matched if exact_count else None,
    )
    # A live cursor definitively means more data; a budget stop without an
    # exact count leaves availability unknowable.
    if next_cursor is not None:
        more_available = True
    elif stopped_for_budget and (not exact_count):
        more_available = "unknown"
    meta = ctx._build_exhaustive_result_meta(
        base_meta={
            "scanned": scanned,
            "total": matched,
            "total_matched": matched,
            "pages": pages,
            "count_source": "scan" if exact_count else "none",
            "lower_bound": not exact_count,
            "page_limit": page_lim,
            "stopped_for_budget": stopped_for_budget,
            "feed_type": ft,
            "entity": ent,
        },
        limit_plan=limit_plan,
        matched_count=matched,
        returned_count=len(items),
        exact_count=exact_count,
        count_only=count_only,
        sample_complete=sample_complete,
        more_available=more_available,
        page_limit_hit=page_limit_hit,
        truncated_extra=stopped_for_budget,
        requested_max_pages=requested_max_pages,
        applied_max_pages=pages_lim,
    )
    return ctx._helper_success(
        start_calls=start_calls,
        source="/api/recent-activity",
        items=items,
        meta=meta,
        cursor=next_cursor,
    )
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def register_activity_helpers(
    ctx: HelperRuntimeContext,
) -> dict[str, Callable[..., Any]]:
    """Bind the activity helpers to *ctx* and return them keyed by helper name."""
    bound = partial(hf_recent_activity, ctx)
    return {"hf_recent_activity": bound}
|
.prod/monty_api/helpers/collections.py
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
# ruff: noqa: C901, PLR0912, PLR0913, PLR0915, PLR0917
|
| 4 |
+
from functools import partial
|
| 5 |
+
from typing import Any, Callable
|
| 6 |
+
|
| 7 |
+
from ..constants import (
|
| 8 |
+
COLLECTION_CANONICAL_FIELDS,
|
| 9 |
+
OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
| 10 |
+
REPO_CANONICAL_FIELDS,
|
| 11 |
+
)
|
| 12 |
+
from ..context_types import HelperRuntimeContext
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
async def hf_collections_search(
    ctx: HelperRuntimeContext,
    query: str | None = None,
    owner: str | None = None,
    limit: int = 20,
    count_only: bool = False,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """Search Hub collections via /api/collections, normalized to wrapper rows.

    At least one of *query* or *owner* is required (owner doubles as the
    search term when no query is given). Owner matching is case-insensitive;
    if the owner-scoped request returns no owner matches, a second request
    without the owner parameter is tried as a fallback. Results are filtered
    with *where*, truncated to *limit*, and projected to *fields*.
    """
    start_calls = ctx.call_count["n"]
    default_limit = ctx._policy_int("hf_collections_search", "default_limit", 20)
    max_limit = ctx._policy_int(
        "hf_collections_search", "max_limit", OUTPUT_ITEMS_TRUNCATION_LIMIT
    )
    if count_only:
        limit = 0
    applied_limit = ctx._clamp_int(
        limit,
        default=default_limit,
        minimum=0,
        maximum=max_limit,
    )
    owner_clean = str(owner or "").strip() or None
    owner_casefold = owner_clean.casefold() if owner_clean is not None else None
    # Fetch wider than the output limit when counting or owner-filtering,
    # since owner rows may be sparse in the raw response.
    fetch_limit = max_limit if applied_limit == 0 or owner_clean else applied_limit
    if owner_clean:
        # NOTE(review): 100 appears to be the API's per-request cap — confirm.
        fetch_limit = min(fetch_limit, 100)
    term = str(query or "").strip()
    if not term and owner_clean:
        term = owner_clean
    if not term:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/collections",
            error="query or owner is required",
        )
    params: dict[str, Any] = {"limit": fetch_limit}
    if term:
        params["q"] = term
    if owner_clean:
        params["owner"] = owner_clean
    resp = ctx._host_raw_call("/api/collections", params=params)
    if not resp.get("ok"):
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/collections",
            error=resp.get("error") or "collections fetch failed",
        )
    payload = resp.get("data") if isinstance(resp.get("data"), list) else []

    def _row_owner_matches_owner(row: Any) -> bool:
        # True when the row's owner (from owner/ownerData, or the slug's
        # "<owner>/..." prefix) equals the requested owner, case-insensitively.
        # With no owner filter, every row trivially "matches".
        if owner_casefold is None or not isinstance(row, dict):
            return owner_casefold is None
        row_owner = ctx._author_from_any(row.get("owner")) or ctx._author_from_any(
            row.get("ownerData")
        )
        if (
            not row_owner
            and isinstance(row.get("slug"), str)
            and "/" in str(row.get("slug"))
        ):
            row_owner = str(row.get("slug")).split("/", 1)[0]
        if not isinstance(row_owner, str) or not row_owner:
            return False
        return row_owner.casefold() == owner_casefold

    # If the owner-scoped request produced no owner matches (the API's owner
    # filter can be case-sensitive), retry without "owner" and keep the
    # fallback payload only if it actually contains matches.
    owner_fallback_used = False
    if owner_casefold is not None and not any(
        _row_owner_matches_owner(row) for row in payload
    ):
        fallback_params: dict[str, Any] = {"limit": fetch_limit}
        if term:
            fallback_params["q"] = term
        fallback_resp = ctx._host_raw_call("/api/collections", params=fallback_params)
        if fallback_resp.get("ok"):
            fallback_payload = (
                fallback_resp.get("data")
                if isinstance(fallback_resp.get("data"), list)
                else []
            )
            if any(_row_owner_matches_owner(row) for row in fallback_payload):
                payload = fallback_payload
                owner_fallback_used = True

    # Normalize raw rows to the canonical collection shape, dropping rows
    # that fail the owner filter.
    items: list[dict[str, Any]] = []
    for row in payload[:fetch_limit]:
        if not isinstance(row, dict):
            continue
        row_owner = ctx._author_from_any(row.get("owner")) or ctx._author_from_any(
            row.get("ownerData")
        )
        if (
            not row_owner
            and isinstance(row.get("slug"), str)
            and "/" in str(row.get("slug"))
        ):
            row_owner = str(row.get("slug")).split("/", 1)[0]
        if owner_casefold is not None and (
            not isinstance(row_owner, str) or row_owner.casefold() != owner_casefold
        ):
            continue
        owner_payload = row.get("owner") if isinstance(row.get("owner"), dict) else {}
        collection_items = (
            row.get("items") if isinstance(row.get("items"), list) else []
        )
        slug = row.get("slug")
        items.append(
            {
                "collection_id": slug,
                "slug": slug,
                "title": row.get("title"),
                "owner": row_owner,
                "owner_type": owner_payload.get("type")
                if isinstance(owner_payload.get("type"), str)
                else None,
                "description": row.get("description"),
                "gating": row.get("gating"),
                "last_updated": row.get("lastUpdated"),
                "item_count": len(collection_items),
            }
        )
    try:
        items = ctx._apply_where(
            items, where, allowed_fields=COLLECTION_CANONICAL_FIELDS
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/collections",
            error=exc,
        )
    # Count matches before truncation so total_matched stays accurate.
    total_matched = len(items)
    items = items[:applied_limit]
    try:
        items = ctx._project_collection_items(items, fields)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/collections",
            error=exc,
        )
    # Truncated when the limit cut off matches, or (count-only) when the
    # fetch itself may have been capped by fetch_limit.
    truncated = (
        applied_limit > 0 and total_matched > applied_limit
        or (applied_limit == 0 and len(payload) >= fetch_limit)
    )
    return ctx._helper_success(
        start_calls=start_calls,
        source="/api/collections",
        items=items,
        scanned=len(payload),
        matched=total_matched,
        returned=len(items),
        total=len(payload),
        total_matched=total_matched,
        total_population=len(payload),
        truncated=truncated,
        complete=not truncated,
        query=term,
        owner=owner_clean,
        owner_case_insensitive_fallback=owner_fallback_used,
    )
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
async def hf_collection_items(
    ctx: HelperRuntimeContext,
    collection_id: str,
    repo_types: list[str] | None = None,
    limit: int = 100,
    count_only: bool = False,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """List the repo items of one Hub collection, with optional filtering/projection.

    Fetches ``/api/collections/<collection_id>`` via the host raw-call channel,
    normalizes each item row, filters by ``repo_types`` and ``where``, truncates to
    the clamped ``limit``, and projects to ``fields``. ``count_only=True`` forces
    the limit to 0 so only counts are returned. Every exit path goes through
    ``ctx._helper_error`` / ``ctx._helper_success`` so the envelope shape is uniform.
    """
    # Snapshot the call counter so the envelope can report calls made by this helper.
    start_calls = ctx.call_count["n"]
    default_limit = ctx._policy_int("hf_collection_items", "default_limit", 100)
    max_limit = ctx._policy_int(
        "hf_collection_items", "max_limit", OUTPUT_ITEMS_TRUNCATION_LIMIT
    )
    cid = str(collection_id or "").strip()
    if not cid:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/collections/<collection_id>",
            error="collection_id is required",
        )
    if count_only:
        # Counting mode: never return rows, only the matched/total metadata.
        limit = 0
    applied_limit = ctx._clamp_int(
        limit,
        default=default_limit,
        minimum=0,
        maximum=max_limit,
    )
    allowed_repo_types: set[str] | None = None
    try:
        raw_repo_types = (
            ctx._coerce_str_list(repo_types) if repo_types is not None else []
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/collections/{cid}",
            error=exc,
            collection_id=cid,
        )
    if raw_repo_types:
        allowed_repo_types = set()
        for raw in raw_repo_types:
            # Canonicalize user-supplied type names; reject anything outside the
            # three supported repo kinds rather than silently dropping it.
            canonical = ctx._canonical_repo_type(raw, default="")
            if canonical not in {"model", "dataset", "space"}:
                return ctx._helper_error(
                    start_calls=start_calls,
                    source=f"/api/collections/{cid}",
                    error=f"Unsupported repo_type '{raw}'",
                    collection_id=cid,
                )
            allowed_repo_types.add(canonical)
    endpoint = f"/api/collections/{cid}"
    resp = ctx._host_raw_call(endpoint)
    if not resp.get("ok"):
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=resp.get("error") or "collection fetch failed",
            collection_id=cid,
        )
    # Defensive reads: tolerate unexpected payload shapes from the host call.
    payload = resp.get("data") if isinstance(resp.get("data"), dict) else {}
    raw_items = payload.get("items") if isinstance(payload.get("items"), list) else []
    owner = ctx._author_from_any(payload.get("owner"))
    owner_payload = (
        payload.get("owner") if isinstance(payload.get("owner"), dict) else {}
    )
    if owner is None and "/" in cid:
        # Fall back to the namespace prefix of "owner/slug" style collection ids.
        owner = cid.split("/", 1)[0]
    try:
        # Items inside a collection are repo rows, so validate ``where`` against
        # the repo field set (not the collection field set).
        normalized_where = ctx._normalize_where(
            where, allowed_fields=REPO_CANONICAL_FIELDS
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
            collection_id=cid,
        )
    normalized: list[dict[str, Any]] = []
    for row in raw_items:
        if not isinstance(row, dict):
            continue
        item = ctx._normalize_collection_repo_item(row)
        if item is None:
            continue
        repo_type = item.get("repo_type")
        if allowed_repo_types is not None and repo_type not in allowed_repo_types:
            continue
        if not ctx._item_matches_where(item, normalized_where):
            continue
        normalized.append(item)
    total_matched = len(normalized)
    items = [] if count_only else normalized[:applied_limit]
    try:
        items = ctx._project_repo_items(items, fields)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
            collection_id=cid,
        )
    # applied_limit == 0 (count_only) is never reported as truncation.
    truncated = applied_limit > 0 and total_matched > applied_limit
    return ctx._helper_success(
        start_calls=start_calls,
        source=endpoint,
        items=items,
        scanned=len(raw_items),
        matched=total_matched,
        returned=len(items),
        total=len(raw_items),
        total_matched=total_matched,
        total_population=len(raw_items),
        truncated=truncated,
        complete=not truncated,
        collection_id=cid,
        title=payload.get("title"),
        owner=owner,
        owner_type=owner_payload.get("type")
        if isinstance(owner_payload.get("type"), str)
        else None,
        repo_types=sorted(allowed_repo_types)
        if allowed_repo_types is not None
        else None,
    )
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
def register_collection_helpers(
    ctx: HelperRuntimeContext,
) -> dict[str, Callable[..., Any]]:
    """Bind the collection helpers to *ctx* and return them keyed by helper name."""
    bound: dict[str, Callable[..., Any]] = {}
    for helper_name, helper_fn in (
        ("hf_collections_search", hf_collections_search),
        ("hf_collection_items", hf_collection_items),
    ):
        bound[helper_name] = partial(helper_fn, ctx)
    return bound
|
.prod/monty_api/helpers/common.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
from ..context_types import HelperRuntimeContext
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
async def resolve_username_or_current(
    ctx: HelperRuntimeContext,
    username: str | None,
) -> tuple[str | None, str | None]:
    """Resolve a username, falling back to the authenticated user.

    Returns an ``(username, error)`` pair where exactly one side is populated:
    the explicit (stripped) *username* when supplied, otherwise the username
    reported by the ``hf_whoami`` helper, or ``None`` plus an error message.
    """
    explicit = str(username or "").strip()
    if explicit:
        return explicit, None

    # No username supplied — ask the whoami helper for the current user.
    whoami = await ctx.call_helper("hf_whoami")
    if whoami.get("ok") is not True:
        message = str(
            whoami.get("error") or "Could not resolve current authenticated user"
        )
        return None, message
    envelope_item = ctx._helper_item(whoami)
    candidate = (
        envelope_item.get("username") if isinstance(envelope_item, dict) else None
    )
    if isinstance(candidate, str) and candidate.strip():
        return candidate.strip(), None
    return (
        None,
        "username was not provided and current authenticated user could not be resolved",
    )
|
.prod/monty_api/helpers/introspection.py
ADDED
|
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
# ruff: noqa: C901, PLR0912, PLR0913, PLR0915, PLR0917
|
| 4 |
+
import inspect
|
| 5 |
+
from functools import partial
|
| 6 |
+
from typing import Any, Callable
|
| 7 |
+
|
| 8 |
+
from ..helper_contracts import build_helper_contracts
|
| 9 |
+
from ..constants import (
|
| 10 |
+
ACTIVITY_CANONICAL_FIELDS,
|
| 11 |
+
ACTOR_CANONICAL_FIELDS,
|
| 12 |
+
COLLECTION_CANONICAL_FIELDS,
|
| 13 |
+
DAILY_PAPER_CANONICAL_FIELDS,
|
| 14 |
+
DISCUSSION_CANONICAL_FIELDS,
|
| 15 |
+
DISCUSSION_DETAIL_CANONICAL_FIELDS,
|
| 16 |
+
DEFAULT_MAX_CALLS,
|
| 17 |
+
DEFAULT_TIMEOUT_SEC,
|
| 18 |
+
GRAPH_SCAN_LIMIT_CAP,
|
| 19 |
+
LIKES_SCAN_LIMIT_CAP,
|
| 20 |
+
MAX_CALLS_LIMIT,
|
| 21 |
+
OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
| 22 |
+
PROFILE_CANONICAL_FIELDS,
|
| 23 |
+
RECENT_ACTIVITY_SCAN_MAX_PAGES,
|
| 24 |
+
REPO_CANONICAL_FIELDS,
|
| 25 |
+
TRENDING_ENDPOINT_MAX_LIMIT,
|
| 26 |
+
USER_CANONICAL_FIELDS,
|
| 27 |
+
USER_LIKES_CANONICAL_FIELDS,
|
| 28 |
+
)
|
| 29 |
+
from ..context_types import HelperRuntimeContext
|
| 30 |
+
from ..registry import (
|
| 31 |
+
HELPER_COVERED_ENDPOINT_PATTERNS,
|
| 32 |
+
HELPER_DEFAULT_METADATA,
|
| 33 |
+
PAGINATION_POLICY,
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _render_annotation(annotation: Any) -> str:
|
| 38 |
+
if annotation is inspect.Signature.empty:
|
| 39 |
+
return "Any"
|
| 40 |
+
return str(annotation)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def _render_default(default: Any) -> str | None:
|
| 44 |
+
if default is inspect.Signature.empty:
|
| 45 |
+
return None
|
| 46 |
+
return repr(default)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _signature_payload(fn: Callable[..., Any]) -> dict[str, Any]:
|
| 50 |
+
signature = inspect.signature(fn)
|
| 51 |
+
parameters: list[dict[str, Any]] = []
|
| 52 |
+
for parameter in signature.parameters.values():
|
| 53 |
+
item: dict[str, Any] = {
|
| 54 |
+
"name": parameter.name,
|
| 55 |
+
"kind": str(parameter.kind).replace("Parameter.", "").lower(),
|
| 56 |
+
"annotation": _render_annotation(parameter.annotation),
|
| 57 |
+
"required": parameter.default is inspect.Signature.empty,
|
| 58 |
+
}
|
| 59 |
+
default = _render_default(parameter.default)
|
| 60 |
+
if default is not None:
|
| 61 |
+
item["default"] = default
|
| 62 |
+
parameters.append(item)
|
| 63 |
+
return {
|
| 64 |
+
"parameters": parameters,
|
| 65 |
+
"returns": _render_annotation(signature.return_annotation),
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
async def hf_runtime_capabilities(
    ctx: HelperRuntimeContext,
    section: str | None = None,
) -> dict[str, Any]:
    """Return a self-describing manifest of the helper runtime.

    Builds a large static+introspected manifest (helper signatures, contracts,
    canonical field lists, limits/pagination policy, and repo-search guidance).
    With no *section*, the full manifest is returned; with a *section* name,
    only that top-level key is returned. Unknown sections produce a helper
    error listing the allowed section names. Makes no network calls.
    """
    start_calls = ctx.call_count["n"]
    # Mark that an internal (non-Hub) helper was used for this request.
    ctx.internal_helper_used["used"] = True

    # Include this helper itself so the manifest is complete.
    helper_functions = {
        **ctx.helper_registry,
        "hf_runtime_capabilities": partial(hf_runtime_capabilities, ctx),
    }
    helper_payload = {
        name: _signature_payload(fn) for name, fn in sorted(helper_functions.items())
    }
    helper_contracts = build_helper_contracts(helper_functions)
    repo_type_helper_names = {
        "dataset": "hf_datasets_search",
        "model": "hf_models_search",
        "space": "hf_spaces_search",
    }

    def _helper_contract(name: str) -> dict[str, Any]:
        # Defensive copy so manifest consumers cannot mutate the shared contract.
        contract = helper_contracts.get(name)
        return dict(contract) if isinstance(contract, dict) else {}

    def _type_specific_params(name: str) -> list[str]:
        # Parameters a repo-type helper supports beyond the shared search set.
        params = _helper_contract(name).get("supported_params")
        if not isinstance(params, list):
            return []
        common = {
            "search",
            "filter",
            "author",
            "sort",
            "limit",
            "expand",
            "full",
            "fields",
            "post_filter",
        }
        return [param for param in params if param not in common]

    manifest: dict[str, Any] = {
        "overview": {
            "helper_count": len(helper_functions),
            "supports_current_user": True,
            "helper_result_envelope": {
                "ok": "bool",
                "item": "dict | None",
                "items": "list[dict]",
                "meta": "dict",
                "error": "str | None",
            },
            "raw_result_envelope": {
                "result": "Any",
                "meta": {
                    "ok": "bool",
                    "api_calls": "int",
                    "elapsed_ms": "int",
                    "limits_reached": "bool",
                    "limit_summary": "list[dict]",
                },
            },
        },
        "helpers": helper_payload,
        "helper_contracts": helper_contracts,
        # Canonical field names per entity type, copied so callers can't mutate them.
        "fields": {
            "profile": list(PROFILE_CANONICAL_FIELDS),
            "repo": list(REPO_CANONICAL_FIELDS),
            "user": list(USER_CANONICAL_FIELDS),
            "actor": list(ACTOR_CANONICAL_FIELDS),
            "user_likes": list(USER_LIKES_CANONICAL_FIELDS),
            "activity": list(ACTIVITY_CANONICAL_FIELDS),
            "collection": list(COLLECTION_CANONICAL_FIELDS),
            "daily_paper": list(DAILY_PAPER_CANONICAL_FIELDS),
            "discussion": list(DISCUSSION_CANONICAL_FIELDS),
            "discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
        },
        "helper_defaults": {
            helper_name: dict(sorted(metadata.items()))
            for helper_name, metadata in sorted(HELPER_DEFAULT_METADATA.items())
        },
        "limits": {
            "default_timeout_sec": DEFAULT_TIMEOUT_SEC,
            "default_max_calls": DEFAULT_MAX_CALLS,
            "max_calls_limit": MAX_CALLS_LIMIT,
            "output_items_truncation_limit": OUTPUT_ITEMS_TRUNCATION_LIMIT,
            "graph_scan_limit_cap": GRAPH_SCAN_LIMIT_CAP,
            "likes_scan_limit_cap": LIKES_SCAN_LIMIT_CAP,
            "recent_activity_scan_max_pages": RECENT_ACTIVITY_SCAN_MAX_PAGES,
            "trending_endpoint_max_limit": TRENDING_ENDPOINT_MAX_LIMIT,
            "pagination_policy": {
                helper_name: dict(sorted(policy.items()))
                for helper_name, policy in sorted(PAGINATION_POLICY.items())
            },
            "helper_covered_endpoint_patterns": [
                {"pattern": pattern, "helper": helper_name}
                for pattern, helper_name in HELPER_COVERED_ENDPOINT_PATTERNS
            ],
        },
        # Static guidance block steering callers toward the right search helper.
        "repo_search": {
            "helper_selection": {
                "preferred_rule": (
                    "Prefer hf_models_search for model queries, hf_datasets_search for "
                    "dataset queries, and hf_spaces_search for space queries. Use "
                    "hf_repo_search only for intentionally cross-type search."
                ),
                "model": "hf_models_search",
                "dataset": "hf_datasets_search",
                "space": "hf_spaces_search",
                "cross_type": "hf_repo_search",
            },
            "can_do": [
                "search models",
                "search datasets",
                "search spaces",
                "search across multiple repo types",
                "project selected fields",
                "apply local post-fetch row filtering",
            ],
            "parameter_contract": {
                "search": {
                    "meaning": "Upstream Hugging Face search text.",
                },
                "filter": {
                    "meaning": (
                        "Upstream Hugging Face filter/tag argument passed directly into "
                        "the Hub client."
                    ),
                },
                "post_filter": {
                    "meaning": (
                        "Local predicate applied after the rows are fetched and normalized."
                    ),
                    "recommended_shapes": [
                        {"runtime_stage": "RUNNING"},
                        {"runtime_stage": {"in": ["BUILD_ERROR", "RUNTIME_ERROR"]}},
                        {"downloads": {"gte": 1000}},
                        {"likes": {"lte": 5000}},
                    ],
                    "prefer_for": [
                        "normalized returned fields such as runtime_stage",
                        "downloads / likes thresholds after a broad search",
                    ],
                    "avoid_when": [
                        "author is already a first-class helper argument",
                        "pipeline_tag is already a first-class model-search argument",
                        "dataset_name, language, task_ids, apps, models, or datasets already have first-class helper args",
                    ],
                },
                "fields": {
                    "meaning": "Select which normalized row fields are returned to the caller.",
                    "canonical_only": True,
                },
            },
            "repo_type_specific_helpers": {
                repo_type: {
                    "helper": helper_name,
                    "supported_params": _helper_contract(helper_name).get(
                        "supported_params"
                    ),
                    "type_specific_params": _type_specific_params(helper_name),
                    "sort_values": _helper_contract(helper_name).get("sort_values"),
                    "expand_values": _helper_contract(helper_name).get("expand_values"),
                    "fields_contract": _helper_contract(helper_name).get(
                        "fields_contract"
                    ),
                    "post_filter_contract": _helper_contract(helper_name).get(
                        "post_filter_contract"
                    ),
                }
                for repo_type, helper_name in sorted(repo_type_helper_names.items())
            },
            "generic_helper": {
                "helper": "hf_repo_search",
                "use_for": "Intentionally cross-type search only.",
                "supports": _helper_contract("hf_repo_search").get("supported_params"),
                "sort_values_by_repo_type": _helper_contract("hf_repo_search").get(
                    "sort_values_by_repo_type"
                ),
                "fields_contract": _helper_contract("hf_repo_search").get(
                    "fields_contract"
                ),
                "post_filter_contract": _helper_contract("hf_repo_search").get(
                    "post_filter_contract"
                ),
                "does_not_support": [
                    "repo-type-specific knobs such as pipeline_tag or dataset_name",
                    "nested advanced routing",
                ],
            },
            "space_runtime_contract": {
                "returned_field": "runtime_stage",
                "full_runtime_field": "runtime",
                "preferred_filter_channel": "post_filter",
                "note": (
                    "Treat runtime_stage like any other returned field: use exact values "
                    "or an 'in' list in post_filter."
                ),
                "common_values": ["BUILD_ERROR", "RUNTIME_ERROR", "RUNNING", "SLEEPING"],
            },
        },
    }
    allowed_sections = sorted(manifest)
    # Section lookup is case-insensitive; manifest keys are all lowercase.
    requested = str(section or "").strip().lower()
    if requested:
        if requested not in manifest:
            return ctx._helper_error(
                start_calls=start_calls,
                source="internal://runtime-capabilities",
                error=f"Unsupported section {section!r}. Allowed sections: {allowed_sections}",
                section=section,
                allowed_sections=allowed_sections,
            )
        payload = {
            "section": requested,
            "content": manifest[requested],
            "allowed_sections": allowed_sections,
        }
    else:
        payload = {"allowed_sections": allowed_sections, **manifest}
    return ctx._helper_success(
        start_calls=start_calls,
        source="internal://runtime-capabilities",
        items=[payload],
        section=requested or None,
    )
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
def register_introspection_helpers(
    ctx: HelperRuntimeContext,
) -> dict[str, Callable[..., Any]]:
    """Expose the introspection helper bound to *ctx*, keyed by its public name."""
    capabilities_helper = partial(hf_runtime_capabilities, ctx)
    return {"hf_runtime_capabilities": capabilities_helper}
|
.prod/monty_api/helpers/profiles.py
ADDED
|
@@ -0,0 +1,861 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
# ruff: noqa: C901, PLR0912, PLR0913, PLR0915, PLR0917
|
| 4 |
+
from itertools import islice
|
| 5 |
+
import re
|
| 6 |
+
from typing import Any, Callable
|
| 7 |
+
from ..context_types import HelperRuntimeContext
|
| 8 |
+
from ..constants import (
|
| 9 |
+
ACTOR_CANONICAL_FIELDS,
|
| 10 |
+
EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 11 |
+
GRAPH_SCAN_LIMIT_CAP,
|
| 12 |
+
OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
| 13 |
+
USER_SUMMARY_ACTIVITY_MAX_PAGES,
|
| 14 |
+
USER_SUMMARY_LIKES_SCAN_LIMIT,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
from .common import resolve_username_or_current
|
| 19 |
+
|
| 20 |
+
from functools import partial
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def _clean_social_handle(value: Any) -> str | None:
|
| 24 |
+
if not isinstance(value, str):
|
| 25 |
+
return None
|
| 26 |
+
cleaned = value.strip()
|
| 27 |
+
if not cleaned:
|
| 28 |
+
return None
|
| 29 |
+
if re.match("^https?://", cleaned, flags=re.IGNORECASE):
|
| 30 |
+
return cleaned
|
| 31 |
+
return cleaned.lstrip("@")
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def _social_url(kind: str, value: Any) -> str | None:
|
| 35 |
+
cleaned = _clean_social_handle(value)
|
| 36 |
+
if cleaned is None:
|
| 37 |
+
return None
|
| 38 |
+
if re.match("^https?://", cleaned, flags=re.IGNORECASE):
|
| 39 |
+
return cleaned
|
| 40 |
+
if kind == "twitter":
|
| 41 |
+
return f"https://twitter.com/{cleaned}"
|
| 42 |
+
if kind == "github":
|
| 43 |
+
return f"https://github.com/{cleaned}"
|
| 44 |
+
if kind == "linkedin":
|
| 45 |
+
if cleaned.startswith(("in/", "company/")):
|
| 46 |
+
return f"https://www.linkedin.com/{cleaned}"
|
| 47 |
+
return f"https://www.linkedin.com/in/{cleaned}"
|
| 48 |
+
if kind == "bluesky":
|
| 49 |
+
return f"https://bsky.app/profile/{cleaned}"
|
| 50 |
+
return cleaned
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
async def hf_whoami(ctx: HelperRuntimeContext) -> dict[str, Any]:
    """Return the current authenticated Hub user as a one-item helper envelope.

    Requires a token from ``ctx._load_token()``; without one, an error envelope
    is returned rather than an anonymous call. The whoami payload is reduced to
    ``username`` / ``fullname`` / ``is_pro``; ``items`` is empty when no usable
    username is present in the response.
    """
    start_calls = ctx.call_count["n"]
    endpoint = "/api/whoami-v2"
    token = ctx._load_token()
    if token is None:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error="Current authenticated user is unavailable for this request. No request-scoped or fallback HF token was found.",
        )
    try:
        # cache=True: rely on the client-side whoami cache to avoid repeat calls.
        payload = ctx._host_hf_call(
            endpoint,
            lambda: ctx._get_hf_api_client().whoami(token=token, cache=True),
        )
    except Exception as e:
        return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
    # Different client/API versions use different keys for the account name.
    username = payload.get("name") or payload.get("user") or payload.get("username")
    item = {
        "username": username,
        "fullname": payload.get("fullname"),
        "is_pro": payload.get("isPro"),
    }
    items = [item] if isinstance(username, str) and username else []
    return ctx._helper_success(
        start_calls=start_calls,
        source=endpoint,
        items=items,
        scanned=1,
        matched=len(items),
        returned=len(items),
        truncated=False,
    )
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
async def _hf_user_overview(ctx: HelperRuntimeContext, username: str) -> dict[str, Any]:
    """Fetch a normalized profile overview for one Hub user.

    Calls ``/api/users/<u>/overview`` via the Hub client, then — only if call
    budget remains and any social handle is still missing — makes one extra
    best-effort call to ``/api/users/<u>/socials`` to fill the gaps. Returns a
    single-item success envelope with cleaned handles, platform URLs, org names,
    and integer-coerced counters.
    """
    start_calls = ctx.call_count["n"]
    u = str(username or "").strip()
    if not u:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/users/<u>/overview",
            error="username is required",
        )
    endpoint = f"/api/users/{u}/overview"
    try:
        obj = ctx._host_hf_call(
            endpoint, lambda: ctx._get_hf_api_client().get_user_overview(u)
        )
    except Exception as e:
        return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
    # Client versions differ on attribute naming; accept either spelling.
    twitter = getattr(obj, "twitter", None) or getattr(obj, "twitterUsername", None)
    github = getattr(obj, "github", None) or getattr(obj, "githubUsername", None)
    linkedin = getattr(obj, "linkedin", None) or getattr(obj, "linkedinUsername", None)
    bluesky = getattr(obj, "bluesky", None) or getattr(obj, "blueskyUsername", None)
    # Spend one extra call on the socials endpoint only when something is
    # missing AND the request still has budget; failures here are ignored.
    if ctx._budget_remaining() > 0 and any(
        (v in {None, ""} for v in [twitter, github, linkedin, bluesky])
    ):
        socials_ep = f"/api/users/{u}/socials"
        socials_resp = ctx._host_raw_call(socials_ep)
        if socials_resp.get("ok"):
            socials_payload = (
                socials_resp.get("data")
                if isinstance(socials_resp.get("data"), dict)
                else {}
            )
            handles = (
                socials_payload.get("socialHandles")
                if isinstance(socials_payload.get("socialHandles"), dict)
                else {}
            )
            # Overview values win; socials only fill what is still empty.
            twitter = twitter or handles.get("twitter")
            github = github or handles.get("github")
            linkedin = linkedin or handles.get("linkedin")
            bluesky = bluesky or handles.get("bluesky")
    orgs_raw = getattr(obj, "orgs", None)
    org_names: list[str] | None = None
    if isinstance(orgs_raw, (list, tuple, set)):
        names = []
        for org in orgs_raw:
            # Orgs may be plain names or objects carrying a ``name`` attribute.
            if isinstance(org, str) and org.strip():
                names.append(org.strip())
                continue
            name = getattr(org, "name", None)
            if isinstance(name, str) and name.strip():
                names.append(name.strip())
        org_names = names or None
    twitter_handle = _clean_social_handle(twitter)
    github_handle = _clean_social_handle(github)
    linkedin_handle = _clean_social_handle(linkedin)
    bluesky_handle = _clean_social_handle(bluesky)
    item = {
        "username": obj.username or u,
        "fullname": obj.fullname,
        "bio": getattr(obj, "details", None),
        "avatar_url": obj.avatar_url,
        "website_url": getattr(obj, "websiteUrl", None),
        "twitter": _social_url("twitter", twitter_handle),
        "github": _social_url("github", github_handle),
        "linkedin": _social_url("linkedin", linkedin_handle),
        "bluesky": _social_url("bluesky", bluesky_handle),
        "twitter_handle": twitter_handle,
        "github_handle": github_handle,
        "linkedin_handle": linkedin_handle,
        "bluesky_handle": bluesky_handle,
        "followers": ctx._as_int(obj.num_followers),
        "following": ctx._as_int(obj.num_following),
        "likes": ctx._as_int(obj.num_likes),
        "models": ctx._as_int(getattr(obj, "num_models", None)),
        "datasets": ctx._as_int(getattr(obj, "num_datasets", None)),
        "spaces": ctx._as_int(getattr(obj, "num_spaces", None)),
        "discussions": ctx._as_int(getattr(obj, "num_discussions", None)),
        "papers": ctx._as_int(getattr(obj, "num_papers", None)),
        "upvotes": ctx._as_int(getattr(obj, "num_upvotes", None)),
        "orgs": org_names,
        "is_pro": obj.is_pro,
    }
    return ctx._helper_success(
        start_calls=start_calls,
        source=endpoint,
        items=[item],
        scanned=1,
        matched=1,
        returned=1,
        truncated=False,
    )
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
async def _hf_org_overview(
    ctx: HelperRuntimeContext, organization: str
) -> dict[str, Any]:
    """Fetch a single organization's overview and return it as a one-item helper result.

    Returns a helper-error envelope when *organization* is blank or the
    upstream call raises; otherwise a helper-success envelope whose single
    item carries the normalized overview fields.
    """
    start_calls = ctx.call_count["n"]
    org = str(organization or "").strip()
    if not org:
        # Bail out early: no org handle means there is nothing to query.
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/organizations/<o>/overview",
            error="organization is required",
        )
    endpoint = f"/api/organizations/{org}/overview"
    try:
        overview = ctx._host_hf_call(
            endpoint,
            lambda: ctx._get_hf_api_client().get_organization_overview(org),
        )
    except Exception as exc:
        return ctx._helper_error(start_calls=start_calls, source=endpoint, error=exc)
    # Normalize the overview object into a plain dict; optional attributes
    # are read via getattr so older API payloads do not raise.
    summary = {
        "organization": overview.name or org,
        "display_name": overview.fullname,
        "avatar_url": overview.avatar_url,
        "description": overview.details,
        "website_url": getattr(overview, "websiteUrl", None),
        "followers": ctx._as_int(overview.num_followers),
        "members": ctx._as_int(overview.num_users),
        "models": ctx._as_int(getattr(overview, "num_models", None)),
        "datasets": ctx._as_int(getattr(overview, "num_datasets", None)),
        "spaces": ctx._as_int(getattr(overview, "num_spaces", None)),
    }
    return ctx._helper_success(
        start_calls=start_calls,
        source=endpoint,
        items=[summary],
        scanned=1,
        matched=1,
        returned=1,
        truncated=False,
    )
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
async def hf_org_members(
    ctx: HelperRuntimeContext,
    organization: str,
    limit: int | None = None,
    scan_limit: int | None = None,
    count_only: bool = False,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """List an organization's members with limit/scan/where/fields handling.

    Tries the organization overview first to obtain an authoritative member
    count (used for a count_only fast path and for exact-count metadata),
    then scans the members listing up to the resolved scan limit, applies
    the optional `where` filter and `fields` projection, and returns a
    helper-success envelope with exhaustive-result metadata.
    """
    start_calls = ctx.call_count["n"]
    org = str(organization or "").strip()
    if not org:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/organizations/<o>/members",
            error="organization is required",
        )
    # Resolve effective return/scan limits from policy plus caller inputs.
    default_limit = ctx._policy_int("hf_org_members", "default_limit", 100)
    scan_cap = ctx._policy_int("hf_org_members", "scan_max", GRAPH_SCAN_LIMIT_CAP)
    limit_plan = ctx._resolve_exhaustive_limits(
        limit=limit,
        count_only=count_only,
        default_limit=default_limit,
        max_limit=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
        scan_limit=scan_limit,
        scan_cap=scan_cap,
    )
    applied_limit = int(limit_plan["applied_limit"])
    scan_lim = int(limit_plan["applied_scan_limit"])
    has_where = isinstance(where, dict) and bool(where)

    # Best-effort overview lookup for an authoritative member count.
    overview_total: int | None = None
    overview_source = f"/api/organizations/{org}/overview"
    if ctx._budget_remaining() > 0:
        try:
            org_obj = ctx._host_hf_call(
                overview_source,
                lambda: ctx._get_hf_api_client().get_organization_overview(org),
            )
            overview_total = ctx._as_int(getattr(org_obj, "num_users", None))
        except Exception:
            overview_total = None
    if count_only and (not has_where) and (overview_total is not None):
        # Fast path: unfiltered count is answered by the overview alone.
        return ctx._overview_count_only_success(
            start_calls=start_calls,
            source=overview_source,
            total=overview_total,
            limit_plan=limit_plan,
            base_meta={
                "scanned": 1,
                "count_source": "overview",
                "organization": org,
            },
        )

    endpoint = f"/api/organizations/{org}/members"
    try:
        rows = ctx._host_hf_call(
            endpoint,
            lambda: list(
                islice(
                    ctx._get_hf_api_client().list_organization_members(org),
                    scan_lim,
                )
            ),
        )
    except Exception as exc:
        return ctx._helper_error(
            start_calls=start_calls, source=endpoint, error=exc, organization=org
        )

    # Normalize rows; entries without a usable username are dropped.
    normalized: list[dict[str, Any]] = []
    for member in rows:
        member_name = getattr(member, "username", None)
        if not isinstance(member_name, str) or not member_name:
            continue
        normalized.append(
            {
                "username": member_name,
                "fullname": getattr(member, "fullname", None),
                "is_pro": getattr(member, "is_pro", None),
                "role": getattr(member, "role", None),
            }
        )
    try:
        normalized = ctx._apply_where(
            normalized, where, allowed_fields=ACTOR_CANONICAL_FIELDS
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
            organization=org,
        )

    # Derive count semantics: an exhausted scan yields exact counts; the
    # overview count is exact for the unfiltered case.
    observed_total = len(rows)
    scan_exhaustive = observed_total < scan_lim
    overview_list_mismatch = (
        overview_total is not None
        and scan_exhaustive
        and (observed_total != overview_total)
    )
    if has_where:
        exact_count = scan_exhaustive
        total = len(normalized)
        total_matched = len(normalized)
    elif overview_total is not None:
        exact_count = True
        total = overview_total
        total_matched = overview_total
    else:
        exact_count = scan_exhaustive
        total = observed_total
        total_matched = observed_total
    total_available = overview_total if overview_total is not None else observed_total
    items = normalized[:applied_limit]
    scan_limit_hit = not exact_count and observed_total >= scan_lim
    count_source = (
        "overview" if overview_total is not None and (not has_where) else "scan"
    )
    sample_complete = (
        exact_count
        and len(normalized) <= applied_limit
        and (not count_only or len(normalized) == 0)
    )
    more_available = ctx._derive_more_available(
        sample_complete=sample_complete,
        exact_count=exact_count,
        returned=len(items),
        total=total,
    )
    if not exact_count and scan_limit_hit:
        # With a filter we cannot tell whether unscanned rows would match.
        more_available = "unknown" if has_where else True
    try:
        items = ctx._project_actor_items(items, fields)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
            organization=org,
        )
    meta = ctx._build_exhaustive_result_meta(
        base_meta={
            "scanned": observed_total,
            "total": total,
            "total_available": total_available,
            "total_matched": total_matched,
            "count_source": count_source,
            "lower_bound": bool(has_where and (not exact_count)),
            "overview_total": overview_total,
            "listed_total": observed_total,
            "overview_list_mismatch": overview_list_mismatch,
            "organization": org,
        },
        limit_plan=limit_plan,
        matched_count=len(normalized),
        returned_count=len(items),
        exact_count=exact_count,
        count_only=count_only,
        sample_complete=sample_complete,
        more_available=more_available,
        scan_limit_hit=scan_limit_hit,
    )
    return ctx._helper_success(
        start_calls=start_calls, source=endpoint, items=items, meta=meta
    )
|
| 386 |
+
|
| 387 |
+
|
| 388 |
+
async def _user_graph_helper(
    ctx: HelperRuntimeContext,
    kind: str,
    username: str,
    pro_only: bool | None,
    limit: int | None,
    scan_limit: int | None,
    count_only: bool,
    where: dict[str, Any] | None,
    fields: list[str] | None,
    *,
    helper_name: str,
) -> dict[str, Any]:
    """Shared followers/following lister for users and organizations.

    Resolves limits from policy, probes the user overview (falling back to
    the organization overview when the user lookup fails) for an exact
    relation count, then scans the appropriate listing endpoint, applies
    `pro_only`/`where` filtering and `fields` projection, and emits a
    helper-success envelope with exhaustive-result metadata.
    """
    start_calls = ctx.call_count["n"]
    default_limit = ctx._policy_int(helper_name, "default_limit", 100)
    scan_cap = ctx._policy_int(helper_name, "scan_max", GRAPH_SCAN_LIMIT_CAP)
    max_limit = ctx._policy_int(
        helper_name, "max_limit", EXHAUSTIVE_HELPER_RETURN_HARD_CAP
    )
    subject = str(username or "").strip()
    if not subject:
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/users/<u>/{kind}",
            error="username is required",
        )
    limit_plan = ctx._resolve_exhaustive_limits(
        limit=limit,
        count_only=count_only,
        default_limit=default_limit,
        max_limit=max_limit,
        scan_limit=scan_limit,
        scan_cap=scan_cap,
    )
    applied_limit = int(limit_plan["applied_limit"])
    scan_lim = int(limit_plan["applied_scan_limit"])
    has_where = isinstance(where, dict) and bool(where)
    filtered = pro_only is not None or has_where
    entity_type = "user"

    # Overview probe: try the user endpoint first; on failure, fall back to
    # the organization endpoint (handles being ambiguous between the two).
    overview_total: int | None = None
    overview_source = f"/api/users/{subject}/overview"
    if ctx._budget_remaining() > 0:
        try:
            user_obj = ctx._host_hf_call(
                overview_source,
                lambda: ctx._get_hf_api_client().get_user_overview(subject),
            )
            overview_total = ctx._as_int(
                user_obj.num_followers
                if kind == "followers"
                else user_obj.num_following
            )
        except Exception:
            org_overview_source = f"/api/organizations/{subject}/overview"
            try:
                org_obj = ctx._host_hf_call(
                    org_overview_source,
                    lambda: ctx._get_hf_api_client().get_organization_overview(subject),
                )
            except Exception:
                overview_total = None
            else:
                entity_type = "organization"
                overview_source = org_overview_source
                if kind != "followers":
                    # Organizations only expose followers, never a following list.
                    return ctx._helper_error(
                        start_calls=start_calls,
                        source=f"/api/organizations/{subject}/{kind}",
                        error="organization graph only supports relation='followers'; organizations do not expose a following list",
                        relation=kind,
                        organization=subject,
                        entity=subject,
                        entity_type=entity_type,
                    )
                overview_total = ctx._as_int(getattr(org_obj, "num_followers", None))
    if count_only and (not filtered) and (overview_total is not None):
        # Fast path: the unfiltered count is already known from the overview.
        return ctx._overview_count_only_success(
            start_calls=start_calls,
            source=overview_source,
            total=overview_total,
            limit_plan=limit_plan,
            base_meta={
                "scanned": 1,
                "count_source": "overview",
                "relation": kind,
                "pro_only": pro_only,
                "where_applied": has_where,
                "entity": subject,
                "entity_type": entity_type,
                "username": subject,
                "organization": subject if entity_type == "organization" else None,
            },
        )

    # Scan the relevant listing endpoint up to the scan limit.
    endpoint = f"/api/users/{subject}/{kind}"
    try:
        if entity_type == "organization":
            endpoint = f"/api/organizations/{subject}/followers"
            rows = ctx._host_hf_call(
                endpoint,
                lambda: list(
                    islice(
                        ctx._get_hf_api_client().list_organization_followers(subject),
                        scan_lim,
                    )
                ),
            )
        elif kind == "followers":
            rows = ctx._host_hf_call(
                endpoint,
                lambda: list(
                    islice(ctx._get_hf_api_client().list_user_followers(subject), scan_lim)
                ),
            )
        else:
            rows = ctx._host_hf_call(
                endpoint,
                lambda: list(
                    islice(ctx._get_hf_api_client().list_user_following(subject), scan_lim)
                ),
            )
    except Exception as e:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=e,
            relation=kind,
            username=subject,
            entity=subject,
            entity_type=entity_type,
            organization=subject if entity_type == "organization" else None,
        )

    # Normalize rows and apply the pro_only tri-state filter inline.
    normalized: list[dict[str, Any]] = []
    for row in rows:
        handle = getattr(row, "username", None)
        if not isinstance(handle, str) or not handle:
            continue
        entry = {
            "username": handle,
            "fullname": getattr(row, "fullname", None),
            "is_pro": getattr(row, "is_pro", None),
        }
        if pro_only is True and entry.get("is_pro") is not True:
            continue
        if pro_only is False and entry.get("is_pro") is True:
            continue
        normalized.append(entry)
    try:
        normalized = ctx._apply_where(
            normalized, where, allowed_fields=ACTOR_CANONICAL_FIELDS
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
            relation=kind,
            username=subject,
            entity=subject,
            entity_type=entity_type,
            organization=subject if entity_type == "organization" else None,
        )

    # Count semantics mirror hf_org_members: exhausted scans are exact;
    # the overview total is exact only for the unfiltered case.
    observed_total = len(rows)
    scan_exhaustive = observed_total < scan_lim
    overview_list_mismatch = (
        overview_total is not None
        and scan_exhaustive
        and (observed_total != overview_total)
    )
    if filtered:
        exact_count = scan_exhaustive
        total = len(normalized)
        total_matched = len(normalized)
    elif overview_total is not None:
        exact_count = True
        total = overview_total
        total_matched = overview_total
    else:
        exact_count = scan_exhaustive
        total = observed_total
        total_matched = observed_total
    total_available = overview_total if overview_total is not None else observed_total
    items = normalized[:applied_limit]
    scan_limit_hit = not exact_count and observed_total >= scan_lim
    count_source = (
        "overview" if overview_total is not None and (not filtered) else "scan"
    )
    sample_complete = (
        exact_count
        and len(normalized) <= applied_limit
        and (not count_only or len(normalized) == 0)
    )
    more_available = ctx._derive_more_available(
        sample_complete=sample_complete,
        exact_count=exact_count,
        returned=len(items),
        total=total,
    )
    if not exact_count and scan_limit_hit:
        # Filters make the remainder unknowable; unfiltered truncation
        # definitely means more rows exist.
        more_available = "unknown" if filtered else True
    try:
        items = ctx._project_actor_items(items, fields)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
            relation=kind,
            username=subject,
            entity=subject,
            entity_type=entity_type,
            organization=subject if entity_type == "organization" else None,
        )
    meta = ctx._build_exhaustive_result_meta(
        base_meta={
            "scanned": observed_total,
            "total": total,
            "total_available": total_available,
            "total_matched": total_matched,
            "count_source": count_source,
            "lower_bound": bool(filtered and (not exact_count)),
            "overview_total": overview_total,
            "listed_total": observed_total,
            "overview_list_mismatch": overview_list_mismatch,
            "relation": kind,
            "pro_only": pro_only,
            "where_applied": has_where,
            "entity": subject,
            "entity_type": entity_type,
            "username": subject,
            "organization": subject if entity_type == "organization" else None,
        },
        limit_plan=limit_plan,
        matched_count=len(normalized),
        returned_count=len(items),
        exact_count=exact_count,
        count_only=count_only,
        sample_complete=sample_complete,
        more_available=more_available,
        scan_limit_hit=scan_limit_hit,
    )
    return ctx._helper_success(
        start_calls=start_calls, source=endpoint, items=items, meta=meta
    )
|
| 631 |
+
|
| 632 |
+
|
| 633 |
+
async def hf_profile_summary(
    ctx: HelperRuntimeContext,
    handle: str | None = None,
    include: list[str] | None = None,
    likes_limit: int = 10,
    activity_limit: int = 10,
) -> dict[str, Any]:
    """Build a one-item profile summary for a user or organization handle.

    Resolves *handle* (falling back to the current authenticated user),
    validates the optional `include` sections ("likes" / "activity"),
    and tries the user overview first; if that fails, falls back to the
    organization overview. Section fetch failures are reported via
    `section_errors` rather than failing the whole call.
    """
    start_calls = ctx.call_count["n"]
    resolved_handle, resolve_error = await resolve_username_or_current(ctx, handle)
    if resolve_error:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/users/<u>/overview",
            error=resolve_error,
        )
    if not isinstance(resolved_handle, str):
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/users/<u>/overview",
            error="handle was not provided and current authenticated user could not be resolved",
        )
    # Normalize and validate the requested optional sections.
    try:
        requested_sections = (
            {part.lower() for part in ctx._coerce_str_list(include) if part.strip()}
            if include is not None
            else set()
        )
    except ValueError as e:
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/users/{resolved_handle}/overview",
            error=e,
        )
    invalid_sections = sorted(requested_sections - {"likes", "activity"})
    if invalid_sections:
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/users/{resolved_handle}/overview",
            error=f"Unsupported include values: {invalid_sections}",
        )
    likes_lim = ctx._clamp_int(
        likes_limit, default=10, minimum=0, maximum=OUTPUT_ITEMS_TRUNCATION_LIMIT
    )
    activity_lim = ctx._clamp_int(
        activity_limit, default=10, minimum=0, maximum=OUTPUT_ITEMS_TRUNCATION_LIMIT
    )
    section_errors: dict[str, str] = {}

    # Prefer the user overview; only fall back to the org overview on failure.
    user_overview = await _hf_user_overview(ctx, resolved_handle)
    if user_overview.get("ok") is True:
        overview_item = ctx._helper_item(user_overview) or {"username": resolved_handle}
        item: dict[str, Any] = {
            "handle": str(overview_item.get("username") or resolved_handle),
            "entity_type": "user",
            "display_name": overview_item.get("fullname")
            or str(overview_item.get("username") or resolved_handle),
            "bio": overview_item.get("bio"),
            "avatar_url": overview_item.get("avatar_url"),
            "website_url": overview_item.get("website_url"),
            "twitter_url": overview_item.get("twitter"),
            "github_url": overview_item.get("github"),
            "linkedin_url": overview_item.get("linkedin"),
            "bluesky_url": overview_item.get("bluesky"),
            "followers_count": ctx._overview_count(overview_item, "followers"),
            "following_count": ctx._overview_count(overview_item, "following"),
            "likes_count": ctx._overview_count(overview_item, "likes"),
            "models_count": ctx._overview_count(overview_item, "models"),
            "datasets_count": ctx._overview_count(overview_item, "datasets"),
            "spaces_count": ctx._overview_count(overview_item, "spaces"),
            "discussions_count": ctx._overview_count(overview_item, "discussions"),
            "papers_count": ctx._overview_count(overview_item, "papers"),
            "upvotes_count": ctx._overview_count(overview_item, "upvotes"),
            "organizations": overview_item.get("orgs"),
            "is_pro": overview_item.get("is_pro"),
        }
        if "likes" in requested_sections:
            likes = await ctx.call_helper(
                "hf_user_likes",
                username=resolved_handle,
                limit=likes_lim,
                scan_limit=USER_SUMMARY_LIKES_SCAN_LIMIT,
                count_only=likes_lim == 0,
                sort="liked_at",
                fields=[
                    "liked_at",
                    "repo_id",
                    "repo_type",
                    "repo_author",
                    "repo_url",
                ],
            )
            item["likes_sample"] = likes.get("items") if likes.get("ok") is True else []
            if likes.get("ok") is not True:
                section_errors["likes"] = str(
                    likes.get("error") or "likes fetch failed"
                )
        if "activity" in requested_sections:
            activity = await ctx.call_helper(
                "hf_recent_activity",
                feed_type="user",
                entity=resolved_handle,
                limit=activity_lim,
                max_pages=USER_SUMMARY_ACTIVITY_MAX_PAGES,
                count_only=activity_lim == 0,
                fields=["timestamp", "event_type", "repo_type", "repo_id"],
            )
            item["activity_sample"] = (
                activity.get("items") if activity.get("ok") is True else []
            )
            if activity.get("ok") is not True:
                section_errors["activity"] = str(
                    activity.get("error") or "activity fetch failed"
                )
        return ctx._helper_success(
            start_calls=start_calls,
            source=f"/api/users/{resolved_handle}/overview",
            items=[item],
            scanned=1,
            matched=1,
            returned=1,
            truncated=False,
            handle=resolved_handle,
            entity_type="user",
            include=sorted(requested_sections),
            likes_limit=likes_lim,
            activity_limit=activity_lim,
            section_errors=section_errors or None,
        )

    # Organization fallback; include sections do not apply to orgs.
    org_overview = await _hf_org_overview(ctx, resolved_handle)
    if org_overview.get("ok") is True:
        overview_item = ctx._helper_item(org_overview) or {
            "organization": resolved_handle
        }
        item = {
            "handle": str(overview_item.get("organization") or resolved_handle),
            "entity_type": "organization",
            "display_name": overview_item.get("display_name")
            or str(overview_item.get("organization") or resolved_handle),
            "description": overview_item.get("description"),
            "avatar_url": overview_item.get("avatar_url"),
            "website_url": overview_item.get("website_url"),
            "followers_count": ctx._overview_count(overview_item, "followers"),
            "members_count": ctx._overview_count(overview_item, "members"),
            "models_count": ctx._overview_count(overview_item, "models"),
            "datasets_count": ctx._overview_count(overview_item, "datasets"),
            "spaces_count": ctx._overview_count(overview_item, "spaces"),
        }
        return ctx._helper_success(
            start_calls=start_calls,
            source=f"/api/organizations/{resolved_handle}/overview",
            items=[item],
            scanned=1,
            matched=1,
            returned=1,
            truncated=False,
            handle=resolved_handle,
            entity_type="organization",
            include=[],
            ignored_includes=sorted(requested_sections) or None,
        )

    # Neither lookup succeeded: surface the first available error.
    error = (
        user_overview.get("error")
        or org_overview.get("error")
        or "profile fetch failed"
    )
    return ctx._helper_error(
        start_calls=start_calls,
        source=f"/api/profiles/{resolved_handle}",
        error=error,
        handle=resolved_handle,
    )
|
| 803 |
+
|
| 804 |
+
|
| 805 |
+
async def hf_user_graph(
    ctx: HelperRuntimeContext,
    username: str | None = None,
    relation: str = "followers",
    limit: int | None = None,
    scan_limit: int | None = None,
    count_only: bool = False,
    pro_only: bool | None = None,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """Public entry point for the followers/following graph helper.

    Validates *relation*, resolves *username* (defaulting to the current
    authenticated user), then delegates to :func:`_user_graph_helper`.
    """
    start_calls = ctx.call_count["n"]
    # Blank relations default to "followers"; anything else must be one of
    # the two supported values.
    rel = str(relation or "").strip().lower() or "followers"
    if rel not in {"followers", "following"}:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/users/<u>/followers",
            error="relation must be 'followers' or 'following'",
        )
    resolved_username, resolve_error = await resolve_username_or_current(ctx, username)
    if resolve_error:
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/users/<u>/{rel}",
            error=resolve_error,
            relation=rel,
        )
    if not isinstance(resolved_username, str):
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/users/<u>/{rel}",
            error="username is required",
            relation=rel,
        )
    return await _user_graph_helper(
        ctx,
        rel,
        resolved_username,
        pro_only,
        limit,
        scan_limit,
        count_only,
        where,
        fields,
        helper_name="hf_user_graph",
    )
|
| 851 |
+
|
| 852 |
+
|
| 853 |
+
def register_profile_helpers(
    ctx: HelperRuntimeContext,
) -> dict[str, Callable[..., Any]]:
    """Return the name->callable registry of profile helpers bound to *ctx*."""
    # Each helper takes ctx as its first argument; bind it once here so the
    # registry exposes plain keyword-callable entry points.
    helper_funcs: dict[str, Callable[..., Any]] = {
        "hf_whoami": hf_whoami,
        "hf_org_members": hf_org_members,
        "hf_profile_summary": hf_profile_summary,
        "hf_user_graph": hf_user_graph,
    }
    return {name: partial(fn, ctx) for name, fn in helper_funcs.items()}
|
.prod/monty_api/helpers/repos.py
ADDED
|
@@ -0,0 +1,1359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
# ruff: noqa: C901, PLR0912, PLR0913, PLR0915, PLR0917
|
| 4 |
+
from itertools import islice
|
| 5 |
+
from typing import TYPE_CHECKING, Any, Callable
|
| 6 |
+
from ..context_types import HelperRuntimeContext
|
| 7 |
+
from ..helper_contracts import repo_expand_alias_map
|
| 8 |
+
from ..constants import (
|
| 9 |
+
ACTOR_CANONICAL_FIELDS,
|
| 10 |
+
DAILY_PAPER_CANONICAL_FIELDS,
|
| 11 |
+
EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 12 |
+
LIKES_ENRICHMENT_MAX_REPOS,
|
| 13 |
+
LIKES_RANKING_WINDOW_DEFAULT,
|
| 14 |
+
LIKES_SCAN_LIMIT_CAP,
|
| 15 |
+
OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
| 16 |
+
REPO_CANONICAL_FIELDS,
|
| 17 |
+
SELECTIVE_ENDPOINT_RETURN_HARD_CAP,
|
| 18 |
+
TRENDING_ENDPOINT_MAX_LIMIT,
|
| 19 |
+
USER_LIKES_CANONICAL_FIELDS,
|
| 20 |
+
)
|
| 21 |
+
from ..registry import (
|
| 22 |
+
REPO_SEARCH_DEFAULT_EXPAND,
|
| 23 |
+
REPO_SEARCH_EXTRA_ARGS,
|
| 24 |
+
TRENDING_DEFAULT_FIELDS,
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
from .common import resolve_username_or_current
|
| 29 |
+
|
| 30 |
+
from functools import partial
|
| 31 |
+
|
| 32 |
+
if TYPE_CHECKING:
|
| 33 |
+
from huggingface_hub import HfApi
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def _sanitize_repo_expand_values(
|
| 37 |
+
repo_type: str, raw_expand: Any
|
| 38 |
+
) -> tuple[list[str] | None, list[str], str | None]:
|
| 39 |
+
if raw_expand is None:
|
| 40 |
+
return (None, [], None)
|
| 41 |
+
if isinstance(raw_expand, str):
|
| 42 |
+
requested_values = [raw_expand]
|
| 43 |
+
elif isinstance(raw_expand, (list, tuple, set)):
|
| 44 |
+
requested_values = list(raw_expand)
|
| 45 |
+
else:
|
| 46 |
+
return (None, [], "expand must be a string or a list of strings")
|
| 47 |
+
|
| 48 |
+
cleaned: list[str] = []
|
| 49 |
+
for value in requested_values:
|
| 50 |
+
value_str = str(value).strip()
|
| 51 |
+
if value_str and value_str not in cleaned:
|
| 52 |
+
cleaned.append(value_str)
|
| 53 |
+
|
| 54 |
+
alias_map = repo_expand_alias_map(repo_type)
|
| 55 |
+
dropped = [value for value in cleaned if value not in alias_map]
|
| 56 |
+
deduped_kept: list[str] = []
|
| 57 |
+
for value in cleaned:
|
| 58 |
+
resolved = alias_map.get(value)
|
| 59 |
+
if resolved is None or resolved in deduped_kept:
|
| 60 |
+
continue
|
| 61 |
+
deduped_kept.append(resolved)
|
| 62 |
+
return (deduped_kept or None, dropped, None)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def _resolve_repo_search_types(
|
| 66 |
+
ctx: HelperRuntimeContext,
|
| 67 |
+
*,
|
| 68 |
+
repo_type: str | None,
|
| 69 |
+
repo_types: list[str] | None,
|
| 70 |
+
default_repo_type: str = "model",
|
| 71 |
+
) -> tuple[list[str] | None, str | None]:
|
| 72 |
+
if repo_type is not None and repo_types is not None:
|
| 73 |
+
return (None, "Pass either repo_type or repo_types, not both")
|
| 74 |
+
|
| 75 |
+
if repo_types is None:
|
| 76 |
+
raw_type = str(repo_type or "").strip()
|
| 77 |
+
if not raw_type:
|
| 78 |
+
return ([default_repo_type], None)
|
| 79 |
+
canonical = ctx._canonical_repo_type(raw_type, default="")
|
| 80 |
+
if canonical not in {"model", "dataset", "space"}:
|
| 81 |
+
return (None, f"Unsupported repo_type '{repo_type}'")
|
| 82 |
+
return ([canonical], None)
|
| 83 |
+
|
| 84 |
+
raw_types = ctx._coerce_str_list(repo_types)
|
| 85 |
+
if not raw_types:
|
| 86 |
+
return (None, "repo_types must not be empty")
|
| 87 |
+
|
| 88 |
+
requested_repo_types: list[str] = []
|
| 89 |
+
for raw in raw_types:
|
| 90 |
+
canonical = ctx._canonical_repo_type(raw, default="")
|
| 91 |
+
if canonical not in {"model", "dataset", "space"}:
|
| 92 |
+
return (None, f"Unsupported repo_type '{raw}'")
|
| 93 |
+
if canonical not in requested_repo_types:
|
| 94 |
+
requested_repo_types.append(canonical)
|
| 95 |
+
return (requested_repo_types, None)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def _clean_repo_search_text(value: str | None) -> str | None:
|
| 99 |
+
cleaned = str(value or "").strip()
|
| 100 |
+
return cleaned or None
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def _normalize_repo_search_filter(
|
| 104 |
+
ctx: HelperRuntimeContext, value: str | list[str] | None
|
| 105 |
+
) -> tuple[list[str] | None, str | None]:
|
| 106 |
+
if value is None:
|
| 107 |
+
return (None, None)
|
| 108 |
+
try:
|
| 109 |
+
normalized = ctx._coerce_str_list(value)
|
| 110 |
+
except ValueError:
|
| 111 |
+
return (None, "filter must be a string or a list of strings")
|
| 112 |
+
return (normalized or None, None)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def _build_repo_search_extra_args(
    repo_type: str, **candidate_args: Any
) -> tuple[dict[str, Any], list[str], str | None]:
    """Normalize per-repo-type search kwargs for the listing call.

    Returns ``(args, dropped_expand, error)``: ``args`` is the cleaned kwarg
    dict, ``dropped_expand`` lists expand values without an alias for this
    repo type, and ``error`` reports an unsupported kwarg.
    """
    prepared: dict[str, Any] = {}
    for name, value in candidate_args.items():
        if value is None:
            continue
        if name in {"card_data", "cardData"}:
            # Both spellings collapse onto the wire name; only truthy counts.
            if value:
                prepared["cardData"] = True
        elif name in {"fetch_config", "linked"}:
            # Boolean flags are kept only when truthy.
            if value:
                prepared[name] = True
        else:
            prepared[name] = value

    allowed_extra = REPO_SEARCH_EXTRA_ARGS.get(repo_type, set())
    unsupported = sorted(str(name) for name in prepared if str(name) not in allowed_extra)
    if unsupported:
        return (
            {},
            [],
            f"Unsupported search args for repo_type='{repo_type}': {unsupported}. Allowed args: {sorted(allowed_extra)}",
        )

    dropped_expand: list[str] = []
    if "expand" in prepared:
        kept_expand, dropped_expand, expand_error = _sanitize_repo_expand_values(
            repo_type, prepared.get("expand")
        )
        if expand_error:
            return ({}, [], expand_error)
        if kept_expand is None:
            prepared.pop("expand", None)
        else:
            prepared["expand"] = kept_expand

    # No projection requested at all -> fall back to the type's default expand.
    if all(
        name not in prepared for name in ("expand", "full", "cardData", "fetch_config")
    ):
        prepared["expand"] = list(REPO_SEARCH_DEFAULT_EXPAND[repo_type])

    return (prepared, dropped_expand, None)
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def _normalize_user_likes_sort(sort: str | None) -> tuple[str | None, str | None]:
|
| 162 |
+
normalized = str(sort or "liked_at").strip() or "liked_at"
|
| 163 |
+
if normalized not in {"liked_at", "repo_likes", "repo_downloads"}:
|
| 164 |
+
return (None, "sort must be one of liked_at, repo_likes, repo_downloads")
|
| 165 |
+
return (normalized, None)
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
async def _run_repo_search(
    ctx: HelperRuntimeContext,
    *,
    helper_name: str,
    requested_repo_types: list[str],
    search: str | None,
    filter: str | list[str] | None,
    author: str | None,
    sort: str | None,
    limit: int,
    fields: list[str] | None,
    post_filter: dict[str, Any] | None,
    extra_args_by_type: dict[str, dict[str, Any]] | None = None,
) -> dict[str, Any]:
    """Shared runner behind the repo/model/dataset/space search helpers.

    Pipeline: normalize filter/search/limit, validate the sort key per repo
    type, list each requested type via the HF API, then apply post_filter,
    sort, project fields, and attach truncation/limit metadata via
    ctx._helper_success. Any validation or API failure short-circuits into a
    ctx._helper_error envelope.
    """
    # Snapshot the call counter so the envelope can report calls made here.
    start_calls = ctx.call_count["n"]
    default_limit = ctx._policy_int(helper_name, "default_limit", 20)
    max_limit = ctx._policy_int(
        helper_name, "max_limit", SELECTIVE_ENDPOINT_RETURN_HARD_CAP
    )
    filter_list, filter_error = _normalize_repo_search_filter(ctx, filter)
    if filter_error:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos",
            error=filter_error,
        )

    term = _clean_repo_search_text(search)
    author_clean = _clean_repo_search_text(author)
    requested_limit = limit
    applied_limit = ctx._clamp_int(
        limit,
        default=default_limit,
        minimum=1,
        maximum=max_limit,
    )
    limit_meta = ctx._derive_limit_metadata(
        requested_limit=requested_limit,
        applied_limit=applied_limit,
        default_limit_used=limit == default_limit,
    )
    hard_cap_applied = bool(limit_meta.get("hard_cap_applied"))

    # Each repo type may map the user-facing sort name differently; fail fast
    # on the first invalid one.
    sort_keys: dict[str, str | None] = {}
    for repo_type in requested_repo_types:
        sort_key, sort_error = ctx._normalize_repo_sort_key(repo_type, sort)
        if sort_error:
            return ctx._helper_error(
                start_calls=start_calls,
                source=f"/api/{repo_type}s",
                error=sort_error,
            )
        sort_keys[repo_type] = sort_key

    all_items: list[dict[str, Any]] = []
    scanned = 0
    source_endpoints: list[str] = []
    limit_boundary_hit = False
    ignored_expand: dict[str, list[str]] = {}
    api = ctx._get_hf_api_client()

    # One listing call per requested repo type; results are merged.
    for repo_type in requested_repo_types:
        endpoint = f"/api/{repo_type}s"
        source_endpoints.append(endpoint)
        raw_extra_args = dict((extra_args_by_type or {}).get(repo_type, {}))
        extra_args, dropped_expand, extra_error = _build_repo_search_extra_args(
            repo_type,
            **raw_extra_args,
        )
        if extra_error:
            return ctx._helper_error(
                start_calls=start_calls,
                source=endpoint,
                error=extra_error,
            )
        if dropped_expand:
            ignored_expand[repo_type] = dropped_expand
        try:
            # Defaults in the lambda pin the current loop values (avoids
            # late-binding capture of repo_type/extra_args).
            payload = ctx._host_hf_call(
                endpoint,
                lambda repo_type=repo_type, extra_args=extra_args: ctx._repo_list_call(
                    api,
                    repo_type,
                    search=term,
                    author=author_clean,
                    filter=filter_list,
                    sort=sort_keys[repo_type],
                    limit=applied_limit,
                    **extra_args,
                ),
            )
        except Exception as e:
            return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
        scanned += len(payload)
        if len(payload) >= applied_limit:
            # A full page suggests more rows may exist beyond the limit.
            limit_boundary_hit = True
        all_items.extend(
            ctx._normalize_repo_search_row(row, repo_type)
            for row in payload[:applied_limit]
        )

    # Client-side post filter over canonical fields (may raise on bad specs).
    try:
        all_items = ctx._apply_where(
            all_items, post_filter, allowed_fields=REPO_CANONICAL_FIELDS
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos",
            error=exc,
        )
    # NOTE(review): with multiple repo types the first type's sort key is
    # used for the merged result — confirm that's the intended tie-break.
    combined_sort_key = next(iter(sort_keys.values()), None)
    all_items = ctx._sort_repo_rows(all_items, combined_sort_key)
    matched = len(all_items)
    try:
        all_items = ctx._project_repo_items(all_items[:applied_limit], fields)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos",
            error=exc,
        )

    # Derive truncation metadata for the response envelope.
    more_available: bool | str = False
    truncated = False
    truncated_by = "none"
    next_request_hint: str | None = None
    if hard_cap_applied and scanned >= applied_limit:
        truncated = True
        truncated_by = "hard_cap"
        more_available = "unknown"
        next_request_hint = f"Increase limit above {applied_limit} to improve coverage"
    elif limit_boundary_hit:
        more_available = "unknown"
        next_request_hint = (
            f"Increase limit above {applied_limit} to check whether more rows exist"
        )

    return ctx._helper_success(
        start_calls=start_calls,
        source=",".join(source_endpoints),
        items=all_items,
        helper=helper_name,
        search=term,
        repo_types=requested_repo_types,
        filter=filter_list,
        sort=combined_sort_key,
        author=author_clean,
        limit=applied_limit,
        post_filter=post_filter if isinstance(post_filter, dict) and post_filter else None,
        scanned=scanned,
        matched=matched,
        returned=len(all_items),
        truncated=truncated,
        truncated_by=truncated_by,
        more_available=more_available,
        limit_boundary_hit=limit_boundary_hit,
        next_request_hint=next_request_hint,
        ignored_expand=ignored_expand or None,
        **limit_meta,
    )
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
async def hf_models_search(
    ctx: HelperRuntimeContext,
    search: str | None = None,
    filter: str | list[str] | None = None,
    author: str | None = None,
    apps: str | list[str] | None = None,
    gated: bool | None = None,
    inference: str | None = None,
    inference_provider: str | list[str] | None = None,
    model_name: str | None = None,
    trained_dataset: str | list[str] | None = None,
    pipeline_tag: str | None = None,
    emissions_thresholds: tuple[float, float] | None = None,
    sort: str | None = None,
    limit: int = 20,
    expand: list[str] | None = None,
    full: bool | None = None,
    card_data: bool = False,
    fetch_config: bool = False,
    fields: list[str] | None = None,
    post_filter: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Search Hub models; thin wrapper over the shared repo-search runner."""
    # Model-only kwargs forwarded to the /api/models listing call.
    model_args: dict[str, Any] = {
        "apps": apps,
        "gated": gated,
        "inference": inference,
        "inference_provider": inference_provider,
        "model_name": model_name,
        "trained_dataset": trained_dataset,
        "pipeline_tag": pipeline_tag,
        "emissions_thresholds": emissions_thresholds,
        "expand": expand,
        "full": full,
        "card_data": card_data,
        "fetch_config": fetch_config,
    }
    return await _run_repo_search(
        ctx,
        helper_name="hf_models_search",
        requested_repo_types=["model"],
        search=search,
        filter=filter,
        author=author,
        sort=sort,
        limit=limit,
        fields=fields,
        post_filter=post_filter,
        extra_args_by_type={"model": model_args},
    )
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
async def hf_datasets_search(
    ctx: HelperRuntimeContext,
    search: str | None = None,
    filter: str | list[str] | None = None,
    author: str | None = None,
    benchmark: str | bool | None = None,
    dataset_name: str | None = None,
    gated: bool | None = None,
    language_creators: str | list[str] | None = None,
    language: str | list[str] | None = None,
    multilinguality: str | list[str] | None = None,
    size_categories: str | list[str] | None = None,
    task_categories: str | list[str] | None = None,
    task_ids: str | list[str] | None = None,
    sort: str | None = None,
    limit: int = 20,
    expand: list[str] | None = None,
    full: bool | None = None,
    fields: list[str] | None = None,
    post_filter: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Search Hub datasets; thin wrapper over the shared repo-search runner."""
    # Dataset-only kwargs forwarded to the /api/datasets listing call.
    dataset_args: dict[str, Any] = {
        "benchmark": benchmark,
        "dataset_name": dataset_name,
        "gated": gated,
        "language_creators": language_creators,
        "language": language,
        "multilinguality": multilinguality,
        "size_categories": size_categories,
        "task_categories": task_categories,
        "task_ids": task_ids,
        "expand": expand,
        "full": full,
    }
    return await _run_repo_search(
        ctx,
        helper_name="hf_datasets_search",
        requested_repo_types=["dataset"],
        search=search,
        filter=filter,
        author=author,
        sort=sort,
        limit=limit,
        fields=fields,
        post_filter=post_filter,
        extra_args_by_type={"dataset": dataset_args},
    )
|
| 431 |
+
|
| 432 |
+
|
| 433 |
+
async def hf_spaces_search(
    ctx: HelperRuntimeContext,
    search: str | None = None,
    filter: str | list[str] | None = None,
    author: str | None = None,
    datasets: str | list[str] | None = None,
    models: str | list[str] | None = None,
    linked: bool = False,
    sort: str | None = None,
    limit: int = 20,
    expand: list[str] | None = None,
    full: bool | None = None,
    fields: list[str] | None = None,
    post_filter: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Search Hub Spaces; thin wrapper over the shared repo-search runner."""
    # Space-only kwargs forwarded to the /api/spaces listing call.
    space_args: dict[str, Any] = {
        "datasets": datasets,
        "models": models,
        "linked": linked,
        "expand": expand,
        "full": full,
    }
    return await _run_repo_search(
        ctx,
        helper_name="hf_spaces_search",
        requested_repo_types=["space"],
        search=search,
        filter=filter,
        author=author,
        sort=sort,
        limit=limit,
        fields=fields,
        post_filter=post_filter,
        extra_args_by_type={"space": space_args},
    )
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
async def hf_repo_search(
    ctx: HelperRuntimeContext,
    search: str | None = None,
    repo_type: str | None = None,
    repo_types: list[str] | None = None,
    filter: str | list[str] | None = None,
    author: str | None = None,
    sort: str | None = None,
    limit: int = 20,
    fields: list[str] | None = None,
    post_filter: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Search repos across one or more repo types (model/dataset/space).

    Validates the requested repo type(s), then delegates to the shared
    repo-search runner with the common search parameters.
    """
    start_calls = ctx.call_count["n"]
    resolved_types, type_error = _resolve_repo_search_types(
        ctx, repo_type=repo_type, repo_types=repo_types
    )
    if type_error:
        return ctx._helper_error(
            start_calls=start_calls, source="/api/repos", error=type_error
        )
    if not resolved_types:
        # Defensive: the resolver returns a non-empty list whenever it does
        # not report an error.
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos",
            error="repo_type or repo_types is required",
        )
    return await _run_repo_search(
        ctx,
        helper_name="hf_repo_search",
        requested_repo_types=resolved_types,
        search=search,
        filter=filter,
        author=author,
        sort=sort,
        limit=limit,
        fields=fields,
        post_filter=post_filter,
    )
|
| 513 |
+
|
| 514 |
+
|
| 515 |
+
async def hf_user_likes(
|
| 516 |
+
ctx: HelperRuntimeContext,
|
| 517 |
+
username: str | None = None,
|
| 518 |
+
repo_types: list[str] | None = None,
|
| 519 |
+
limit: int | None = None,
|
| 520 |
+
scan_limit: int | None = None,
|
| 521 |
+
count_only: bool = False,
|
| 522 |
+
where: dict[str, Any] | None = None,
|
| 523 |
+
fields: list[str] | None = None,
|
| 524 |
+
sort: str | None = None,
|
| 525 |
+
ranking_window: int | None = None,
|
| 526 |
+
) -> dict[str, Any]:
|
| 527 |
+
start_calls = ctx.call_count["n"]
|
| 528 |
+
default_limit = ctx._policy_int("hf_user_likes", "default_limit", 100)
|
| 529 |
+
scan_cap = ctx._policy_int("hf_user_likes", "scan_max", LIKES_SCAN_LIMIT_CAP)
|
| 530 |
+
ranking_default = ctx._policy_int(
|
| 531 |
+
"hf_user_likes", "ranking_default", LIKES_RANKING_WINDOW_DEFAULT
|
| 532 |
+
)
|
| 533 |
+
enrich_cap = ctx._policy_int(
|
| 534 |
+
"hf_user_likes", "enrich_max", LIKES_ENRICHMENT_MAX_REPOS
|
| 535 |
+
)
|
| 536 |
+
resolved_username, resolve_error = await resolve_username_or_current(ctx, username)
|
| 537 |
+
if resolve_error:
|
| 538 |
+
return ctx._helper_error(
|
| 539 |
+
start_calls=start_calls,
|
| 540 |
+
source="/api/users/<u>/likes",
|
| 541 |
+
error=resolve_error,
|
| 542 |
+
)
|
| 543 |
+
if not isinstance(resolved_username, str):
|
| 544 |
+
return ctx._helper_error(
|
| 545 |
+
start_calls=start_calls,
|
| 546 |
+
source="/api/users/<u>/likes",
|
| 547 |
+
error="username is required",
|
| 548 |
+
)
|
| 549 |
+
sort_key, sort_error = _normalize_user_likes_sort(sort)
|
| 550 |
+
if sort_error:
|
| 551 |
+
return ctx._helper_error(
|
| 552 |
+
start_calls=start_calls,
|
| 553 |
+
source=f"/api/users/{resolved_username}/likes",
|
| 554 |
+
error=sort_error,
|
| 555 |
+
)
|
| 556 |
+
if sort_key is None:
|
| 557 |
+
return ctx._helper_error(
|
| 558 |
+
start_calls=start_calls,
|
| 559 |
+
source=f"/api/users/{resolved_username}/likes",
|
| 560 |
+
error="sort must be one of liked_at, repo_likes, repo_downloads",
|
| 561 |
+
)
|
| 562 |
+
limit_plan = ctx._resolve_exhaustive_limits(
|
| 563 |
+
limit=limit,
|
| 564 |
+
count_only=count_only,
|
| 565 |
+
default_limit=default_limit,
|
| 566 |
+
max_limit=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
|
| 567 |
+
scan_limit=scan_limit,
|
| 568 |
+
scan_cap=scan_cap,
|
| 569 |
+
)
|
| 570 |
+
applied_limit = int(limit_plan["applied_limit"])
|
| 571 |
+
scan_lim = int(limit_plan["applied_scan_limit"])
|
| 572 |
+
try:
|
| 573 |
+
normalized_where = ctx._normalize_where(
|
| 574 |
+
where, allowed_fields=USER_LIKES_CANONICAL_FIELDS
|
| 575 |
+
)
|
| 576 |
+
except ValueError as exc:
|
| 577 |
+
return ctx._helper_error(
|
| 578 |
+
start_calls=start_calls,
|
| 579 |
+
source=f"/api/users/{resolved_username}/likes",
|
| 580 |
+
error=exc,
|
| 581 |
+
)
|
| 582 |
+
allowed_repo_types: set[str] | None = None
|
| 583 |
+
try:
|
| 584 |
+
raw_repo_types: list[str] = (
|
| 585 |
+
ctx._coerce_str_list(repo_types) if repo_types is not None else []
|
| 586 |
+
)
|
| 587 |
+
except ValueError as e:
|
| 588 |
+
return ctx._helper_error(
|
| 589 |
+
start_calls=start_calls,
|
| 590 |
+
source=f"/api/users/{resolved_username}/likes",
|
| 591 |
+
error=e,
|
| 592 |
+
)
|
| 593 |
+
if raw_repo_types:
|
| 594 |
+
allowed_repo_types = set()
|
| 595 |
+
for raw in raw_repo_types:
|
| 596 |
+
canonical = ctx._canonical_repo_type(raw, default="")
|
| 597 |
+
if canonical not in {"model", "dataset", "space"}:
|
| 598 |
+
return ctx._helper_error(
|
| 599 |
+
start_calls=start_calls,
|
| 600 |
+
source=f"/api/users/{resolved_username}/likes",
|
| 601 |
+
error=f"Unsupported repo_type '{raw}'",
|
| 602 |
+
)
|
| 603 |
+
allowed_repo_types.add(canonical)
|
| 604 |
+
endpoint = f"/api/users/{resolved_username}/likes"
|
| 605 |
+
resp = ctx._host_raw_call(endpoint, params={"limit": scan_lim})
|
| 606 |
+
if not resp.get("ok"):
|
| 607 |
+
return ctx._helper_error(
|
| 608 |
+
start_calls=start_calls,
|
| 609 |
+
source=endpoint,
|
| 610 |
+
error=resp.get("error") or "likes fetch failed",
|
| 611 |
+
)
|
| 612 |
+
payload = resp.get("data") if isinstance(resp.get("data"), list) else []
|
| 613 |
+
scanned_rows = payload[:scan_lim]
|
| 614 |
+
matched_rows: list[tuple[int, dict[str, Any]]] = []
|
| 615 |
+
for row in scanned_rows:
|
| 616 |
+
if not isinstance(row, dict):
|
| 617 |
+
continue
|
| 618 |
+
repo = row.get("repo") if isinstance(row.get("repo"), dict) else {}
|
| 619 |
+
repo_data = row.get("repoData") if isinstance(row.get("repoData"), dict) else {}
|
| 620 |
+
repo_id = repo_data.get("id") or repo_data.get("name") or repo.get("name")
|
| 621 |
+
if not isinstance(repo_id, str) or not repo_id:
|
| 622 |
+
continue
|
| 623 |
+
repo_type = ctx._canonical_repo_type(
|
| 624 |
+
repo_data.get("type") or repo.get("type"), default=""
|
| 625 |
+
)
|
| 626 |
+
if not repo_type:
|
| 627 |
+
repo_type = ctx._canonical_repo_type(repo.get("type"), default="model")
|
| 628 |
+
if allowed_repo_types is not None and repo_type not in allowed_repo_types:
|
| 629 |
+
continue
|
| 630 |
+
repo_author = repo_data.get("author")
|
| 631 |
+
if not isinstance(repo_author, str) and "/" in repo_id:
|
| 632 |
+
repo_author = repo_id.split("/", 1)[0]
|
| 633 |
+
item = {
|
| 634 |
+
"liked_at": row.get("likedAt") or row.get("createdAt"),
|
| 635 |
+
"repo_id": repo_id,
|
| 636 |
+
"repo_type": repo_type,
|
| 637 |
+
"repo_author": repo_author,
|
| 638 |
+
"repo_likes": ctx._as_int(repo_data.get("likes")),
|
| 639 |
+
"repo_downloads": ctx._as_int(repo_data.get("downloads")),
|
| 640 |
+
"repo_url": ctx._repo_web_url(repo_type, repo_id),
|
| 641 |
+
}
|
| 642 |
+
if not ctx._item_matches_where(item, normalized_where):
|
| 643 |
+
continue
|
| 644 |
+
matched_rows.append((len(matched_rows), item))
|
| 645 |
+
matched = len(matched_rows)
|
| 646 |
+
scan_exhaustive = len(payload) < scan_lim
|
| 647 |
+
exact_count = scan_exhaustive
|
| 648 |
+
total_matched = matched
|
| 649 |
+
total = total_matched
|
| 650 |
+
effective_ranking_window: int | None = None
|
| 651 |
+
ranking_window_hit = False
|
| 652 |
+
ranking_window_applied = False
|
| 653 |
+
ranking_next_request_hint: str | None = None
|
| 654 |
+
ranking_complete = sort_key == "liked_at" and exact_count
|
| 655 |
+
enriched = 0
|
| 656 |
+
selected_pairs: list[tuple[int, dict[str, Any]]]
|
| 657 |
+
if count_only:
|
| 658 |
+
selected_pairs = []
|
| 659 |
+
ranking_complete = False if matched > 0 else exact_count
|
| 660 |
+
elif sort_key == "liked_at":
|
| 661 |
+
selected_pairs = matched_rows[:applied_limit]
|
| 662 |
+
else:
|
| 663 |
+
metric = str(sort_key)
|
| 664 |
+
requested_window = (
|
| 665 |
+
ranking_window if ranking_window is not None else ranking_default
|
| 666 |
+
)
|
| 667 |
+
effective_ranking_window = ctx._clamp_int(
|
| 668 |
+
requested_window, default=ranking_default, minimum=1, maximum=enrich_cap
|
| 669 |
+
)
|
| 670 |
+
ranking_window_applied = (
|
| 671 |
+
ranking_window is not None
|
| 672 |
+
and effective_ranking_window != int(ranking_window)
|
| 673 |
+
)
|
| 674 |
+
shortlist_size = min(effective_ranking_window, matched, scan_lim)
|
| 675 |
+
ranking_window_hit = matched > shortlist_size
|
| 676 |
+
shortlist = matched_rows[:shortlist_size]
|
| 677 |
+
candidates = [
|
| 678 |
+
pair
|
| 679 |
+
for pair in shortlist
|
| 680 |
+
if pair[1].get(metric) is None
|
| 681 |
+
and isinstance(pair[1].get("repo_id"), str)
|
| 682 |
+
and (pair[1].get("repo_type") in {"model", "dataset", "space"})
|
| 683 |
+
]
|
| 684 |
+
enrich_budget = min(len(candidates), ctx._budget_remaining(), shortlist_size)
|
| 685 |
+
for _, item in candidates[:enrich_budget]:
|
| 686 |
+
repo_type = str(item.get("repo_type"))
|
| 687 |
+
repo_id = str(item.get("repo_id"))
|
| 688 |
+
detail_endpoint = f"/api/{ctx._canonical_repo_type(repo_type)}s/{repo_id}"
|
| 689 |
+
try:
|
| 690 |
+
detail = ctx._host_hf_call(
|
| 691 |
+
detail_endpoint,
|
| 692 |
+
lambda rt=repo_type, rid=repo_id: ctx._repo_detail_call(
|
| 693 |
+
ctx._get_hf_api_client(), rt, rid
|
| 694 |
+
),
|
| 695 |
+
)
|
| 696 |
+
except Exception:
|
| 697 |
+
continue
|
| 698 |
+
likes = ctx._as_int(getattr(detail, "likes", None))
|
| 699 |
+
downloads = ctx._as_int(getattr(detail, "downloads", None))
|
| 700 |
+
if likes is not None:
|
| 701 |
+
item["repo_likes"] = likes
|
| 702 |
+
if downloads is not None:
|
| 703 |
+
item["repo_downloads"] = downloads
|
| 704 |
+
enriched += 1
|
| 705 |
+
|
| 706 |
+
def _ranking_key(pair: tuple[int, dict[str, Any]]) -> tuple[int, int, int]:
|
| 707 |
+
idx, row = pair
|
| 708 |
+
metric_value = ctx._as_int(row.get(metric))
|
| 709 |
+
if metric_value is None:
|
| 710 |
+
return (1, 0, idx)
|
| 711 |
+
return (0, -metric_value, idx)
|
| 712 |
+
|
| 713 |
+
ranked_shortlist = sorted(shortlist, key=_ranking_key)
|
| 714 |
+
selected_pairs = ranked_shortlist[:applied_limit]
|
| 715 |
+
ranking_complete = (
|
| 716 |
+
exact_count
|
| 717 |
+
and shortlist_size >= matched
|
| 718 |
+
and (len(candidates) <= enrich_budget)
|
| 719 |
+
)
|
| 720 |
+
if not ranking_complete:
|
| 721 |
+
if ranking_window_hit:
|
| 722 |
+
if effective_ranking_window < enrich_cap:
|
| 723 |
+
ranking_next_request_hint = (
|
| 724 |
+
f"Increase ranking_window above {effective_ranking_window} "
|
| 725 |
+
"for broader popularity reranking"
|
| 726 |
+
)
|
| 727 |
+
else:
|
| 728 |
+
ranking_next_request_hint = (
|
| 729 |
+
f"Popularity reranking is capped at {effective_ranking_window} "
|
| 730 |
+
"candidate repos per call"
|
| 731 |
+
)
|
| 732 |
+
elif len(candidates) > enrich_budget:
|
| 733 |
+
ranking_next_request_hint = (
|
| 734 |
+
f"Popularity reranking exhausted detail budget after {enrich_budget} "
|
| 735 |
+
"repo enrichments"
|
| 736 |
+
)
|
| 737 |
+
try:
|
| 738 |
+
items = ctx._project_user_like_items([row for _, row in selected_pairs], fields)
|
| 739 |
+
except ValueError as exc:
|
| 740 |
+
return ctx._helper_error(
|
| 741 |
+
start_calls=start_calls,
|
| 742 |
+
source=endpoint,
|
| 743 |
+
error=exc,
|
| 744 |
+
)
|
| 745 |
+
popularity_present = sum(
|
| 746 |
+
(1 for _, row in selected_pairs if row.get("repo_likes") is not None)
|
| 747 |
+
)
|
| 748 |
+
sample_complete = (
|
| 749 |
+
exact_count
|
| 750 |
+
and applied_limit >= matched
|
| 751 |
+
and (sort_key == "liked_at" or ranking_complete)
|
| 752 |
+
and (not count_only or matched == 0)
|
| 753 |
+
)
|
| 754 |
+
scan_limit_hit = not scan_exhaustive and len(payload) >= scan_lim
|
| 755 |
+
more_available = ctx._derive_more_available(
|
| 756 |
+
sample_complete=sample_complete,
|
| 757 |
+
exact_count=exact_count,
|
| 758 |
+
returned=len(items),
|
| 759 |
+
total=total,
|
| 760 |
+
)
|
| 761 |
+
if scan_limit_hit:
|
| 762 |
+
more_available = "unknown" if allowed_repo_types is not None or where else True
|
| 763 |
+
meta = ctx._build_exhaustive_result_meta(
|
| 764 |
+
base_meta={
|
| 765 |
+
"scanned": len(scanned_rows),
|
| 766 |
+
"total": total,
|
| 767 |
+
"total_available": len(payload),
|
| 768 |
+
"total_matched": total_matched,
|
| 769 |
+
"count_source": "scan",
|
| 770 |
+
"lower_bound": not exact_count,
|
| 771 |
+
"enriched": enriched,
|
| 772 |
+
"popularity_present": popularity_present,
|
| 773 |
+
"sort_applied": sort_key,
|
| 774 |
+
"ranking_window": effective_ranking_window,
|
| 775 |
+
"requested_ranking_window": ranking_window,
|
| 776 |
+
"ranking_window_applied": ranking_window_applied,
|
| 777 |
+
"ranking_window_hit": ranking_window_hit,
|
| 778 |
+
"ranking_next_request_hint": ranking_next_request_hint,
|
| 779 |
+
"ranking_complete": ranking_complete,
|
| 780 |
+
"username": resolved_username,
|
| 781 |
+
},
|
| 782 |
+
limit_plan=limit_plan,
|
| 783 |
+
matched_count=matched,
|
| 784 |
+
returned_count=len(items),
|
| 785 |
+
exact_count=exact_count,
|
| 786 |
+
count_only=count_only,
|
| 787 |
+
sample_complete=sample_complete,
|
| 788 |
+
more_available=more_available,
|
| 789 |
+
scan_limit_hit=scan_limit_hit,
|
| 790 |
+
truncated_extra=sort_key != "liked_at" and (not ranking_complete),
|
| 791 |
+
)
|
| 792 |
+
return ctx._helper_success(
|
| 793 |
+
start_calls=start_calls, source=endpoint, items=items, meta=meta
|
| 794 |
+
)
|
| 795 |
+
|
| 796 |
+
|
| 797 |
+
async def hf_repo_likers(
    ctx: HelperRuntimeContext,
    repo_id: str,
    repo_type: str,
    limit: int | None = None,
    count_only: bool = False,
    pro_only: bool | None = None,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """List the accounts that liked a repo.

    Fetches the full likers list from the Hub in a single raw call (the
    upstream endpoint exposes no pagination here — see ``upstream_pagination:
    "none"`` in the meta), then filters locally by ``pro_only`` and ``where``,
    applies the limit plan, and projects rows to ``fields``.

    Args:
        ctx: Helper runtime context supplying HTTP, policy, and envelope plumbing.
        repo_id: Target repo id; must be non-empty.
        repo_type: model/dataset/space (aliases accepted via canonicalization).
        limit: Max rows to return; ``None`` uses the policy default.
        count_only: When True, return counts only (``applied_limit`` forced to 0).
        pro_only: True keeps only PRO likers, False drops PRO likers, None keeps all.
        where: Optional filter over actor fields (validated against
            ``ACTOR_CANONICAL_FIELDS``).
        fields: Optional projection of actor fields.

    Returns:
        A helper success/error envelope dict.
    """
    start_calls = ctx.call_count["n"]
    rid = str(repo_id or "").strip()
    if not rid:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos/<repo>/likers",
            error="repo_id is required",
        )
    # Empty default means "unknown" so unsupported aliases fail validation below.
    rt = ctx._canonical_repo_type(repo_type, default="")
    if rt not in {"model", "dataset", "space"}:
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/repos/{rid}/likers",
            error=f"Unsupported repo_type '{repo_type}'",
            repo_id=rid,
        )
    default_limit = ctx._policy_int("hf_repo_likers", "default_limit", 1000)
    requested_limit = limit
    default_limit_used = requested_limit is None and (not count_only)
    has_where = isinstance(where, dict) and bool(where)
    endpoint = f"/api/{rt}s/{rid}/likers"
    resp = ctx._host_raw_call(endpoint)
    if not resp.get("ok"):
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=resp.get("error") or "repo likers fetch failed",
            repo_id=rid,
            repo_type=rt,
        )
    payload = resp.get("data") if isinstance(resp.get("data"), list) else []
    try:
        normalized_where = ctx._normalize_where(
            where, allowed_fields=ACTOR_CANONICAL_FIELDS
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
            repo_id=rid,
            repo_type=rt,
        )
    # Normalize each raw liker row to the canonical actor shape, applying the
    # pro_only and where filters as we go; rows without a usable username are
    # dropped silently.
    normalized: list[dict[str, Any]] = []
    for row in payload:
        if not isinstance(row, dict):
            continue
        username = row.get("user") or row.get("username")
        if not isinstance(username, str) or not username:
            continue
        item = {
            "username": username,
            "fullname": row.get("fullname"),
            "type": row.get("type")
            if isinstance(row.get("type"), str) and row.get("type")
            else "user",
            "is_pro": row.get("isPro"),
        }
        # Tri-state pro_only: only strict True/False filter; None passes everything.
        if pro_only is True and item.get("is_pro") is not True:
            continue
        if pro_only is False and item.get("is_pro") is True:
            continue
        if not ctx._item_matches_where(item, normalized_where):
            continue
        normalized.append(item)
    if count_only:
        applied_limit = 0
    elif requested_limit is None:
        applied_limit = default_limit
    else:
        try:
            applied_limit = max(0, int(requested_limit))
        except Exception:
            # Non-numeric limit falls back to the policy default rather than erroring.
            applied_limit = default_limit
    limit_plan = {
        "requested_limit": requested_limit,
        "applied_limit": applied_limit,
        "default_limit_used": default_limit_used,
        "hard_cap_applied": False,
    }
    matched = len(normalized)
    items = [] if count_only else normalized[:applied_limit]
    limit_hit = applied_limit > 0 and matched > applied_limit
    truncated_by = ctx._derive_truncated_by(
        hard_cap=False, limit_hit=limit_hit
    )
    # The sample is complete when nothing was cut; in count_only mode
    # completeness is only claimed when no rows matched at all.
    sample_complete = matched <= applied_limit and (not count_only or matched == 0)
    truncated = truncated_by != "none"
    more_available = ctx._derive_more_available(
        sample_complete=sample_complete,
        exact_count=True,
        returned=len(items),
        total=matched,
    )
    try:
        items = ctx._project_actor_items(items, fields)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
            repo_id=rid,
            repo_type=rt,
        )
    meta = ctx._build_exhaustive_meta(
        base_meta={
            "scanned": len(payload),
            "matched": matched,
            "returned": len(items),
            "total": matched,
            "total_available": len(payload),
            "total_matched": matched,
            "truncated": truncated,
            "count_source": "likers_list",
            "lower_bound": False,
            "repo_id": rid,
            "repo_type": rt,
            "pro_only": pro_only,
            "where_applied": has_where,
            "upstream_pagination": "none",
        },
        limit_plan=limit_plan,
        sample_complete=sample_complete,
        exact_count=True,
        truncated_by=truncated_by,
        more_available=more_available,
    )
    meta["hard_cap_applied"] = False
    return ctx._helper_success(
        start_calls=start_calls, source=endpoint, items=items, meta=meta
    )
|
| 938 |
+
|
| 939 |
+
|
| 940 |
+
async def hf_repo_discussions(
    ctx: HelperRuntimeContext,
    repo_type: str,
    repo_id: str,
    limit: int = 20,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """List up to ``limit`` discussions for a repo, projected to ``fields``.

    The discussion stream from the HfApi client is lazily sliced via
    ``islice`` so at most ``limit`` entries are pulled from upstream.
    Returns a helper success/error envelope dict.
    """
    start_calls = ctx.call_count["n"]
    rt = ctx._canonical_repo_type(repo_type)
    rid = str(repo_id or "").strip()
    # A fully-qualified "owner/name" id is required before any network call.
    if "/" not in rid:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/.../discussions",
            error="repo_id must be owner/name",
        )
    lim = ctx._clamp_int(
        limit, default=20, minimum=1, maximum=SELECTIVE_ENDPOINT_RETURN_HARD_CAP
    )
    endpoint = f"/api/{rt}s/{rid}/discussions"
    try:
        discussions = ctx._host_hf_call(
            endpoint,
            lambda: list(
                islice(
                    ctx._get_hf_api_client().get_repo_discussions(
                        repo_id=rid, repo_type=rt
                    ),
                    lim,
                )
            ),
        )
    except Exception as e:
        return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)

    def _as_row(disc: Any) -> dict[str, Any]:
        # Flatten one discussion object into the canonical item shape.
        created = getattr(disc, "created_at", None)
        return {
            "num": ctx._as_int(getattr(disc, "num", None)),
            "repo_id": rid,
            "repo_type": rt,
            "title": getattr(disc, "title", None),
            "author": getattr(disc, "author", None),
            "created_at": str(created) if created is not None else None,
            "status": getattr(disc, "status", None),
            "url": getattr(disc, "url", None),
        }

    items = [_as_row(d) for d in discussions]
    try:
        items = ctx._project_discussion_items(items, fields)
    except ValueError as exc:
        return ctx._helper_error(start_calls=start_calls, source=endpoint, error=exc)
    return ctx._helper_success(
        start_calls=start_calls,
        source=endpoint,
        items=items,
        scanned=len(items),
        matched=len(items),
        returned=len(items),
        truncated=False,
        total_count=None,
    )
|
| 1005 |
+
|
| 1006 |
+
|
| 1007 |
+
async def hf_repo_discussion_details(
    ctx: HelperRuntimeContext,
    repo_type: str,
    repo_id: str,
    discussion_num: int,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """Fetch one discussion and summarize its comment activity.

    Builds a single item containing the discussion header fields plus a
    summary of its "comment" events (count, and the author/timestamp/text of
    the most recent comment, ordered lexicographically by the stringified
    ``created_at`` — ISO-8601 strings sort chronologically).

    Args:
        ctx: Helper runtime context.
        repo_type: model/dataset/space (aliases accepted).
        repo_id: Must be "owner/name".
        discussion_num: Discussion number; must coerce to int.
        fields: Optional projection of the detail item.

    Returns:
        A helper success/error envelope dict with exactly one item on success.
    """
    start_calls = ctx.call_count["n"]
    rt = ctx._canonical_repo_type(repo_type)
    rid = str(repo_id or "").strip()
    if "/" not in rid:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/.../discussions/<num>",
            error="repo_id must be owner/name",
        )
    num = ctx._as_int(discussion_num)
    if num is None:
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/{rt}s/{rid}/discussions/<num>",
            error="discussion_num must be an integer",
        )
    endpoint = f"/api/{rt}s/{rid}/discussions/{num}"
    try:
        detail = ctx._host_hf_call(
            endpoint,
            lambda: ctx._get_hf_api_client().get_discussion_details(
                repo_id=rid, discussion_num=int(num), repo_type=rt
            ),
        )
    except Exception as e:
        return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
    # Collect only events of type "comment"; other event kinds (status changes
    # etc.) are ignored for the summary.
    comment_events: list[dict[str, Any]] = []
    raw_events = getattr(detail, "events", None)
    if isinstance(raw_events, list):
        for event in raw_events:
            if str(getattr(event, "type", "")).strip().lower() != "comment":
                continue
            comment_events.append(
                {
                    "author": getattr(event, "author", None),
                    "created_at": ctx._dt_to_str(getattr(event, "created_at", None)),
                    "text": getattr(event, "content", None),
                    "rendered": getattr(event, "rendered", None),
                }
            )
    latest_comment: dict[str, Any] | None = None
    if comment_events:
        # Max over stringified timestamps; None timestamps compare as "".
        latest_comment = max(
            comment_events, key=lambda row: str(row.get("created_at") or "")
        )
    item: dict[str, Any] = {
        "num": num,
        "repo_id": rid,
        "repo_type": rt,
        "title": getattr(detail, "title", None),
        "author": getattr(detail, "author", None),
        "created_at": ctx._dt_to_str(getattr(detail, "created_at", None)),
        "status": getattr(detail, "status", None),
        "url": getattr(detail, "url", None),
        "comment_count": len(comment_events),
        "latest_comment_author": latest_comment.get("author")
        if latest_comment
        else None,
        "latest_comment_created_at": latest_comment.get("created_at")
        if latest_comment
        else None,
        "latest_comment_text": latest_comment.get("text") if latest_comment else None,
        "latest_comment_html": latest_comment.get("rendered")
        if latest_comment
        else None,
    }
    try:
        items = ctx._project_discussion_detail_items([item], fields)
    except ValueError as exc:
        return ctx._helper_error(start_calls=start_calls, source=endpoint, error=exc)
    return ctx._helper_success(
        start_calls=start_calls,
        source=endpoint,
        items=items,
        scanned=len(comment_events),
        matched=1,
        returned=len(items),
        truncated=False,
        total_comments=len(comment_events),
    )
|
| 1094 |
+
|
| 1095 |
+
|
| 1096 |
+
def _resolve_repo_detail_row(
|
| 1097 |
+
ctx: HelperRuntimeContext,
|
| 1098 |
+
api: "HfApi",
|
| 1099 |
+
repo_id: str,
|
| 1100 |
+
attempt_types: list[str],
|
| 1101 |
+
) -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
|
| 1102 |
+
rid = str(repo_id or "").strip()
|
| 1103 |
+
if "/" not in rid:
|
| 1104 |
+
return (None, {"repo_id": rid, "error": "repo_id must be owner/name"})
|
| 1105 |
+
resolved_type: str | None = None
|
| 1106 |
+
detail: Any = None
|
| 1107 |
+
last_endpoint = "/api/repos"
|
| 1108 |
+
errors: list[str] = []
|
| 1109 |
+
for rt in attempt_types:
|
| 1110 |
+
endpoint = f"/api/{rt}s/{rid}"
|
| 1111 |
+
last_endpoint = endpoint
|
| 1112 |
+
try:
|
| 1113 |
+
detail = ctx._host_hf_call(
|
| 1114 |
+
endpoint, lambda rt=rt, rid=rid: ctx._repo_detail_call(api, rt, rid)
|
| 1115 |
+
)
|
| 1116 |
+
resolved_type = rt
|
| 1117 |
+
break
|
| 1118 |
+
except Exception as e:
|
| 1119 |
+
errors.append(f"{rt}: {str(e)}")
|
| 1120 |
+
if resolved_type is None or detail is None:
|
| 1121 |
+
return (
|
| 1122 |
+
None,
|
| 1123 |
+
{
|
| 1124 |
+
"repo_id": rid,
|
| 1125 |
+
"error": "; ".join(errors[:3]) if errors else "repo lookup failed",
|
| 1126 |
+
"attempted_repo_types": list(attempt_types),
|
| 1127 |
+
"source": last_endpoint,
|
| 1128 |
+
},
|
| 1129 |
+
)
|
| 1130 |
+
return (ctx._normalize_repo_detail_row(detail, resolved_type, rid), None)
|
| 1131 |
+
|
| 1132 |
+
|
| 1133 |
+
async def hf_repo_details(
    ctx: HelperRuntimeContext,
    repo_id: str | None = None,
    repo_ids: list[str] | None = None,
    repo_type: str = "auto",
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """Fetch normalized detail rows for one or many repos.

    Exactly one of ``repo_id`` / ``repo_ids`` may be given. With
    ``repo_type="auto"`` each id is tried as model, then dataset, then space;
    otherwise only the canonicalized type is attempted. Partial failures are
    reported in the ``failures`` list of the success envelope; the call only
    errors when no id resolves at all.

    Args:
        ctx: Helper runtime context.
        repo_id: Single repo id ("owner/name").
        repo_ids: Batch of repo ids; mutually exclusive with ``repo_id``.
        repo_type: "auto" or model/dataset/space (aliases accepted).
        fields: Optional projection of the repo detail fields.

    Returns:
        A helper success/error envelope dict.
    """
    start_calls = ctx.call_count["n"]
    if repo_id is not None and repo_ids is not None:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos",
            error="Pass either repo_id or repo_ids, not both",
        )
    requested_ids = (
        [str(repo_id).strip()]
        if isinstance(repo_id, str) and str(repo_id).strip()
        else []
    )
    # repo_ids (even an empty list) takes over the single-id form entirely.
    if repo_ids is not None:
        requested_ids = ctx._coerce_str_list(repo_ids)
    if not requested_ids:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos",
            error="repo_id or repo_ids is required",
        )
    raw_type = str(repo_type or "auto").strip().lower()
    if raw_type in {"", "auto"}:
        # Auto mode probes all three namespaces in a fixed order.
        base_attempt_types = ["model", "dataset", "space"]
    else:
        canonical_type = ctx._canonical_repo_type(raw_type, default="")
        if canonical_type not in {"model", "dataset", "space"}:
            return ctx._helper_error(
                start_calls=start_calls,
                source="/api/repos",
                error=f"Unsupported repo_type '{repo_type}'",
            )
        base_attempt_types = [canonical_type]
    api = ctx._get_hf_api_client()
    items: list[dict[str, Any]] = []
    failures: list[dict[str, Any]] = []
    for rid in requested_ids:
        row, failure = _resolve_repo_detail_row(ctx, api, rid, base_attempt_types)
        if row is None:
            if failure is not None:
                failures.append(failure)
            continue
        items.append(row)
    if not items:
        # All lookups failed: surface the first failure's message as the summary.
        summary = failures[0]["error"] if failures else "repo lookup failed"
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos",
            error=summary,
            failures=failures,
            repo_type=repo_type,
        )
    try:
        items = ctx._project_repo_items(items, fields)
    except ValueError as exc:
        return ctx._helper_error(start_calls=start_calls, source="/api/repos", error=exc)
    return ctx._helper_success(
        start_calls=start_calls,
        source="/api/repos",
        items=items,
        repo_type=repo_type,
        requested_repo_ids=requested_ids,
        failures=failures or None,
        matched=len(items),
        returned=len(items),
    )
|
| 1205 |
+
|
| 1206 |
+
|
| 1207 |
+
async def hf_trending(
    ctx: HelperRuntimeContext,
    repo_type: str = "model",
    limit: int = 20,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """Return the Hub's recently-trending repos, filtered and projected.

    Rows come from ``/api/trending`` and are ranked in upstream order
    (``rank`` starts at 1). ``lim`` is applied twice: once to the raw rows
    before the ``where`` filter, and again to the filtered items before
    projection, so filtering can only shrink — never backfill — the page.

    Args:
        ctx: Helper runtime context.
        repo_type: model/dataset/space (aliases accepted) or "all".
        limit: Page size, clamped to the policy min/max.
        where: Optional filter over ``TRENDING_DEFAULT_FIELDS``.
        fields: Optional projection over ``TRENDING_DEFAULT_FIELDS``.

    Returns:
        A helper success/error envelope dict.
    """
    start_calls = ctx.call_count["n"]
    default_limit = ctx._policy_int("hf_trending", "default_limit", 20)
    max_limit = ctx._policy_int(
        "hf_trending", "max_limit", TRENDING_ENDPOINT_MAX_LIMIT
    )
    raw_type = str(repo_type or "model").strip().lower()
    if raw_type == "all":
        # "all" is passed through to the endpoint verbatim.
        requested_type = "all"
    else:
        requested_type = ctx._canonical_repo_type(raw_type, default="")
        if requested_type not in {"model", "dataset", "space"}:
            return ctx._helper_error(
                start_calls=start_calls,
                source="/api/trending",
                error=f"Unsupported repo_type '{repo_type}'",
            )
    lim = ctx._clamp_int(limit, default=default_limit, minimum=1, maximum=max_limit)
    resp = ctx._host_raw_call(
        "/api/trending", params={"type": requested_type, "limit": lim}
    )
    if not resp.get("ok"):
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/trending",
            error=resp.get("error") or "trending fetch failed",
        )
    payload = resp.get("data") if isinstance(resp.get("data"), dict) else {}
    rows = (
        payload.get("recentlyTrending")
        if isinstance(payload.get("recentlyTrending"), list)
        else []
    )
    items: list[dict[str, Any]] = []
    # When "all" is requested, rows lacking an explicit type are normalized as
    # models by default.
    default_row_type = requested_type if requested_type != "all" else "model"
    for idx, row in enumerate(rows[:lim], start=1):
        if not isinstance(row, dict):
            continue
        repo = row.get("repoData") if isinstance(row.get("repoData"), dict) else {}
        items.append(ctx._normalize_trending_row(repo, default_row_type, rank=idx))
    try:
        items = ctx._apply_where(items, where, allowed_fields=TRENDING_DEFAULT_FIELDS)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/trending",
            error=exc,
        )
    matched = len(items)
    try:
        items = ctx._project_items(
            items[:lim],
            fields,
            allowed_fields=TRENDING_DEFAULT_FIELDS,
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/trending",
            error=exc,
        )
    return ctx._helper_success(
        start_calls=start_calls,
        source="/api/trending",
        items=items,
        repo_type=requested_type,
        limit=lim,
        scanned=len(rows),
        matched=matched,
        returned=len(items),
        trending_score_available=any(
            (item.get("trending_score") is not None for item in items)
        ),
        ordered_ranking=True,
    )
|
| 1288 |
+
|
| 1289 |
+
|
| 1290 |
+
async def hf_daily_papers(
    ctx: HelperRuntimeContext,
    limit: int = 20,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """Fetch the daily-papers feed, filter it with ``where``, project ``fields``.

    Items carry a 1-based ``rank`` reflecting upstream order. Returns a
    helper success/error envelope dict.
    """
    start_calls = ctx.call_count["n"]
    policy_default = ctx._policy_int("hf_daily_papers", "default_limit", 20)
    policy_max = ctx._policy_int(
        "hf_daily_papers", "max_limit", OUTPUT_ITEMS_TRUNCATION_LIMIT
    )
    lim = ctx._clamp_int(limit, default=policy_default, minimum=1, maximum=policy_max)
    response = ctx._host_raw_call("/api/daily_papers", params={"limit": lim})
    if not response.get("ok"):
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/daily_papers",
            error=response.get("error") or "daily papers fetch failed",
        )
    data = response.get("data")
    rows = data if isinstance(data, list) else []
    # Rank is assigned from upstream position; non-dict rows consume a rank
    # slot but produce no item.
    items = [
        ctx._normalize_daily_paper_row(row, rank=rank)
        for rank, row in enumerate(rows[:lim], start=1)
        if isinstance(row, dict)
    ]
    try:
        items = ctx._apply_where(
            items, where, allowed_fields=DAILY_PAPER_CANONICAL_FIELDS
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/daily_papers",
            error=exc,
        )
    matched = len(items)
    try:
        items = ctx._project_daily_paper_items(items[:lim], fields)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/daily_papers",
            error=exc,
        )
    return ctx._helper_success(
        start_calls=start_calls,
        source="/api/daily_papers",
        items=items,
        limit=lim,
        scanned=len(rows),
        matched=matched,
        returned=len(items),
        ordered_ranking=True,
    )
|
| 1344 |
+
|
| 1345 |
+
|
| 1346 |
+
def register_repo_helpers(ctx: HelperRuntimeContext) -> dict[str, Callable[..., Any]]:
    """Bind every repo-related helper coroutine to ``ctx``, keyed by its name.

    Each helper is exposed as a partial with ``ctx`` pre-applied; dict keys
    equal the functions' ``__name__`` values.
    """
    helpers = (
        hf_models_search,
        hf_datasets_search,
        hf_spaces_search,
        hf_repo_search,
        hf_user_likes,
        hf_repo_likers,
        hf_repo_discussions,
        hf_repo_discussion_details,
        hf_repo_details,
        hf_trending,
        hf_daily_papers,
    )
    return {fn.__name__: partial(fn, ctx) for fn in helpers}
|
.prod/monty_api/http_runtime.py
ADDED
|
@@ -0,0 +1,597 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
from typing import TYPE_CHECKING, Any
|
| 6 |
+
from urllib.error import HTTPError, URLError
|
| 7 |
+
from urllib.parse import urlencode
|
| 8 |
+
from urllib.request import Request, urlopen
|
| 9 |
+
|
| 10 |
+
from .aliases import REPO_SORT_KEYS
|
| 11 |
+
from .constants import (
|
| 12 |
+
DEFAULT_TIMEOUT_SEC,
|
| 13 |
+
)
|
| 14 |
+
from .registry import REPO_API_ADAPTERS, REPO_SEARCH_DEFAULT_EXPAND
|
| 15 |
+
from .validation import _endpoint_allowed, _normalize_endpoint, _sanitize_params
|
| 16 |
+
|
| 17 |
+
if TYPE_CHECKING:
|
| 18 |
+
from huggingface_hub import HfApi
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _load_request_token() -> str | None:
|
| 22 |
+
try:
|
| 23 |
+
from fast_agent.mcp.auth.context import request_bearer_token # type: ignore
|
| 24 |
+
|
| 25 |
+
token = request_bearer_token.get()
|
| 26 |
+
if token:
|
| 27 |
+
return token
|
| 28 |
+
except Exception:
|
| 29 |
+
pass
|
| 30 |
+
return None
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _load_token() -> str | None:
    """Prefer the per-request bearer token, then the HF_TOKEN env var."""
    return _load_request_token() or os.getenv("HF_TOKEN") or None
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _json_best_effort(raw: bytes) -> Any:
|
| 41 |
+
try:
|
| 42 |
+
return json.loads(raw)
|
| 43 |
+
except Exception:
|
| 44 |
+
return raw.decode("utf-8", errors="replace")
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def _clamp_int(value: Any, *, default: int, minimum: int, maximum: int) -> int:
|
| 48 |
+
try:
|
| 49 |
+
out = int(value)
|
| 50 |
+
except Exception:
|
| 51 |
+
out = default
|
| 52 |
+
return max(minimum, min(out, maximum))
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _as_int(value: Any) -> int | None:
|
| 56 |
+
try:
|
| 57 |
+
return int(value)
|
| 58 |
+
except Exception:
|
| 59 |
+
return None
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _canonical_repo_type(value: Any, *, default: str = "model") -> str:
|
| 63 |
+
raw = str(value or "").strip().lower()
|
| 64 |
+
aliases = {
|
| 65 |
+
"model": "model",
|
| 66 |
+
"models": "model",
|
| 67 |
+
"dataset": "dataset",
|
| 68 |
+
"datasets": "dataset",
|
| 69 |
+
"space": "space",
|
| 70 |
+
"spaces": "space",
|
| 71 |
+
}
|
| 72 |
+
return aliases.get(raw, default)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def _normalize_repo_sort_key(
    repo_type: str, sort_value: Any
) -> tuple[str | None, str | None]:
    """Validate *sort_value* as a sort key for *repo_type*.

    Returns ``(key, None)`` on success, ``(None, None)`` when no sort was
    requested, and ``(None, error_message)`` for an invalid key.
    """
    key = str(sort_value or "").strip()
    if not key:
        return None, None

    known_keys = {
        "created_at",
        "downloads",
        "last_modified",
        "likes",
        "trending_score",
    }
    if key not in known_keys:
        return None, f"Invalid sort key '{key}'"

    canonical = _canonical_repo_type(repo_type)
    allowed_for_type = REPO_SORT_KEYS.get(canonical, set())
    if key in allowed_for_type:
        return key, None
    return (
        None,
        f"Invalid sort key '{key}' for repo_type='{canonical}'. Allowed: {', '.join(sorted(allowed_for_type))}",
    )
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def _repo_api_adapter(repo_type: str) -> Any:
    """Look up the HfApi adapter registered for *repo_type*.

    Raises ValueError when the type is not one of model/dataset/space.
    """
    canonical = _canonical_repo_type(repo_type, default="")
    found = REPO_API_ADAPTERS.get(canonical)
    if found is None:
        raise ValueError(f"Unsupported repo_type '{repo_type}'")
    return found
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def _repo_list_call(api: HfApi, repo_type: str, **kwargs: Any) -> list[Any]:
    """Invoke the list endpoint for *repo_type* on *api* and materialize the results."""
    adapter = _repo_api_adapter(repo_type)
    list_fn = getattr(api, adapter.list_method_name)
    return list(list_fn(**kwargs))
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def _repo_detail_call(api: HfApi, repo_type: str, repo_id: str) -> Any:
    """Fetch detail info for *repo_id*; Spaces additionally request the
    default expand fields (runtime/stage info is not returned otherwise)."""
    adapter = _repo_api_adapter(repo_type)
    detail_fn = getattr(api, adapter.detail_method_name)
    if _canonical_repo_type(repo_type) != "space":
        return detail_fn(repo_id)
    return detail_fn(repo_id, expand=list(REPO_SEARCH_DEFAULT_EXPAND["space"]))
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def _coerce_str_list(value: Any) -> list[str]:
|
| 125 |
+
if value is None:
|
| 126 |
+
return []
|
| 127 |
+
if isinstance(value, str):
|
| 128 |
+
raw = [value]
|
| 129 |
+
elif isinstance(value, (list, tuple, set)):
|
| 130 |
+
raw = list(value)
|
| 131 |
+
else:
|
| 132 |
+
raise ValueError("Expected a string or list of strings")
|
| 133 |
+
return [str(v).strip() for v in raw if str(v).strip()]
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def _optional_str_list(value: Any) -> list[str] | None:
|
| 137 |
+
if value is None:
|
| 138 |
+
return None
|
| 139 |
+
if isinstance(value, str):
|
| 140 |
+
out = [value.strip()] if value.strip() else []
|
| 141 |
+
return out or None
|
| 142 |
+
if isinstance(value, (list, tuple, set)):
|
| 143 |
+
out = [str(v).strip() for v in value if str(v).strip()]
|
| 144 |
+
return out or None
|
| 145 |
+
return None
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def _space_runtime_to_dict(value: Any) -> dict[str, Any] | None:
    """Normalize a Space runtime payload (raw dict or object) to a small dict.

    Keeps only stage / hardware / requested_hardware / sleep_time, dropping
    None entries; returns None when nothing remains or *value* is None.
    """
    if value is None:
        return None

    if isinstance(value, dict):
        raw = value
        # "hardware" may be a nested {"current": ..., "requested": ...} dict
        # or a bare value describing the current hardware.
        hardware = raw.get("hardware")
        current_hardware = (
            hardware.get("current") if isinstance(hardware, dict) else hardware
        )
        requested_hardware = (
            hardware.get("requested")
            if isinstance(hardware, dict)
            else raw.get("requested_hardware") or raw.get("requestedHardware")
        )
        # Prefer "gcTimeout"; otherwise accept either snake_case or camelCase
        # sleep-time keys. NOTE(review): a sleep_time of 0 falls through to
        # "sleepTime" via `or` — presumably intentional, verify upstream payloads.
        sleep_time = _as_int(
            raw.get("gcTimeout")
            if raw.get("gcTimeout") is not None
            else raw.get("sleep_time") or raw.get("sleepTime")
        )
        out = {
            "stage": raw.get("stage"),
            "hardware": current_hardware,
            "requested_hardware": requested_hardware,
            "sleep_time": sleep_time,
        }
        # Drop None values; collapse an all-None result to None.
        return {key: val for key, val in out.items() if val is not None} or None

    # Object form (e.g. a huggingface_hub runtime object): read attributes.
    out = {
        "stage": getattr(value, "stage", None),
        "hardware": getattr(value, "hardware", None),
        "requested_hardware": getattr(value, "requested_hardware", None),
        "sleep_time": _as_int(getattr(value, "sleep_time", None)),
    }
    return {key: val for key, val in out.items() if val is not None} or None
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def _extract_num_params(num_params: Any = None, safetensors: Any = None) -> int | None:
    """Resolve a model parameter count.

    An explicit *num_params* wins; otherwise fall back to the safetensors
    "total" (attribute on an object, or key in a dict).
    """
    explicit = _as_int(num_params)
    if explicit is not None:
        return explicit

    fallback = getattr(safetensors, "total", None)
    if fallback is None and isinstance(safetensors, dict):
        fallback = safetensors.get("total")
    return _as_int(fallback)
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def _extract_num_params_from_object(row: Any) -> int | None:
    """Pull a parameter count from an object row, trying several attribute spellings."""
    candidate = None
    for attr in ("num_params", "numParameters", "num_parameters"):
        candidate = getattr(row, attr, None)
        if candidate is not None:
            break
    return _extract_num_params(candidate, getattr(row, "safetensors", None))
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def _extract_num_params_from_dict(row: dict[str, Any]) -> int | None:
    """Pull a parameter count from a dict row, trying several key spellings."""
    candidate = None
    for key in ("num_params", "numParameters", "num_parameters"):
        candidate = row.get(key)
        if candidate is not None:
            break
    return _extract_num_params(candidate, row.get("safetensors"))
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
def _extract_author_names(value: Any) -> list[str] | None:
|
| 215 |
+
if not isinstance(value, (list, tuple)):
|
| 216 |
+
return None
|
| 217 |
+
names: list[str] = []
|
| 218 |
+
for item in value:
|
| 219 |
+
if isinstance(item, str) and item.strip():
|
| 220 |
+
names.append(item.strip())
|
| 221 |
+
continue
|
| 222 |
+
if isinstance(item, dict):
|
| 223 |
+
name = item.get("name")
|
| 224 |
+
if isinstance(name, str) and name.strip():
|
| 225 |
+
names.append(name.strip())
|
| 226 |
+
continue
|
| 227 |
+
name = getattr(item, "name", None)
|
| 228 |
+
if isinstance(name, str) and name.strip():
|
| 229 |
+
names.append(name.strip())
|
| 230 |
+
return names or None
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
def _extract_profile_name(value: Any) -> str | None:
|
| 234 |
+
if isinstance(value, str) and value.strip():
|
| 235 |
+
return value.strip()
|
| 236 |
+
if isinstance(value, dict):
|
| 237 |
+
for key in ("user", "name", "fullname", "handle"):
|
| 238 |
+
candidate = value.get(key)
|
| 239 |
+
if isinstance(candidate, str) and candidate.strip():
|
| 240 |
+
return candidate.strip()
|
| 241 |
+
return None
|
| 242 |
+
for attr in ("user", "name", "fullname", "handle"):
|
| 243 |
+
candidate = getattr(value, attr, None)
|
| 244 |
+
if isinstance(candidate, str) and candidate.strip():
|
| 245 |
+
return candidate.strip()
|
| 246 |
+
return None
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
def _author_from_any(value: Any) -> str | None:
|
| 250 |
+
if isinstance(value, str) and value:
|
| 251 |
+
return value
|
| 252 |
+
if isinstance(value, dict):
|
| 253 |
+
for key in ("name", "username", "user", "login"):
|
| 254 |
+
candidate = value.get(key)
|
| 255 |
+
if isinstance(candidate, str) and candidate:
|
| 256 |
+
return candidate
|
| 257 |
+
return None
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def _dt_to_str(value: Any) -> str | None:
|
| 261 |
+
if value is None:
|
| 262 |
+
return None
|
| 263 |
+
iso = getattr(value, "isoformat", None)
|
| 264 |
+
if callable(iso):
|
| 265 |
+
try:
|
| 266 |
+
return str(iso())
|
| 267 |
+
except Exception:
|
| 268 |
+
pass
|
| 269 |
+
return str(value)
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def _repo_web_url(repo_type: str, repo_id: str | None) -> str | None:
    """Build the public Hub URL for a repo (HF_ENDPOINT-aware); None without an id."""
    if not isinstance(repo_id, str) or not repo_id:
        return None
    base = os.getenv("HF_ENDPOINT", "https://huggingface.co").rstrip("/")
    # Models live at the site root; datasets and spaces get a path prefix.
    prefix = {"dataset": "/datasets/", "space": "/spaces/"}.get(
        _canonical_repo_type(repo_type, default=""), "/"
    )
    return f"{base}{prefix}{repo_id}"
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
def _build_repo_row(
    *,
    repo_id: Any,
    repo_type: str,
    author: Any = None,
    likes: Any = None,
    downloads: Any = None,
    created_at: Any = None,
    last_modified: Any = None,
    pipeline_tag: Any = None,
    num_params: Any = None,
    private: Any = None,
    trending_score: Any = None,
    tags: Any = None,
    sha: Any = None,
    gated: Any = None,
    library_name: Any = None,
    description: Any = None,
    paperswithcode_id: Any = None,
    sdk: Any = None,
    models: Any = None,
    datasets: Any = None,
    subdomain: Any = None,
    runtime: Any = None,
    runtime_stage: Any = None,
) -> dict[str, Any]:
    """Assemble the canonical normalized repo-row dict shared by every repo source.

    All inputs are raw/untyped; ints, datetimes, string lists, and the Space
    runtime payload are coerced here so downstream consumers see one schema.
    """
    rt = _canonical_repo_type(repo_type)
    author_value = author
    # Derive the author from an "org/name" repo id when no usable author
    # string was supplied.
    if (
        not isinstance(author_value, str)
        and isinstance(repo_id, str)
        and "/" in repo_id
    ):
        author_value = repo_id.split("/", 1)[0]

    runtime_payload = _space_runtime_to_dict(runtime)
    # An explicit runtime_stage wins; otherwise fall back to the stage from
    # the normalized runtime payload (Spaces only).
    resolved_runtime_stage = (
        runtime_stage
        if runtime_stage is not None
        else runtime_payload.get("stage")
        if isinstance(runtime_payload, dict)
        else None
    )

    return {
        # "id"/"slug"/"repo_id" intentionally all carry the same value so
        # callers can index by whichever name they expect.
        "id": repo_id,
        "slug": repo_id,
        "repo_id": repo_id,
        "repo_type": rt,
        "author": author_value,
        "likes": _as_int(likes),
        "downloads": _as_int(downloads),
        "created_at": _dt_to_str(created_at),
        "last_modified": _dt_to_str(last_modified),
        "pipeline_tag": pipeline_tag,
        "num_params": _as_int(num_params),
        "private": private,
        "trending_score": _as_int(trending_score)
        if trending_score is not None
        else None,
        "repo_url": _repo_web_url(rt, repo_id if isinstance(repo_id, str) else None),
        "tags": _optional_str_list(tags),
        "sha": sha,
        "gated": gated,
        "library_name": library_name,
        "description": description,
        "paperswithcode_id": paperswithcode_id,
        "sdk": sdk,
        "models": _optional_str_list(models),
        "datasets": _optional_str_list(datasets),
        "subdomain": subdomain,
        "runtime_stage": resolved_runtime_stage,
        "runtime": runtime_payload,
    }
|
| 358 |
+
|
| 359 |
+
|
| 360 |
+
def _normalize_repo_search_row(row: Any, repo_type: str) -> dict[str, Any]:
    """Normalize a huggingface_hub search-result object into the shared row dict.

    Every field is read defensively via getattr since the attribute set
    varies between model/dataset/space info objects.
    """
    return _build_repo_row(
        repo_id=getattr(row, "id", None),
        repo_type=repo_type,
        author=getattr(row, "author", None),
        likes=getattr(row, "likes", None),
        downloads=getattr(row, "downloads", None),
        created_at=getattr(row, "created_at", None),
        last_modified=getattr(row, "last_modified", None),
        pipeline_tag=getattr(row, "pipeline_tag", None),
        num_params=_extract_num_params_from_object(row),
        private=getattr(row, "private", None),
        trending_score=getattr(row, "trending_score", None),
        tags=getattr(row, "tags", None),
        sha=getattr(row, "sha", None),
        gated=getattr(row, "gated", None),
        library_name=getattr(row, "library_name", None),
        description=getattr(row, "description", None),
        paperswithcode_id=getattr(row, "paperswithcode_id", None),
        sdk=getattr(row, "sdk", None),
        models=getattr(row, "models", None),
        datasets=getattr(row, "datasets", None),
        subdomain=getattr(row, "subdomain", None),
        runtime=getattr(row, "runtime", None),
    )
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
def _normalize_repo_detail_row(
    detail: Any, repo_type: str, repo_id: str
) -> dict[str, Any]:
    """Normalize a detail payload, back-filling id/slug/url from *repo_id*."""
    row = _normalize_repo_search_row(detail, repo_type)
    resolved = row.get("repo_id") or repo_id
    row.update(
        {
            "id": row.get("id") or resolved,
            "slug": row.get("slug") or resolved,
            "repo_id": resolved,
            "repo_url": _repo_web_url(repo_type, resolved),
        }
    )
    return row
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
def _normalize_trending_row(
    repo: dict[str, Any], default_repo_type: str, rank: int | None = None
) -> dict[str, Any]:
    """Normalize one trending-API dict into the shared row, tagging its rank.

    The trending payload mixes camelCase and snake_case keys; both spellings
    are tried where the API has been observed to vary.
    """
    row = _build_repo_row(
        repo_id=repo.get("id"),
        repo_type=repo.get("type") or repo.get("repoType") or default_repo_type,
        author=repo.get("author"),
        likes=repo.get("likes"),
        downloads=repo.get("downloads"),
        created_at=repo.get("createdAt"),
        last_modified=repo.get("lastModified"),
        pipeline_tag=repo.get("pipeline_tag"),
        num_params=_extract_num_params_from_dict(repo),
        private=repo.get("private"),
        trending_score=repo.get("trendingScore"),
        tags=repo.get("tags"),
        sha=repo.get("sha"),
        gated=repo.get("gated"),
        library_name=repo.get("library_name"),
        description=repo.get("description"),
        paperswithcode_id=repo.get("paperswithcode_id"),
        sdk=repo.get("sdk"),
        models=repo.get("models"),
        datasets=repo.get("datasets"),
        subdomain=repo.get("subdomain"),
        runtime=repo.get("runtime"),
        runtime_stage=repo.get("runtime_stage") or repo.get("runtimeStage"),
    )
    # Rank is only meaningful in a trending listing, so it is added here
    # rather than in _build_repo_row.
    if rank is not None:
        row["trending_rank"] = rank
    return row
|
| 430 |
+
|
| 431 |
+
|
| 432 |
+
def _normalize_daily_paper_row(
    row: dict[str, Any], rank: int | None = None
) -> dict[str, Any]:
    """Flatten one daily-papers API entry into a single-level dict.

    The API nests most fields under "paper"; top-level values take
    precedence where both exist.
    """
    paper = row.get("paper") if isinstance(row.get("paper"), dict) else {}
    # Organization may live at the top level or inside the paper payload.
    org = (
        row.get("organization")
        if isinstance(row.get("organization"), dict)
        else paper.get("organization")
    )
    organization = None
    if isinstance(org, dict):
        organization = org.get("name") or org.get("fullname")

    item = {
        "paper_id": paper.get("id"),
        "title": row.get("title") or paper.get("title"),
        "summary": row.get("summary")
        or paper.get("summary")
        or paper.get("ai_summary"),
        "published_at": row.get("publishedAt") or paper.get("publishedAt"),
        "submitted_on_daily_at": paper.get("submittedOnDailyAt"),
        "authors": _extract_author_names(paper.get("authors")),
        "organization": organization,
        "submitted_by": _extract_profile_name(
            row.get("submittedBy") or paper.get("submittedOnDailyBy")
        ),
        "discussion_id": paper.get("discussionId"),
        "upvotes": _as_int(paper.get("upvotes")),
        "github_repo_url": paper.get("githubRepo"),
        "github_stars": _as_int(paper.get("githubStars")),
        "project_page_url": paper.get("projectPage"),
        "num_comments": _as_int(row.get("numComments")),
        # Only trust a genuine boolean; anything else becomes None.
        "is_author_participating": row.get("isAuthorParticipating")
        if isinstance(row.get("isAuthorParticipating"), bool)
        else None,
        "repo_id": row.get("repo_id") or paper.get("repo_id"),
        "rank": rank,
    }
    return item
|
| 471 |
+
|
| 472 |
+
|
| 473 |
+
def _normalize_collection_repo_item(row: dict[str, Any]) -> dict[str, Any] | None:
    """Normalize one collection item into the shared repo row.

    Returns None for items without a usable repo id or with a repo type
    other than model/dataset/space (e.g. papers inside a collection).
    """
    repo_id = row.get("id") or row.get("repoId") or row.get("repo_id")
    if not isinstance(repo_id, str) or not repo_id:
        return None

    repo_type = _canonical_repo_type(
        row.get("repoType") or row.get("repo_type") or row.get("type"), default=""
    )
    if repo_type not in {"model", "dataset", "space"}:
        return None

    return _build_repo_row(
        repo_id=repo_id,
        repo_type=repo_type,
        author=row.get("author") or _author_from_any(row.get("authorData")),
        likes=row.get("likes"),
        downloads=row.get("downloads"),
        created_at=row.get("createdAt") or row.get("created_at"),
        last_modified=row.get("lastModified") or row.get("last_modified"),
        pipeline_tag=row.get("pipeline_tag") or row.get("pipelineTag"),
        num_params=_extract_num_params_from_dict(row),
        private=row.get("private"),
        tags=row.get("tags"),
        gated=row.get("gated"),
        library_name=row.get("library_name") or row.get("libraryName"),
        description=row.get("description"),
        paperswithcode_id=row.get("paperswithcode_id") or row.get("paperswithcodeId"),
        sdk=row.get("sdk"),
        models=row.get("models"),
        datasets=row.get("datasets"),
        subdomain=row.get("subdomain"),
        runtime=row.get("runtime"),
        runtime_stage=row.get("runtime_stage") or row.get("runtimeStage"),
    )
|
| 507 |
+
|
| 508 |
+
|
| 509 |
+
def _sort_repo_rows(
    rows: list[dict[str, Any]], sort_key: str | None
) -> list[dict[str, Any]]:
    """Sort normalized repo rows client-side by *sort_key*, descending.

    Numeric keys (likes / downloads / trending_score) sort rows with a
    missing value last; timestamp keys compare their ISO-8601 strings
    lexicographically (valid because the format is fixed-width).
    Unknown or empty keys return *rows* unchanged.
    """
    if not sort_key:
        return rows

    if sort_key in {"likes", "downloads", "trending_score"}:

        def numeric(row: dict[str, Any]) -> int:
            # Explicit None check: the previous `_as_int(...) or -1` turned a
            # legitimate count of 0 into the missing-value sentinel -1, making
            # zero-download rows tie with rows that had no data at all.
            value = _as_int(row.get(sort_key))
            return -1 if value is None else value

        return sorted(rows, key=numeric, reverse=True)

    if sort_key in {"created_at", "last_modified"}:
        return sorted(rows, key=lambda row: str(row.get(sort_key) or ""), reverse=True)

    return rows
|
| 524 |
+
|
| 525 |
+
|
| 526 |
+
def call_api_host(
    endpoint: str,
    *,
    method: str = "GET",
    params: dict[str, Any] | None = None,
    json_body: dict[str, Any] | None = None,
    timeout_sec: int = DEFAULT_TIMEOUT_SEC,
    strict_mode: bool = False,
) -> dict[str, Any]:
    """Call an allow-listed Hub HTTP endpoint and return a uniform envelope.

    Returns a dict with keys ok / status / url / data / error. Network-level
    failures are reported with status 0 rather than raised; invalid method,
    disallowed endpoint, or missing required params raise ValueError.
    """
    method_u = method.upper().strip()
    if method_u not in {"GET", "POST"}:
        raise ValueError("Only GET and POST are supported")

    # Endpoint must pass normalization and the (strictness-dependent) allow list.
    ep = _normalize_endpoint(endpoint)
    if not _endpoint_allowed(ep, strict_mode):
        raise ValueError(f"Endpoint not allowed: {ep}")

    params = _sanitize_params(ep, params)
    # recent-activity has required query parameters the API would otherwise
    # reject with a less helpful error.
    if ep == "/api/recent-activity":
        feed_type = str((params or {}).get("feedType", "")).strip().lower()
        if feed_type not in {"user", "org"}:
            raise ValueError("/api/recent-activity requires feedType=user|org")
        if not str((params or {}).get("entity", "")).strip():
            raise ValueError("/api/recent-activity requires entity")

    base = os.getenv("HF_ENDPOINT", "https://huggingface.co").rstrip("/")
    q = urlencode(params or {}, doseq=True)
    url = f"{base}{ep}" + (f"?{q}" if q else "")

    headers = {"Accept": "application/json"}
    token = _load_token()
    if token:
        headers["Authorization"] = f"Bearer {token}"

    data = None
    if method_u == "POST":
        headers["Content-Type"] = "application/json"
        data = json.dumps(json_body or {}).encode("utf-8")

    req = Request(url, method=method_u, headers=headers, data=data)
    try:
        with urlopen(req, timeout=timeout_sec) as res:
            payload = _json_best_effort(res.read())
            return {
                "ok": True,
                "status": int(res.status),
                "url": url,
                "data": payload,
                "error": None,
            }
    except HTTPError as e:
        # HTTP-level failure: surface the response body both as parsed data
        # and as a (truncated) error string.
        payload = _json_best_effort(e.read())
        err = (
            payload
            if isinstance(payload, str)
            else json.dumps(payload, ensure_ascii=False)[:1000]
        )
        return {
            "ok": False,
            "status": int(e.code),
            "url": url,
            "data": payload,
            "error": err,
        }
    except URLError as e:
        # Transport-level failure (DNS, refused connection, timeout): no
        # HTTP status exists, so report status 0.
        return {
            "ok": False,
            "status": 0,
            "url": url,
            "data": None,
            "error": f"Network error: {e}",
        }
|
.prod/monty_api/query_entrypoints.py
ADDED
|
@@ -0,0 +1,388 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import asyncio
|
| 5 |
+
import inspect
|
| 6 |
+
import json
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
import time
|
| 10 |
+
from typing import Any, Callable
|
| 11 |
+
|
| 12 |
+
from .constants import (
|
| 13 |
+
DEFAULT_MAX_CALLS,
|
| 14 |
+
DEFAULT_MONTY_MAX_ALLOCATIONS,
|
| 15 |
+
DEFAULT_MONTY_MAX_MEMORY,
|
| 16 |
+
DEFAULT_MONTY_MAX_RECURSION_DEPTH,
|
| 17 |
+
DEFAULT_TIMEOUT_SEC,
|
| 18 |
+
INTERNAL_STRICT_MODE,
|
| 19 |
+
MAX_CALLS_LIMIT,
|
| 20 |
+
)
|
| 21 |
+
from .runtime_context import build_runtime_helper_environment
|
| 22 |
+
from .validation import (
|
| 23 |
+
_coerce_jsonish_python_literals,
|
| 24 |
+
_summarize_limit_hit,
|
| 25 |
+
_truncate_result_payload,
|
| 26 |
+
_validate_generated_code,
|
| 27 |
+
_wrap_raw_result,
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class MontyExecutionError(RuntimeError):
    """Raised when sandboxed Monty execution fails.

    Carries the number of API calls made and the per-call trace so the
    caller can report partial progress alongside the error.
    """

    def __init__(self, message: str, api_calls: int, trace: list[dict[str, Any]]):
        super().__init__(message)
        # External API helper calls completed before the failure.
        self.api_calls = api_calls
        # Trace entries collected by the helper environment.
        self.trace = trace
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def _query_debug_enabled() -> bool:
|
| 39 |
+
value = os.environ.get("MONTY_DEBUG_QUERY", "")
|
| 40 |
+
return value.strip().lower() in {"1", "true", "yes", "on"}
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def _log_generated_query(
    *, query: str, code: str, max_calls: int | None, timeout_sec: int | None
) -> None:
    """When MONTY_DEBUG_QUERY is on, echo the query, limits, and generated code to stderr."""
    if not _query_debug_enabled():
        return
    err = sys.stderr
    print("[monty-debug] query:", file=err)
    print(query, file=err)
    print("[monty-debug] max_calls:", max_calls, file=err)
    print("[monty-debug] timeout_sec:", timeout_sec, file=err)
    print("[monty-debug] code:", file=err)
    print(code, file=err)
    err.flush()
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def _introspect_helper_signatures() -> dict[str, set[str]]:
    """Map each runtime helper name to the set of its parameter names.

    Builds a throwaway helper environment with the default limits purely
    for introspection; no helper is actually invoked.
    """
    env = build_runtime_helper_environment(
        max_calls=DEFAULT_MAX_CALLS,
        strict_mode=INTERNAL_STRICT_MODE,
        timeout_sec=DEFAULT_TIMEOUT_SEC,
    )
    out: dict[str, set[str]] = {}
    for helper_name, helper_fn in env.helper_functions.items():
        # Iterating a signature's parameters mapping yields parameter names.
        out[helper_name] = set(inspect.signature(helper_fn).parameters)
    return out
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
async def _run_with_monty(
    *,
    code: str,
    query: str,
    max_calls: int,
    strict_mode: bool,
    timeout_sec: int,
) -> dict[str, Any]:
    """Execute generated *code* inside a pydantic-monty sandbox.

    Returns a dict with output / api_calls / trace / limit_summaries.
    Raises MontyExecutionError when execution fails or when the code
    finished without making a (successful) live API call and no recognized
    error envelope was produced — refusing to return fabricated results.
    """
    try:
        import pydantic_monty
    except Exception as e:
        raise RuntimeError(
            "pydantic_monty is not installed. Install with `uv pip install pydantic-monty`."
        ) from e

    env = build_runtime_helper_environment(
        max_calls=max_calls,
        strict_mode=strict_mode,
        timeout_sec=timeout_sec,
    )

    m = pydantic_monty.Monty(
        code,
        inputs=["query", "max_calls"],
        script_name="monty_agent.py",
        type_check=False,
    )

    # Wrap each helper so that any limit-hit it reports is recorded
    # (capped at 20 summaries) without altering the helper's result.
    def _collecting_wrapper(
        helper_name: str, fn: Callable[..., Any]
    ) -> Callable[..., Any]:
        async def wrapped(*args: Any, **kwargs: Any) -> Any:
            result = await fn(*args, **kwargs)
            summary = _summarize_limit_hit(helper_name, result)
            if summary is not None and len(env.limit_summaries) < 20:
                env.limit_summaries.append(summary)
            return result

        return wrapped

    limits: pydantic_monty.ResourceLimits = {
        "max_duration_secs": float(timeout_sec),
        "max_memory": DEFAULT_MONTY_MAX_MEMORY,
        "max_allocations": DEFAULT_MONTY_MAX_ALLOCATIONS,
        "max_recursion_depth": DEFAULT_MONTY_MAX_RECURSION_DEPTH,
    }

    try:
        result = await pydantic_monty.run_monty_async(
            m,
            inputs={"query": query, "max_calls": max_calls},
            external_functions={
                name: _collecting_wrapper(name, fn)
                for name, fn in env.helper_functions.items()
            },
            limits=limits,
        )
    except Exception as e:
        raise MontyExecutionError(str(e), env.call_count["n"], env.trace) from e

    # No external API call was made: only accept the result when it clearly
    # came from an internal helper or is a recognized error envelope;
    # otherwise refuse it as a fabricated (non-live) answer.
    if env.call_count["n"] == 0:
        if env.internal_helper_used["used"]:
            return {
                "output": _truncate_result_payload(result),
                "api_calls": env.call_count["n"],
                "trace": env.trace,
                "limit_summaries": env.limit_summaries,
            }
        # ok=True results are accepted only when tagged internal://.
        if isinstance(result, dict) and result.get("ok") is True:
            meta = result.get("meta") if isinstance(result.get("meta"), dict) else {}
            source = meta.get("source")
            if isinstance(source, str) and source.startswith("internal://"):
                return {
                    "output": _truncate_result_payload(result),
                    "api_calls": env.call_count["n"],
                    "trace": env.trace,
                    "limit_summaries": env.limit_summaries,
                }
        # A helper-level error recorded by the environment is surfaced as-is.
        latest_helper_error = env.latest_helper_error_box.get("value")
        if latest_helper_error is not None:
            return {
                "output": _truncate_result_payload(latest_helper_error),
                "api_calls": env.call_count["n"],
                "trace": env.trace,
                "limit_summaries": env.limit_summaries,
            }
        # An explicit {"ok": False, "error": "..."} envelope is also passed through.
        if (
            isinstance(result, dict)
            and result.get("ok") is False
            and isinstance(result.get("error"), str)
        ):
            return {
                "output": _truncate_result_payload(result),
                "api_calls": env.call_count["n"],
                "trace": env.trace,
                "limit_summaries": env.limit_summaries,
            }
        raise MontyExecutionError(
            "Code completed without calling any external API function",
            env.call_count["n"],
            env.trace,
        )

    # Calls were made but none succeeded: pass through an explicit error
    # envelope, otherwise refuse the result for the same anti-fabrication reason.
    if not any(step.get("ok") is True for step in env.trace):
        if (
            isinstance(result, dict)
            and result.get("ok") is False
            and isinstance(result.get("error"), str)
        ):
            return {
                "output": _truncate_result_payload(result),
                "api_calls": env.call_count["n"],
                "trace": env.trace,
                "limit_summaries": env.limit_summaries,
            }
        raise MontyExecutionError(
            "Code completed without a successful API call; refusing non-live fallback result",
            env.call_count["n"],
            env.trace,
        )

    return {
        "output": _truncate_result_payload(result),
        "api_calls": env.call_count["n"],
        "trace": env.trace,
        "limit_summaries": env.limit_summaries,
    }
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def _prepare_query_inputs(
    *,
    query: str,
    code: str,
    max_calls: int | None,
    timeout_sec: int | None,
) -> tuple[str, str, int, int]:
    """Validate and normalize the raw query inputs.

    Returns ``(query, normalized_code, max_calls, timeout_sec)`` with
    max_calls clamped to [1, MAX_CALLS_LIMIT]; raises ValueError on a
    blank query or code, or if the code fails validation.
    """
    if not (query and query.strip()):
        raise ValueError("query is required")
    if not (code and code.strip()):
        raise ValueError("code is required")

    calls = DEFAULT_MAX_CALLS if max_calls is None else int(max_calls)
    calls = min(max(calls, 1), MAX_CALLS_LIMIT)
    timeout = int(DEFAULT_TIMEOUT_SEC if timeout_sec is None else timeout_sec)
    cleaned_code = _coerce_jsonish_python_literals(code.strip())
    _validate_generated_code(cleaned_code)
    return query, cleaned_code, calls, timeout
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
async def _execute_query(
    *,
    query: str,
    code: str,
    max_calls: int | None,
    timeout_sec: int | None,
) -> dict[str, Any]:
    """Normalize inputs, log the generated request, and run it under Monty.

    Returns the raw run dict from ``_run_with_monty`` (``output``,
    ``api_calls``, ``trace``, ``limit_summaries``).
    """
    norm_query, norm_code, norm_calls, norm_timeout = _prepare_query_inputs(
        query=query,
        code=code,
        max_calls=max_calls,
        timeout_sec=timeout_sec,
    )
    # Log the exact normalized inputs so failures can be reproduced offline.
    _log_generated_query(
        query=norm_query,
        code=norm_code,
        max_calls=norm_calls,
        timeout_sec=norm_timeout,
    )
    return await _run_with_monty(
        code=norm_code,
        query=norm_query,
        max_calls=norm_calls,
        strict_mode=INTERNAL_STRICT_MODE,
        timeout_sec=norm_timeout,
    )
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
async def hf_hub_query(
    query: str,
    code: str,
    max_calls: int | None = DEFAULT_MAX_CALLS,
    timeout_sec: int | None = DEFAULT_TIMEOUT_SEC,
) -> dict[str, Any]:
    """Use natural-language queries to explore the Hugging Face Hub.

    Best for read-only Hub discovery, lookup, ranking, and relationship questions
    across users, organizations, repositories, activity, followers, likes,
    discussions, and collections.
    """
    # Envelope contract: always return {ok, data, error, api_calls}.
    try:
        run = await _execute_query(
            query=query,
            code=code,
            max_calls=max_calls,
            timeout_sec=timeout_sec,
        )
    except MontyExecutionError as exc:
        # Execution errors carry an accurate upstream-call count.
        return {"ok": False, "data": None, "error": str(exc), "api_calls": exc.api_calls}
    except Exception as exc:
        # Validation/unexpected failures happen before any upstream call.
        return {"ok": False, "data": None, "error": str(exc), "api_calls": 0}
    return {
        "ok": True,
        "data": run["output"],
        "error": None,
        "api_calls": run["api_calls"],
    }
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
async def hf_hub_query_raw(
    query: str,
    code: str,
    max_calls: int | None = DEFAULT_MAX_CALLS,
    timeout_sec: int | None = DEFAULT_TIMEOUT_SEC,
) -> Any:
    """Use natural-language queries to explore the Hugging Face Hub in raw mode.

    Best for read-only Hub discovery, lookup, ranking, and relationship
    questions when the caller wants a runtime-owned raw envelope:
    ``result`` contains the direct ``solve(...)`` output and ``meta`` contains
    execution details such as timing, call counts, and limit summaries.
    """
    started = time.perf_counter()

    def _elapsed_ms() -> int:
        # Wall-clock duration in whole milliseconds since entry.
        return int((time.perf_counter() - started) * 1000)

    try:
        run = await _execute_query(
            query=query,
            code=code,
            max_calls=max_calls,
            timeout_sec=timeout_sec,
        )
    except MontyExecutionError as exc:
        return _wrap_raw_result(
            None,
            ok=False,
            api_calls=exc.api_calls,
            elapsed_ms=_elapsed_ms(),
            error=str(exc),
        )
    except Exception as exc:
        return _wrap_raw_result(
            None,
            ok=False,
            api_calls=0,
            elapsed_ms=_elapsed_ms(),
            error=str(exc),
        )
    return _wrap_raw_result(
        run["output"],
        ok=True,
        api_calls=run["api_calls"],
        elapsed_ms=_elapsed_ms(),
        limit_summaries=run.get("limit_summaries"),
    )
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
def _arg_parser() -> argparse.ArgumentParser:
    """Build the CLI argument parser for the Monty-backed chaining tool."""
    parser = argparse.ArgumentParser(
        description="Monty-backed API chaining tool (v3)"
    )
    parser.add_argument("--query", required=True, help="Natural language query")
    parser.add_argument("--code", default=None, help="Inline Monty code to execute")
    parser.add_argument(
        "--code-file", default=None, help="Path to .py file with Monty code to execute"
    )
    parser.add_argument(
        "--max-calls",
        type=int,
        default=DEFAULT_MAX_CALLS,
        help="Max external API/helper calls",
    )
    parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT_SEC)
    return parser
|
| 357 |
+
|
| 358 |
+
|
| 359 |
+
def main() -> int:
    """CLI entry point: run the query and print one JSON result line.

    Every outcome — success, execution failure, bad arguments, unreadable
    code file — is reported as a single JSON object on stdout so callers can
    parse uniformly. Returns 0 on success, 1 on any failure.
    """
    args = _arg_parser().parse_args()
    code = args.code
    if args.code_file:
        # Bug fix: reading the file used to happen outside any try, so a bad
        # path printed a raw traceback instead of the tool's JSON envelope.
        try:
            with open(args.code_file, "r", encoding="utf-8") as f:
                code = f.read()
        except OSError as e:
            print(json.dumps({"ok": False, "error": str(e)}, ensure_ascii=False))
            return 1

    if not code:
        print(
            json.dumps(
                {"ok": False, "error": "Either --code or --code-file is required"},
                ensure_ascii=False,
            )
        )
        return 1

    try:
        out = asyncio.run(
            hf_hub_query(
                query=args.query,
                code=code,
                max_calls=args.max_calls,
                timeout_sec=args.timeout,
            )
        )
        print(json.dumps(out, ensure_ascii=False))
        return 0 if out.get("ok") else 1
    except Exception as e:
        # Last-resort guard: still honor the JSON output contract.
        print(json.dumps({"ok": False, "error": str(e)}, ensure_ascii=False))
        return 1
|
.prod/monty_api/registry.py
ADDED
|
@@ -0,0 +1,681 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass, field
|
| 4 |
+
from typing import Any, Mapping, NamedTuple
|
| 5 |
+
|
| 6 |
+
from .constants import (
|
| 7 |
+
ACTIVITY_CANONICAL_FIELDS,
|
| 8 |
+
ACTOR_CANONICAL_FIELDS,
|
| 9 |
+
COLLECTION_CANONICAL_FIELDS,
|
| 10 |
+
DAILY_PAPER_CANONICAL_FIELDS,
|
| 11 |
+
DISCUSSION_CANONICAL_FIELDS,
|
| 12 |
+
DISCUSSION_DETAIL_CANONICAL_FIELDS,
|
| 13 |
+
GRAPH_SCAN_LIMIT_CAP,
|
| 14 |
+
LIKES_ENRICHMENT_MAX_REPOS,
|
| 15 |
+
LIKES_RANKING_WINDOW_DEFAULT,
|
| 16 |
+
LIKES_SCAN_LIMIT_CAP,
|
| 17 |
+
OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
| 18 |
+
PROFILE_CANONICAL_FIELDS,
|
| 19 |
+
RECENT_ACTIVITY_PAGE_SIZE,
|
| 20 |
+
RECENT_ACTIVITY_SCAN_MAX_PAGES,
|
| 21 |
+
REPO_CANONICAL_FIELDS,
|
| 22 |
+
TRENDING_ENDPOINT_MAX_LIMIT,
|
| 23 |
+
USER_LIKES_CANONICAL_FIELDS,
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class RepoApiAdapter(NamedTuple):
    """Maps a repo type to the Hub client method names used to list and fetch it."""

    # Name of the bulk listing method on the Hub client (e.g. "list_models").
    list_method_name: str
    # Name of the exact single-repo detail method (e.g. "model_info").
    detail_method_name: str
| 30 |
+
|
| 31 |
+
|
| 32 |
+
@dataclass(frozen=True)
class HelperConfig:
    """Immutable registry entry describing one runtime helper."""

    # Helper name as exposed to generated code (e.g. "hf_models_search").
    name: str
    # Regex patterns for the Hub API endpoint paths this helper covers.
    endpoint_patterns: tuple[str, ...] = ()
    # Metadata advertised to callers: default/guaranteed/optional fields,
    # limits, fan-out hints, and free-text notes.
    default_metadata: Mapping[str, Any] = field(default_factory=dict)
    # Pagination policy knobs (default_limit, max_limit, scan caps, ...).
    pagination: Mapping[str, Any] = field(default_factory=dict)
| 39 |
+
|
| 40 |
+
# Extra keyword arguments accepted by each repo type's list_* client method,
# beyond the shared search arguments. Used to validate generated search calls.
REPO_SEARCH_EXTRA_ARGS: dict[str, set[str]] = {
    "dataset": {
        "benchmark",
        "dataset_name",
        "expand",
        "full",
        "gated",
        "language",
        "language_creators",
        "multilinguality",
        "size_categories",
        "task_categories",
        "task_ids",
    },
    "model": {
        "apps",
        "cardData",
        "emissions_thresholds",
        "expand",
        "fetch_config",
        "full",
        "gated",
        "inference",
        "inference_provider",
        "model_name",
        "pipeline_tag",
        "trained_dataset",
    },
    "space": {"datasets", "expand", "full", "linked", "models"},
}
| 70 |
+
|
| 71 |
+
# Default `expand=` field selection per repo type when the caller does not
# specify one; a cost-conscious subset of REPO_SEARCH_ALLOWED_EXPAND.
REPO_SEARCH_DEFAULT_EXPAND: dict[str, list[str]] = {
    "dataset": [
        "author",
        "createdAt",
        "description",
        "downloads",
        "gated",
        "lastModified",
        "likes",
        "paperswithcode_id",
        "private",
        "sha",
        "tags",
        "trendingScore",
    ],
    "model": [
        "author",
        "createdAt",
        "downloads",
        "gated",
        "lastModified",
        "library_name",
        "likes",
        "pipeline_tag",
        "private",
        "safetensors",
        "sha",
        "tags",
        "trendingScore",
    ],
    "space": [
        "author",
        "createdAt",
        "datasets",
        "lastModified",
        "likes",
        "models",
        "private",
        "runtime",
        "sdk",
        "sha",
        "subdomain",
        "tags",
        "trendingScore",
    ],
}
| 117 |
+
|
| 118 |
+
# NOTE:
# The huggingface_hub client type literals currently advertise a few expand values
# that the live Hub API rejects (`childrenModelCount`, `usedStorage`) and omits a
# few that the API now accepts (`xetEnabled`, `gitalyUid`). Keep this allowlist in
# sync with the live API error contract rather than the client typing surface so we
# can sanitize generated requests before they hit the network.
REPO_SEARCH_ALLOWED_EXPAND: dict[str, list[str]] = {
    "dataset": [
        "author",
        "cardData",
        "citation",
        "createdAt",
        "description",
        "disabled",
        "downloads",
        "downloadsAllTime",
        "gated",
        "lastModified",
        "likes",
        "paperswithcode_id",
        "private",
        "resourceGroup",
        "sha",
        "siblings",
        "tags",
        "trendingScore",
        "xetEnabled",
        "gitalyUid",
    ],
    "model": [
        "author",
        "baseModels",
        "cardData",
        "config",
        "createdAt",
        "disabled",
        "downloads",
        "downloadsAllTime",
        "evalResults",
        "gated",
        "gguf",
        "inference",
        "inferenceProviderMapping",
        "lastModified",
        "library_name",
        "likes",
        "mask_token",
        "model-index",
        "pipeline_tag",
        "private",
        "resourceGroup",
        "safetensors",
        "sha",
        "siblings",
        "spaces",
        "tags",
        "transformersInfo",
        "trendingScore",
        "widgetData",
        "xetEnabled",
        "gitalyUid",
    ],
    "space": [
        "author",
        "cardData",
        "createdAt",
        "datasets",
        "disabled",
        "lastModified",
        "likes",
        "models",
        "private",
        "resourceGroup",
        "runtime",
        "sdk",
        "sha",
        "siblings",
        "subdomain",
        "tags",
        "trendingScore",
        "xetEnabled",
        "gitalyUid",
    ],
}
| 202 |
+
|
| 203 |
+
# Sections exposed by the hf_runtime_capabilities introspection helper.
RUNTIME_CAPABILITY_FIELDS = [
    "allowed_sections",
    "overview",
    "helpers",
    "helper_contracts",
    "helper_defaults",
    "fields",
    "limits",
    "repo_search",
]
# Canonical repo summary fields, plus "optional" = everything not guaranteed.
# Comprehension variables are named `name` (not `field`) so they do not shadow
# the `dataclasses.field` import inside the comprehensions.
REPO_SUMMARY_FIELDS = list(REPO_CANONICAL_FIELDS)
REPO_SUMMARY_OPTIONAL_FIELDS = [
    name
    for name in REPO_CANONICAL_FIELDS
    if name not in {"repo_id", "repo_type", "author", "repo_url"}
]
ACTOR_OPTIONAL_FIELDS = [
    name for name in ACTOR_CANONICAL_FIELDS if name != "username"
]
PROFILE_OPTIONAL_FIELDS = [
    name
    for name in PROFILE_CANONICAL_FIELDS
    if name not in {"handle", "entity_type"}
]
# Trending rows are repo summaries plus their ordinal rank.
TRENDING_DEFAULT_FIELDS = [*REPO_SUMMARY_FIELDS, "trending_rank"]
TRENDING_OPTIONAL_FIELDS = [
    name
    for name in TRENDING_DEFAULT_FIELDS
    if name not in {"repo_id", "repo_type", "author", "repo_url", "trending_rank"}
]
DAILY_PAPER_DEFAULT_FIELDS = list(DAILY_PAPER_CANONICAL_FIELDS)
DAILY_PAPER_OPTIONAL_FIELDS = [
    name
    for name in DAILY_PAPER_CANONICAL_FIELDS
    if name not in {"paper_id", "title", "published_at", "rank"}
]
COLLECTION_DEFAULT_FIELDS = list(COLLECTION_CANONICAL_FIELDS)
COLLECTION_OPTIONAL_FIELDS = [
    name
    for name in COLLECTION_CANONICAL_FIELDS
    if name not in {"collection_id", "title", "owner"}
]
| 245 |
+
|
| 246 |
+
|
| 247 |
+
def _metadata(
|
| 248 |
+
*,
|
| 249 |
+
default_fields: list[str],
|
| 250 |
+
guaranteed_fields: list[str],
|
| 251 |
+
notes: str,
|
| 252 |
+
optional_fields: list[str] | None = None,
|
| 253 |
+
default_upstream_calls: int = 1,
|
| 254 |
+
may_fan_out: bool = False,
|
| 255 |
+
default_limit: int | None = None,
|
| 256 |
+
max_limit: int | None = None,
|
| 257 |
+
) -> dict[str, Any]:
|
| 258 |
+
metadata: dict[str, Any] = {
|
| 259 |
+
"default_fields": list(default_fields),
|
| 260 |
+
"guaranteed_fields": list(guaranteed_fields),
|
| 261 |
+
"optional_fields": list(
|
| 262 |
+
optional_fields
|
| 263 |
+
if optional_fields is not None
|
| 264 |
+
else [
|
| 265 |
+
field for field in default_fields if field not in set(guaranteed_fields)
|
| 266 |
+
]
|
| 267 |
+
),
|
| 268 |
+
"default_upstream_calls": default_upstream_calls,
|
| 269 |
+
"may_fan_out": may_fan_out,
|
| 270 |
+
"notes": notes,
|
| 271 |
+
}
|
| 272 |
+
if default_limit is not None:
|
| 273 |
+
metadata["default_limit"] = default_limit
|
| 274 |
+
if max_limit is not None:
|
| 275 |
+
metadata["max_limit"] = max_limit
|
| 276 |
+
return metadata
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
def _config(
    name: str,
    *,
    endpoint_patterns: tuple[str, ...] = (),
    default_metadata: Mapping[str, Any],
    pagination: Mapping[str, Any] | None = None,
) -> HelperConfig:
    """Construct a HelperConfig, copying the mappings into fresh plain dicts."""
    resolved_pagination = {} if pagination is None else dict(pagination)
    return HelperConfig(
        name=name,
        endpoint_patterns=endpoint_patterns,
        default_metadata=dict(default_metadata),
        pagination=resolved_pagination,
    )
| 292 |
+
|
| 293 |
+
|
| 294 |
+
# Central helper registry: one HelperConfig per runtime helper, keyed by the
# helper name exposed to generated code. Derived views (HELPER_EXTERNALS,
# HELPER_DEFAULT_METADATA, PAGINATION_POLICY, endpoint coverage) are built
# from this single source of truth below.
HELPER_CONFIGS: dict[str, HelperConfig] = {
    # Pure introspection: no upstream calls, no endpoint coverage.
    "hf_runtime_capabilities": _config(
        "hf_runtime_capabilities",
        default_metadata=_metadata(
            default_fields=RUNTIME_CAPABILITY_FIELDS,
            guaranteed_fields=RUNTIME_CAPABILITY_FIELDS,
            optional_fields=[],
            default_upstream_calls=0,
            notes="Introspection helper. Use section=... to narrow the response.",
        ),
    ),
    "hf_whoami": _config(
        "hf_whoami",
        endpoint_patterns=(r"^/api/whoami-v2$",),
        default_metadata=_metadata(
            default_fields=["username", "fullname", "is_pro"],
            guaranteed_fields=["username"],
            notes="Returns the current authenticated user when a request token is available.",
        ),
    ),
    # Covers both user and organization overview endpoints.
    "hf_profile_summary": _config(
        "hf_profile_summary",
        endpoint_patterns=(
            r"^/api/users/[^/]+/overview$",
            r"^/api/organizations/[^/]+/overview$",
        ),
        default_metadata=_metadata(
            default_fields=list(PROFILE_CANONICAL_FIELDS),
            guaranteed_fields=["handle", "entity_type"],
            optional_fields=PROFILE_OPTIONAL_FIELDS,
            may_fan_out=True,
            notes=(
                "Profile summary helper. Aggregate counts like followers_count/following_count "
                "are in the base item. include=['likes', 'activity'] adds composed samples and "
                "extra upstream work; no other include values are supported. Overview-owned "
                "repo counts may differ slightly from visible public search/list results."
            ),
        ),
    ),
    "hf_org_members": _config(
        "hf_org_members",
        endpoint_patterns=(r"^/api/organizations/[^/]+/members$",),
        default_metadata=_metadata(
            default_fields=list(ACTOR_CANONICAL_FIELDS),
            guaranteed_fields=["username"],
            optional_fields=ACTOR_OPTIONAL_FIELDS,
            default_limit=1_000,
            max_limit=GRAPH_SCAN_LIMIT_CAP,
            notes="Returns organization member summary rows.",
        ),
        pagination={"default_limit": 1_000, "scan_max": GRAPH_SCAN_LIMIT_CAP},
    ),
    # The three single-type search helpers share field/limit shapes and differ
    # only in endpoint and notes; hf_repo_search is the cross-type variant.
    "hf_models_search": _config(
        "hf_models_search",
        endpoint_patterns=(r"^/api/models$",),
        default_metadata=_metadata(
            default_fields=REPO_SUMMARY_FIELDS,
            guaranteed_fields=["repo_id", "repo_type", "author", "repo_url"],
            optional_fields=REPO_SUMMARY_OPTIONAL_FIELDS,
            default_limit=20,
            max_limit=5_000,
            notes=(
                "Thin model-search wrapper around the Hub list_models path. Prefer this "
                "over hf_repo_search for model-only queries. This is a one-shot selective "
                "search; if meta.limit_boundary_hit is true, more rows may exist and counts "
                "are not exact."
            ),
        ),
        pagination={"default_limit": 20, "max_limit": 5_000},
    ),
    "hf_datasets_search": _config(
        "hf_datasets_search",
        endpoint_patterns=(r"^/api/datasets$",),
        default_metadata=_metadata(
            default_fields=REPO_SUMMARY_FIELDS,
            guaranteed_fields=["repo_id", "repo_type", "author", "repo_url"],
            optional_fields=REPO_SUMMARY_OPTIONAL_FIELDS,
            default_limit=20,
            max_limit=5_000,
            notes=(
                "Thin dataset-search wrapper around the Hub list_datasets path. Prefer "
                "this over hf_repo_search for dataset-only queries. This is a one-shot "
                "selective search; if meta.limit_boundary_hit is true, more rows may exist "
                "and counts are not exact."
            ),
        ),
        pagination={"default_limit": 20, "max_limit": 5_000},
    ),
    "hf_spaces_search": _config(
        "hf_spaces_search",
        endpoint_patterns=(r"^/api/spaces$",),
        default_metadata=_metadata(
            default_fields=REPO_SUMMARY_FIELDS,
            guaranteed_fields=["repo_id", "repo_type", "author", "repo_url"],
            optional_fields=REPO_SUMMARY_OPTIONAL_FIELDS,
            default_limit=20,
            max_limit=5_000,
            notes=(
                "Thin space-search wrapper around the Hub list_spaces path. Prefer this "
                "over hf_repo_search for space-only queries. This is a one-shot selective "
                "search; if meta.limit_boundary_hit is true, more rows may exist and counts "
                "are not exact."
            ),
        ),
        pagination={"default_limit": 20, "max_limit": 5_000},
    ),
    "hf_repo_search": _config(
        "hf_repo_search",
        endpoint_patterns=(r"^/api/models$", r"^/api/datasets$", r"^/api/spaces$"),
        default_metadata=_metadata(
            default_fields=REPO_SUMMARY_FIELDS,
            guaranteed_fields=["repo_id", "repo_type", "author", "repo_url"],
            optional_fields=REPO_SUMMARY_OPTIONAL_FIELDS,
            default_limit=20,
            max_limit=5_000,
            notes=(
                "Small generic repo-search helper. Prefer hf_models_search, "
                "hf_datasets_search, or hf_spaces_search for single-type queries; use "
                "hf_repo_search for intentionally cross-type search. This is a one-shot "
                "selective search; if meta.limit_boundary_hit is true, more rows may exist "
                "and counts are not exact."
            ),
        ),
        pagination={"default_limit": 20, "max_limit": 5_000},
    ),
    "hf_user_graph": _config(
        "hf_user_graph",
        endpoint_patterns=(
            r"^/api/users/[^/]+/(followers|following)$",
            r"^/api/organizations/[^/]+/followers$",
        ),
        default_metadata=_metadata(
            default_fields=list(ACTOR_CANONICAL_FIELDS),
            guaranteed_fields=["username"],
            optional_fields=ACTOR_OPTIONAL_FIELDS,
            default_limit=1_000,
            max_limit=GRAPH_SCAN_LIMIT_CAP,
            notes="Returns followers/following summary rows.",
        ),
        pagination={
            "default_limit": 1_000,
            "max_limit": GRAPH_SCAN_LIMIT_CAP,
            "scan_max": GRAPH_SCAN_LIMIT_CAP,
        },
    ),
    "hf_repo_likers": _config(
        "hf_repo_likers",
        endpoint_patterns=(
            r"^/api/(models|datasets|spaces)/(?:[^/]+|[^/]+/[^/]+)/likers$",
        ),
        default_metadata=_metadata(
            default_fields=list(ACTOR_CANONICAL_FIELDS),
            guaranteed_fields=["username"],
            optional_fields=ACTOR_OPTIONAL_FIELDS,
            default_limit=1_000,
            notes="Returns users who liked a repo.",
        ),
        pagination={"default_limit": 1_000},
    ),
    "hf_user_likes": _config(
        "hf_user_likes",
        endpoint_patterns=(r"^/api/users/[^/]+/likes$",),
        default_metadata=_metadata(
            default_fields=list(USER_LIKES_CANONICAL_FIELDS),
            guaranteed_fields=["liked_at", "repo_id", "repo_type"],
            optional_fields=["repo_author", "repo_likes", "repo_downloads", "repo_url"],
            default_limit=100,
            max_limit=2_000,
            may_fan_out=True,
            notes=(
                "Default recency mode is cheap. Popularity-ranked sorts use canonical keys "
                "liked_at/repo_likes/repo_downloads and rerank only a bounded recent "
                "shortlist. Check meta.ranking_complete / meta.ranking_window when ranking "
                "by popularity; helper-owned coverage matters here."
            ),
        ),
        pagination={
            "default_limit": 100,
            "enrich_max": LIKES_ENRICHMENT_MAX_REPOS,
            "ranking_default": LIKES_RANKING_WINDOW_DEFAULT,
            "scan_max": LIKES_SCAN_LIMIT_CAP,
        },
    ),
    "hf_recent_activity": _config(
        "hf_recent_activity",
        endpoint_patterns=(r"^/api/recent-activity$",),
        default_metadata=_metadata(
            default_fields=list(ACTIVITY_CANONICAL_FIELDS),
            guaranteed_fields=["event_type", "timestamp"],
            optional_fields=["repo_id", "repo_type"],
            default_limit=100,
            max_limit=2_000,
            may_fan_out=True,
            notes=(
                "Activity helper may fetch multiple pages when requested coverage exceeds "
                "one page. count_only may still be a lower bound unless the feed exhausts "
                "before max_pages."
            ),
        ),
        pagination={
            "default_limit": 100,
            "max_pages": RECENT_ACTIVITY_SCAN_MAX_PAGES,
            "page_limit": RECENT_ACTIVITY_PAGE_SIZE,
        },
    ),
    "hf_repo_discussions": _config(
        "hf_repo_discussions",
        endpoint_patterns=(r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions$",),
        default_metadata=_metadata(
            default_fields=list(DISCUSSION_CANONICAL_FIELDS),
            guaranteed_fields=["num", "title", "author", "status"],
            optional_fields=["repo_id", "repo_type", "created_at", "url"],
            default_limit=20,
            max_limit=200,
            notes="Discussion summary helper.",
        ),
    ),
    "hf_repo_discussion_details": _config(
        "hf_repo_discussion_details",
        endpoint_patterns=(
            r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions/\d+$",
        ),
        default_metadata=_metadata(
            default_fields=list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
            guaranteed_fields=["repo_id", "repo_type", "title", "author", "status"],
            optional_fields=[
                "num",
                "created_at",
                "url",
                "comment_count",
                "latest_comment_author",
                "latest_comment_created_at",
                "latest_comment_text",
                "latest_comment_html",
            ],
            notes="Exact discussion detail helper.",
        ),
    ),
    "hf_repo_details": _config(
        "hf_repo_details",
        endpoint_patterns=(r"^/api/(models|datasets|spaces)/[^/]+/[^/]+$",),
        default_metadata=_metadata(
            default_fields=REPO_SUMMARY_FIELDS,
            guaranteed_fields=["repo_id", "repo_type", "author", "repo_url"],
            optional_fields=REPO_SUMMARY_OPTIONAL_FIELDS,
            may_fan_out=True,
            notes="Exact repo metadata path. Multiple repo_ids may trigger one detail call per requested repo.",
        ),
    ),
    "hf_trending": _config(
        "hf_trending",
        endpoint_patterns=(r"^/api/trending$",),
        default_metadata=_metadata(
            default_fields=TRENDING_DEFAULT_FIELDS,
            guaranteed_fields=[
                "repo_id",
                "repo_type",
                "author",
                "repo_url",
                "trending_rank",
            ],
            optional_fields=TRENDING_OPTIONAL_FIELDS,
            default_limit=20,
            max_limit=TRENDING_ENDPOINT_MAX_LIMIT,
            notes="Returns ordered trending summary rows only. Use hf_repo_details for exact repo metadata.",
        ),
        pagination={"default_limit": 20, "max_limit": TRENDING_ENDPOINT_MAX_LIMIT},
    ),
    "hf_daily_papers": _config(
        "hf_daily_papers",
        endpoint_patterns=(r"^/api/daily_papers$",),
        default_metadata=_metadata(
            default_fields=DAILY_PAPER_DEFAULT_FIELDS,
            guaranteed_fields=["paper_id", "title", "published_at", "rank"],
            optional_fields=DAILY_PAPER_OPTIONAL_FIELDS,
            default_limit=20,
            max_limit=OUTPUT_ITEMS_TRUNCATION_LIMIT,
            notes="Returns daily paper summary rows. repo_id is omitted unless the upstream payload provides it.",
        ),
        pagination={"default_limit": 20, "max_limit": OUTPUT_ITEMS_TRUNCATION_LIMIT},
    ),
    "hf_collections_search": _config(
        "hf_collections_search",
        endpoint_patterns=(r"^/api/collections$",),
        default_metadata=_metadata(
            default_fields=COLLECTION_DEFAULT_FIELDS,
            guaranteed_fields=["collection_id", "title", "owner"],
            optional_fields=COLLECTION_OPTIONAL_FIELDS,
            default_limit=20,
            max_limit=OUTPUT_ITEMS_TRUNCATION_LIMIT,
            notes="Collection summary helper.",
        ),
        pagination={"default_limit": 20, "max_limit": OUTPUT_ITEMS_TRUNCATION_LIMIT},
    ),
    "hf_collection_items": _config(
        "hf_collection_items",
        endpoint_patterns=(
            r"^/api/collections/[^/]+$",
            r"^/api/collections/[^/]+/[^/]+$",
        ),
        default_metadata=_metadata(
            default_fields=REPO_SUMMARY_FIELDS,
            guaranteed_fields=["repo_id", "repo_type", "repo_url"],
            optional_fields=[
                field
                for field in REPO_CANONICAL_FIELDS
                if field not in {"repo_id", "repo_type", "repo_url"}
            ],
            default_limit=100,
            max_limit=OUTPUT_ITEMS_TRUNCATION_LIMIT,
            notes="Returns repos inside one collection as summary rows.",
        ),
        pagination={"default_limit": 100, "max_limit": OUTPUT_ITEMS_TRUNCATION_LIMIT},
    ),
}
| 609 |
+
|
| 610 |
+
# Helper names in registry order, as exposed to the sandboxed runtime.
HELPER_EXTERNALS = tuple(HELPER_CONFIGS)

# Per-helper metadata view, deep-copied so callers cannot mutate the registry.
HELPER_DEFAULT_METADATA: dict[str, dict[str, Any]] = {
    name: dict(config.default_metadata) for name, config in HELPER_CONFIGS.items()
}

# Pagination knobs for helpers that declare any; helpers without pagination
# are deliberately omitted from this mapping.
PAGINATION_POLICY: dict[str, dict[str, Any]] = {
    name: dict(config.pagination)
    for name, config in HELPER_CONFIGS.items()
    if config.pagination
}

# (endpoint regex, helper name) pairs — used to route raw endpoint paths to
# the helper that owns them.
HELPER_COVERED_ENDPOINT_PATTERNS: list[tuple[str, str]] = [
    (pattern, config.name)
    for config in HELPER_CONFIGS.values()
    for pattern in config.endpoint_patterns
]

# Full read-only allowlist of Hub API paths the runtime may call.
ALLOWLIST_PATTERNS = [
    r"^/api/whoami-v2$",
    r"^/api/trending$",
    r"^/api/daily_papers$",
    r"^/api/models$",
    r"^/api/datasets$",
    r"^/api/spaces$",
    r"^/api/models-tags-by-type$",
    r"^/api/datasets-tags-by-type$",
    r"^/api/(models|datasets|spaces)/[^/]+/[^/]+$",
    r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions$",
    r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions/\d+$",
    r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions/\d+/status$",
    r"^/api/users/[^/]+/overview$",
    r"^/api/users/[^/]+/socials$",
    r"^/api/users/[^/]+/followers$",
    r"^/api/users/[^/]+/following$",
    r"^/api/users/[^/]+/likes$",
    r"^/api/(models|datasets|spaces)/(?:[^/]+|[^/]+/[^/]+)/likers$",
    r"^/api/organizations/[^/]+/overview$",
    r"^/api/organizations/[^/]+/members$",
    r"^/api/organizations/[^/]+/followers$",
    r"^/api/collections$",
    r"^/api/collections/[^/]+$",
    r"^/api/collections/[^/]+/[^/]+$",
    r"^/api/recent-activity$",
]

# Narrower subset of ALLOWLIST_PATTERNS enforced when strict mode is on.
STRICT_ALLOWLIST_PATTERNS = [
    r"^/api/users/[^/]+/overview$",
    r"^/api/users/[^/]+/socials$",
    r"^/api/whoami-v2$",
    r"^/api/trending$",
    r"^/api/daily_papers$",
    r"^/api/(models|datasets|spaces)/(?:[^/]+|[^/]+/[^/]+)/likers$",
    r"^/api/collections$",
    r"^/api/collections/[^/]+$",
    r"^/api/collections/[^/]+/[^/]+$",
    r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions$",
    r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions/\d+$",
    r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions/\d+/status$",
]

# Repo-type -> Hub client method names (list + exact detail) used by the
# search/detail helpers above.
REPO_API_ADAPTERS: dict[str, RepoApiAdapter] = {
    "model": RepoApiAdapter(
        list_method_name="list_models", detail_method_name="model_info"
    ),
    "dataset": RepoApiAdapter(
        list_method_name="list_datasets", detail_method_name="dataset_info"
    ),
    "space": RepoApiAdapter(
        list_method_name="list_spaces", detail_method_name="space_info"
    ),
}
.prod/monty_api/runtime_context.py
ADDED
|
@@ -0,0 +1,290 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from dataclasses import dataclass, field
|
| 5 |
+
from typing import TYPE_CHECKING, Any, Callable, NamedTuple, cast
|
| 6 |
+
|
| 7 |
+
from .constants import MAX_CALLS_LIMIT
|
| 8 |
+
from .helpers.activity import register_activity_helpers
|
| 9 |
+
from .helpers.collections import register_collection_helpers
|
| 10 |
+
from .helpers.introspection import register_introspection_helpers
|
| 11 |
+
from .helpers.profiles import register_profile_helpers
|
| 12 |
+
from .helpers.repos import register_repo_helpers
|
| 13 |
+
from .http_runtime import (
|
| 14 |
+
_as_int,
|
| 15 |
+
_author_from_any,
|
| 16 |
+
_canonical_repo_type,
|
| 17 |
+
_clamp_int,
|
| 18 |
+
_coerce_str_list,
|
| 19 |
+
_dt_to_str,
|
| 20 |
+
_extract_author_names,
|
| 21 |
+
_extract_num_params,
|
| 22 |
+
_extract_profile_name,
|
| 23 |
+
_load_token,
|
| 24 |
+
_normalize_collection_repo_item,
|
| 25 |
+
_normalize_daily_paper_row,
|
| 26 |
+
_normalize_repo_detail_row,
|
| 27 |
+
_normalize_repo_search_row,
|
| 28 |
+
_normalize_repo_sort_key,
|
| 29 |
+
_normalize_trending_row,
|
| 30 |
+
_optional_str_list,
|
| 31 |
+
_repo_detail_call,
|
| 32 |
+
_repo_list_call,
|
| 33 |
+
_repo_web_url,
|
| 34 |
+
_sort_repo_rows,
|
| 35 |
+
call_api_host,
|
| 36 |
+
)
|
| 37 |
+
from .registry import PAGINATION_POLICY
|
| 38 |
+
from .runtime_envelopes import (
|
| 39 |
+
_build_exhaustive_meta,
|
| 40 |
+
_build_exhaustive_result_meta,
|
| 41 |
+
_derive_can_request_more,
|
| 42 |
+
_derive_limit_metadata,
|
| 43 |
+
_derive_more_available,
|
| 44 |
+
_derive_next_request_hint,
|
| 45 |
+
_derive_truncated_by,
|
| 46 |
+
_helper_error,
|
| 47 |
+
_helper_meta,
|
| 48 |
+
_helper_success,
|
| 49 |
+
_overview_count_only_success,
|
| 50 |
+
_resolve_exhaustive_limits,
|
| 51 |
+
)
|
| 52 |
+
from .runtime_filtering import (
|
| 53 |
+
_apply_where,
|
| 54 |
+
_helper_item,
|
| 55 |
+
_item_matches_where,
|
| 56 |
+
_normalize_where,
|
| 57 |
+
_overview_count,
|
| 58 |
+
_project_activity_items,
|
| 59 |
+
_project_actor_items,
|
| 60 |
+
_project_collection_items,
|
| 61 |
+
_project_discussion_detail_items,
|
| 62 |
+
_project_discussion_items,
|
| 63 |
+
_project_daily_paper_items,
|
| 64 |
+
_project_items,
|
| 65 |
+
_project_repo_items,
|
| 66 |
+
_project_user_items,
|
| 67 |
+
_project_user_like_items,
|
| 68 |
+
)
|
| 69 |
+
from .validation import _resolve_helper_functions
|
| 70 |
+
|
| 71 |
+
if TYPE_CHECKING:
|
| 72 |
+
from huggingface_hub import HfApi
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
class RuntimeHelperEnvironment(NamedTuple):
    """Bundle produced by build_runtime_helper_environment().

    Exposes the RuntimeContext plus direct references to its mutable
    bookkeeping containers, so callers can observe call counts, traces,
    and errors without reaching through the context object.
    """

    context: "RuntimeContext"
    # Shared mutable counter box; the live count is call_count["n"].
    call_count: dict[str, int]
    # Per-call trace entries appended by RuntimeContext._trace_ok/_trace_err.
    trace: list[dict[str, Any]]
    limit_summaries: list[dict[str, Any]]
    # Single-slot box: ["value"] holds the most recent helper error
    # envelope, or None when no helper has failed yet.
    latest_helper_error_box: dict[str, dict[str, Any] | None]
    # Single-slot flag box keyed "used".
    internal_helper_used: dict[str, bool]
    # Helper callables validated/resolved by _resolve_helper_functions.
    helper_functions: dict[str, Callable[..., Any]]
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
@dataclass(slots=True)
class RuntimeContext:
    """Mutable per-run state shared by all registered helper functions.

    Tracks an API-call budget, a structured per-call trace, and the
    registry of helper callables. Many additional helper methods are
    attached to this class at import time via setattr (see the loop
    following the class definition).
    """

    max_calls: int
    strict_mode: bool
    timeout_sec: int
    # Boxed counter so references handed out (e.g. through
    # RuntimeHelperEnvironment) observe updates; live value is ["n"].
    call_count: dict[str, int] = field(default_factory=lambda: {"n": 0})
    # One dict per attempted API call; see _trace_ok/_trace_err for shape.
    trace: list[dict[str, Any]] = field(default_factory=list)
    limit_summaries: list[dict[str, Any]] = field(default_factory=list)
    # Single-slot box holding the most recent helper error envelope.
    latest_helper_error_box: dict[str, dict[str, Any] | None] = field(
        default_factory=lambda: {"value": None}
    )
    internal_helper_used: dict[str, bool] = field(
        default_factory=lambda: {"used": False}
    )
    helper_registry: dict[str, Callable[..., Any]] = field(default_factory=dict)
    # Lazily created HfApi client; populated by _get_hf_api_client().
    _hf_api_client: "HfApi | None" = field(default=None, init=False, repr=False)

    def _budget_remaining(self) -> int:
        """Return how many API calls are still allowed (never negative)."""
        return max(0, self.max_calls - self.call_count["n"])

    def _policy_int(self, helper_name: str, key: str, default: int) -> int:
        """Look up an int pagination-policy value for a helper, else default."""
        cfg = PAGINATION_POLICY.get(helper_name) or {}
        try:
            return int(cfg.get(key, default))
        except Exception:
            # Malformed policy values degrade to the default rather than raise.
            return int(default)

    def _consume_call(self, endpoint: str, method: str = "GET") -> int:
        """Reserve one budget slot and return the new 1-based call index.

        Raises RuntimeError once max_calls is exhausted. endpoint/method
        are accepted for signature parity with the tracing helpers but
        are not recorded here.
        """
        if self.call_count["n"] >= self.max_calls:
            raise RuntimeError(f"Max API calls exceeded ({self.max_calls})")
        self.call_count["n"] += 1
        return self.call_count["n"]

    def _trace_ok(
        self, idx: int, endpoint: str, method: str = "GET", status: int = 200
    ) -> None:
        """Append a successful-call entry to the trace."""
        self.trace.append(
            {
                "call_index": idx,
                # "depth" mirrors call_index in this implementation.
                "depth": idx,
                "method": method,
                "endpoint": endpoint,
                "ok": True,
                "status": status,
            }
        )

    def _trace_err(
        self, idx: int, endpoint: str, err: Any, method: str = "GET", status: int = 0
    ) -> None:
        """Append a failed-call entry (with stringified error) to the trace."""
        self.trace.append(
            {
                "call_index": idx,
                "depth": idx,
                "method": method,
                "endpoint": endpoint,
                "ok": False,
                "status": status,
                "error": str(err),
            }
        )

    def _host_raw_call(
        self,
        endpoint: str,
        *,
        params: dict[str, Any] | None = None,
        method: str = "GET",
        json_body: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Make one budget-counted raw API call and trace its outcome.

        Returns the response envelope from call_api_host; a non-ok
        envelope is traced as an error but still returned. Exceptions
        raised by call_api_host are traced and re-raised.
        """
        idx = self._consume_call(endpoint, method)
        try:
            resp = call_api_host(
                endpoint,
                method=method,
                params=params,
                json_body=json_body,
                timeout_sec=self.timeout_sec,
                strict_mode=self.strict_mode,
            )
            if resp.get("ok"):
                self._trace_ok(
                    idx, endpoint, method=method, status=int(resp.get("status") or 200)
                )
            else:
                self._trace_err(
                    idx,
                    endpoint,
                    resp.get("error"),
                    method=method,
                    status=int(resp.get("status") or 0),
                )
            return resp
        except Exception as exc:
            self._trace_err(idx, endpoint, exc, method=method, status=0)
            raise

    def _get_hf_api_client(self) -> "HfApi":
        """Return the lazily constructed, cached HfApi client."""
        if self._hf_api_client is None:
            # Imported lazily so the dependency is only required when used.
            from huggingface_hub import HfApi

            endpoint = os.getenv("HF_ENDPOINT", "https://huggingface.co").rstrip("/")
            self._hf_api_client = HfApi(endpoint=endpoint, token=_load_token())
        return self._hf_api_client

    def _host_hf_call(self, endpoint: str, fn: Callable[[], Any]) -> Any:
        """Run a zero-arg HfApi call under the budget, tracing success/failure.

        `endpoint` is used only for budget accounting and trace labels.
        """
        idx = self._consume_call(endpoint, "GET")
        try:
            out = fn()
            self._trace_ok(idx, endpoint, method="GET", status=200)
            return out
        except Exception as exc:
            self._trace_err(idx, endpoint, exc, method="GET", status=0)
            raise

    async def call_helper(self, helper_name: str, /, *args: Any, **kwargs: Any) -> Any:
        """Await a registered helper by name.

        Raises RuntimeError if the name is missing or not callable.
        NOTE(review): the result is awaited unconditionally, so registered
        helpers are assumed to be coroutine functions — confirm registrars.
        """
        fn = self.helper_registry.get(helper_name)
        if not callable(fn):
            raise RuntimeError(f"Helper '{helper_name}' is not registered")
        return await cast(Callable[..., Any], fn)(*args, **kwargs)
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
# Attach the shared envelope/filtering/normalization helpers to RuntimeContext.
# Plain entries are module-level functions written with an explicit `self`
# first parameter, so they become ordinary bound methods; the
# staticmethod-wrapped entries are utilities that take no context at all.
for name, value in {
    "_helper_meta": _helper_meta,
    "_derive_limit_metadata": _derive_limit_metadata,
    "_derive_more_available": _derive_more_available,
    "_derive_truncated_by": _derive_truncated_by,
    "_derive_can_request_more": _derive_can_request_more,
    "_derive_next_request_hint": _derive_next_request_hint,
    "_resolve_exhaustive_limits": _resolve_exhaustive_limits,
    "_build_exhaustive_meta": _build_exhaustive_meta,
    "_overview_count_only_success": _overview_count_only_success,
    "_build_exhaustive_result_meta": _build_exhaustive_result_meta,
    "_helper_success": _helper_success,
    "_helper_error": _helper_error,
    "_project_items": _project_items,
    "_project_repo_items": _project_repo_items,
    "_project_collection_items": _project_collection_items,
    "_project_discussion_items": _project_discussion_items,
    "_project_discussion_detail_items": _project_discussion_detail_items,
    "_project_daily_paper_items": _project_daily_paper_items,
    "_project_user_items": _project_user_items,
    "_project_actor_items": _project_actor_items,
    "_project_user_like_items": _project_user_like_items,
    "_project_activity_items": _project_activity_items,
    "_normalize_where": _normalize_where,
    "_item_matches_where": _item_matches_where,
    "_apply_where": _apply_where,
    "_helper_item": _helper_item,
    "_overview_count": _overview_count,
    "_as_int": staticmethod(_as_int),
    "_author_from_any": staticmethod(_author_from_any),
    "_canonical_repo_type": staticmethod(_canonical_repo_type),
    "_clamp_int": staticmethod(_clamp_int),
    "_coerce_str_list": staticmethod(_coerce_str_list),
    "_dt_to_str": staticmethod(_dt_to_str),
    "_extract_author_names": staticmethod(_extract_author_names),
    "_extract_num_params": staticmethod(_extract_num_params),
    "_extract_profile_name": staticmethod(_extract_profile_name),
    "_load_token": staticmethod(_load_token),
    "_normalize_collection_repo_item": staticmethod(_normalize_collection_repo_item),
    "_normalize_daily_paper_row": staticmethod(_normalize_daily_paper_row),
    "_normalize_repo_detail_row": staticmethod(_normalize_repo_detail_row),
    "_normalize_repo_search_row": staticmethod(_normalize_repo_search_row),
    "_normalize_repo_sort_key": staticmethod(_normalize_repo_sort_key),
    "_normalize_trending_row": staticmethod(_normalize_trending_row),
    "_optional_str_list": staticmethod(_optional_str_list),
    "_repo_detail_call": staticmethod(_repo_detail_call),
    "_repo_list_call": staticmethod(_repo_list_call),
    "_repo_web_url": staticmethod(_repo_web_url),
    "_sort_repo_rows": staticmethod(_sort_repo_rows),
}.items():
    # setattr on the class works even with slots=True (slots constrain
    # instances, not the class namespace).
    setattr(RuntimeContext, name, value)
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def build_runtime_helper_environment(
    *,
    max_calls: int,
    strict_mode: bool,
    timeout_sec: int,
) -> RuntimeHelperEnvironment:
    """Create a RuntimeContext, register every helper group, and bundle it.

    max_calls is clamped into [1, MAX_CALLS_LIMIT] before the context is
    created. The returned tuple exposes the context plus direct references
    to its mutable bookkeeping containers and the resolved helper map.
    """
    clamped_calls = max(1, min(int(max_calls), MAX_CALLS_LIMIT))
    context = RuntimeContext(
        max_calls=clamped_calls,
        strict_mode=strict_mode,
        timeout_sec=timeout_sec,
    )

    # Each registrar returns a name -> callable mapping bound to this context.
    registrars = (
        register_profile_helpers,
        register_repo_helpers,
        register_activity_helpers,
        register_collection_helpers,
        register_introspection_helpers,
    )
    for register in registrars:
        context.helper_registry.update(register(context))

    return RuntimeHelperEnvironment(
        context=context,
        call_count=context.call_count,
        trace=context.trace,
        limit_summaries=context.limit_summaries,
        latest_helper_error_box=context.latest_helper_error_box,
        internal_helper_used=context.internal_helper_used,
        helper_functions=_resolve_helper_functions(context.helper_registry),
    )
|
.prod/monty_api/runtime_envelopes.py
ADDED
|
@@ -0,0 +1,357 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
|
| 5 |
+
from .http_runtime import _as_int, _clamp_int
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def _helper_meta(
|
| 9 |
+
self: Any, start_calls: int, *, source: str, **extra: Any
|
| 10 |
+
) -> dict[str, Any]:
|
| 11 |
+
out = {
|
| 12 |
+
"source": source,
|
| 13 |
+
"normalized": True,
|
| 14 |
+
"budget_used": max(0, self.call_count["n"] - start_calls),
|
| 15 |
+
"budget_remaining": self._budget_remaining(),
|
| 16 |
+
}
|
| 17 |
+
out.update(extra)
|
| 18 |
+
return out
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _derive_limit_metadata(
|
| 22 |
+
self: Any,
|
| 23 |
+
*,
|
| 24 |
+
requested_limit: int | None,
|
| 25 |
+
applied_limit: int,
|
| 26 |
+
default_limit_used: bool,
|
| 27 |
+
requested_scan_limit: int | None = None,
|
| 28 |
+
applied_scan_limit: int | None = None,
|
| 29 |
+
requested_max_pages: int | None = None,
|
| 30 |
+
applied_max_pages: int | None = None,
|
| 31 |
+
) -> dict[str, Any]:
|
| 32 |
+
meta: dict[str, Any] = {
|
| 33 |
+
"requested_limit": requested_limit,
|
| 34 |
+
"applied_limit": applied_limit,
|
| 35 |
+
"default_limit_used": default_limit_used,
|
| 36 |
+
}
|
| 37 |
+
if requested_scan_limit is not None or applied_scan_limit is not None:
|
| 38 |
+
meta["requested_scan_limit"] = requested_scan_limit
|
| 39 |
+
meta["scan_limit"] = applied_scan_limit
|
| 40 |
+
meta["scan_limit_applied"] = requested_scan_limit != applied_scan_limit
|
| 41 |
+
if requested_max_pages is not None or applied_max_pages is not None:
|
| 42 |
+
meta["requested_max_pages"] = requested_max_pages
|
| 43 |
+
meta["applied_max_pages"] = applied_max_pages
|
| 44 |
+
meta["page_limit_applied"] = requested_max_pages != applied_max_pages
|
| 45 |
+
if requested_limit is not None:
|
| 46 |
+
meta["hard_cap_applied"] = applied_limit < requested_limit
|
| 47 |
+
return meta
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def _derive_more_available(
|
| 51 |
+
self: Any,
|
| 52 |
+
*,
|
| 53 |
+
sample_complete: bool,
|
| 54 |
+
exact_count: bool,
|
| 55 |
+
returned: int,
|
| 56 |
+
total: int | None,
|
| 57 |
+
) -> bool | str:
|
| 58 |
+
if sample_complete:
|
| 59 |
+
return False
|
| 60 |
+
if exact_count and total is not None and returned < total:
|
| 61 |
+
return True
|
| 62 |
+
return "unknown"
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def _derive_truncated_by(
|
| 66 |
+
self: Any,
|
| 67 |
+
*,
|
| 68 |
+
hard_cap: bool = False,
|
| 69 |
+
scan_limit_hit: bool = False,
|
| 70 |
+
page_limit_hit: bool = False,
|
| 71 |
+
limit_hit: bool = False,
|
| 72 |
+
) -> str:
|
| 73 |
+
causes = [hard_cap, scan_limit_hit, page_limit_hit, limit_hit]
|
| 74 |
+
if sum(1 for cause in causes if cause) > 1:
|
| 75 |
+
return "multiple"
|
| 76 |
+
if hard_cap:
|
| 77 |
+
return "hard_cap"
|
| 78 |
+
if scan_limit_hit:
|
| 79 |
+
return "scan_limit"
|
| 80 |
+
if page_limit_hit:
|
| 81 |
+
return "page_limit"
|
| 82 |
+
if limit_hit:
|
| 83 |
+
return "limit"
|
| 84 |
+
return "none"
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def _derive_can_request_more(
|
| 88 |
+
self: Any, *, sample_complete: bool, truncated_by: str
|
| 89 |
+
) -> bool:
|
| 90 |
+
if sample_complete:
|
| 91 |
+
return False
|
| 92 |
+
return truncated_by in {"limit", "scan_limit", "page_limit", "multiple"}
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _derive_next_request_hint(
|
| 96 |
+
self: Any,
|
| 97 |
+
*,
|
| 98 |
+
truncated_by: str,
|
| 99 |
+
more_available: bool | str,
|
| 100 |
+
applied_limit: int,
|
| 101 |
+
applied_scan_limit: int | None = None,
|
| 102 |
+
applied_max_pages: int | None = None,
|
| 103 |
+
) -> str:
|
| 104 |
+
if truncated_by == "limit":
|
| 105 |
+
return f"Ask for limit>{applied_limit} to see more rows"
|
| 106 |
+
if truncated_by == "scan_limit" and applied_scan_limit is not None:
|
| 107 |
+
return f"Increase scan_limit above {applied_scan_limit} for broader coverage"
|
| 108 |
+
if truncated_by == "page_limit" and applied_max_pages is not None:
|
| 109 |
+
return f"Increase max_pages above {applied_max_pages} to continue paging"
|
| 110 |
+
if truncated_by == "hard_cap":
|
| 111 |
+
return "No more rows can be returned in a single call because a hard cap was applied"
|
| 112 |
+
if truncated_by == "multiple":
|
| 113 |
+
return "Increase the relevant return/page/scan bounds to improve coverage"
|
| 114 |
+
if more_available is False:
|
| 115 |
+
return "No more results available"
|
| 116 |
+
if more_available == "unknown":
|
| 117 |
+
return "More results may exist; narrow filters or raise scan/page bounds for better coverage"
|
| 118 |
+
return "Ask for a larger limit to see more rows"
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def _resolve_exhaustive_limits(
    self: Any,
    *,
    limit: int | None,
    count_only: bool,
    default_limit: int,
    max_limit: int,
    scan_limit: int | None = None,
    scan_cap: int | None = None,
) -> dict[str, Any]:
    """Resolve the effective limit plan for an exhaustive listing helper.

    count_only discards the requested limit and forces an applied limit
    of 0 (no rows returned). Otherwise the requested limit is clamped
    into [0, max_limit], falling back to default_limit when the caller
    supplied none. Scan-limit keys are included only when the helper has
    a configured scan_cap.
    """
    if count_only:
        requested, clamp_input = None, 0
    else:
        requested, clamp_input = limit, limit
    applied = _clamp_int(
        clamp_input, default=default_limit, minimum=0, maximum=max_limit
    )
    plan: dict[str, Any] = {
        "requested_limit": requested,
        "applied_limit": applied,
        "default_limit_used": requested is None and not count_only,
        "hard_cap_applied": requested is not None and applied < requested,
    }
    if scan_cap is not None:
        plan["requested_scan_limit"] = scan_limit
        plan["applied_scan_limit"] = _clamp_int(
            scan_limit, default=scan_cap, minimum=1, maximum=scan_cap
        )
    return plan
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def _build_exhaustive_meta(
    self: Any,
    *,
    base_meta: dict[str, Any],
    limit_plan: dict[str, Any],
    sample_complete: bool,
    exact_count: bool,
    truncated_by: str,
    more_available: bool | str,
    requested_max_pages: int | None = None,
    applied_max_pages: int | None = None,
) -> dict[str, Any]:
    """Assemble the full completeness/limit meta block for a helper result.

    Starts from a copy of base_meta, layers in the completeness flags and
    derived hints, then layers in the limit metadata. Later updates
    overwrite earlier keys, so limit metadata takes final precedence.
    """
    meta = dict(base_meta)
    applied_limit = int(limit_plan["applied_limit"])
    # May be absent or non-int; downstream calls only accept real ints.
    applied_scan_limit = limit_plan.get("applied_scan_limit")
    meta.update(
        {
            # "complete" and "sample_complete" intentionally mirror each other.
            "complete": sample_complete,
            "exact_count": exact_count,
            "sample_complete": sample_complete,
            "more_available": more_available,
            "can_request_more": _derive_can_request_more(
                self,
                sample_complete=sample_complete,
                truncated_by=truncated_by,
            ),
            "truncated_by": truncated_by,
            "next_request_hint": _derive_next_request_hint(
                self,
                truncated_by=truncated_by,
                more_available=more_available,
                applied_limit=applied_limit,
                applied_scan_limit=applied_scan_limit
                if isinstance(applied_scan_limit, int)
                else None,
                applied_max_pages=applied_max_pages,
            ),
        }
    )
    meta.update(
        _derive_limit_metadata(
            self,
            requested_limit=limit_plan["requested_limit"],
            applied_limit=applied_limit,
            default_limit_used=bool(limit_plan["default_limit_used"]),
            requested_scan_limit=limit_plan.get("requested_scan_limit"),
            applied_scan_limit=applied_scan_limit
            if isinstance(applied_scan_limit, int)
            else None,
            requested_max_pages=requested_max_pages,
            applied_max_pages=applied_max_pages,
        )
    )
    return meta
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
def _overview_count_only_success(
    self: Any,
    *,
    start_calls: int,
    source: str,
    total: int,
    limit_plan: dict[str, Any],
    base_meta: dict[str, Any],
) -> dict[str, Any]:
    """Build the success envelope for a count_only overview query.

    Returns zero items; all total/matched counters are set to `total`,
    and the result is marked complete, exact, and untruncated.
    """
    count_meta = dict(base_meta)
    count_meta.update(
        matched=total,
        returned=0,
        total=total,
        total_available=total,
        total_matched=total,
        truncated=False,
    )
    full_meta = _build_exhaustive_meta(
        self,
        base_meta=count_meta,
        limit_plan=limit_plan,
        sample_complete=True,
        exact_count=True,
        truncated_by="none",
        more_available=False,
    )
    return _helper_success(
        self,
        start_calls=start_calls,
        source=source,
        items=[],
        meta=full_meta,
    )
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
def _build_exhaustive_result_meta(
    self: Any,
    *,
    base_meta: dict[str, Any],
    limit_plan: dict[str, Any],
    matched_count: int,
    returned_count: int,
    exact_count: bool,
    count_only: bool = False,
    sample_complete: bool | None = None,
    more_available: bool | str | None = None,
    scan_limit_hit: bool = False,
    page_limit_hit: bool = False,
    truncated_extra: bool = False,
    requested_max_pages: int | None = None,
    applied_max_pages: int | None = None,
) -> dict[str, Any]:
    """Derive the final result meta from counts, the limit plan, and flags.

    Precedence rules: an explicit bool sample_complete wins; count_only
    ties completeness to exact_count and suppresses limit_hit; an
    explicit more_available wins except that count_only+exact_count
    forces it to False; otherwise more_available is derived from the
    counts. The assembled counters are merged over base_meta and handed
    to _build_exhaustive_meta.
    """
    applied_limit = int(limit_plan["applied_limit"])
    if count_only:
        # No rows are returned, so completeness is purely about the count.
        effective_sample_complete = exact_count
    else:
        effective_sample_complete = (
            sample_complete
            if isinstance(sample_complete, bool)
            else exact_count and matched_count <= applied_limit
        )
    # applied_limit == 0 means "return nothing", which is never a limit hit.
    limit_hit = (
        False
        if count_only
        else (applied_limit > 0 and matched_count > applied_limit)
    )
    truncated_by = _derive_truncated_by(
        self,
        hard_cap=bool(limit_plan.get("hard_cap_applied")),
        scan_limit_hit=scan_limit_hit,
        page_limit_hit=page_limit_hit,
        limit_hit=limit_hit,
    )
    # truncated_extra lets callers flag truncation not captured by the flags.
    truncated = truncated_by != "none" or truncated_extra
    total_value = _as_int(base_meta.get("total"))
    effective_more_available = more_available
    if count_only and exact_count:
        effective_more_available = False
    if effective_more_available is None:
        effective_more_available = _derive_more_available(
            self,
            sample_complete=effective_sample_complete,
            exact_count=exact_count,
            returned=returned_count,
            total=total_value,
        )

    return _build_exhaustive_meta(
        self,
        base_meta={
            **base_meta,
            "matched": matched_count,
            "returned": returned_count,
            "truncated": truncated,
        },
        limit_plan=limit_plan,
        sample_complete=effective_sample_complete,
        exact_count=exact_count,
        truncated_by=truncated_by,
        more_available=effective_more_available,
        requested_max_pages=requested_max_pages,
        applied_max_pages=applied_max_pages,
    )
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
def _helper_success(
    self: Any,
    *,
    start_calls: int,
    source: str,
    items: list[dict[str, Any]],
    cursor: str | None = None,
    meta: dict[str, Any] | None = None,
    **extra_meta: Any,
) -> dict[str, Any]:
    """Build the standard success envelope around a list of result items.

    `item` is populated only for an exactly-one-row result. Extra keyword
    meta overrides keys from `meta`, and a non-None cursor is recorded
    last so it always survives the merge.
    """
    combined: dict[str, Any] = {**(meta or {}), **extra_meta}
    if cursor is not None:
        combined["cursor"] = cursor
    single = items[0] if len(items) == 1 else None
    return {
        "ok": True,
        "item": single,
        "items": items,
        "meta": _helper_meta(self, start_calls, source=source, **combined),
        "error": None,
    }
|
| 339 |
+
|
| 340 |
+
|
| 341 |
+
def _helper_error(
    self: Any,
    *,
    start_calls: int,
    source: str,
    error: Any,
    **meta: Any,
) -> dict[str, Any]:
    """Build the standard failure envelope and record it as the latest error.

    The error is stringified; the envelope is also stashed in
    self.latest_helper_error_box["value"] for later inspection.
    """
    failure: dict[str, Any] = {
        "ok": False,
        "item": None,
        "items": [],
        "meta": _helper_meta(self, start_calls, source=source, **meta),
        "error": str(error),
    }
    self.latest_helper_error_box["value"] = failure
    return failure
|
.prod/monty_api/runtime_filtering.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
|
| 5 |
+
from .constants import (
|
| 6 |
+
ACTIVITY_CANONICAL_FIELDS,
|
| 7 |
+
ACTOR_CANONICAL_FIELDS,
|
| 8 |
+
COLLECTION_CANONICAL_FIELDS,
|
| 9 |
+
DAILY_PAPER_CANONICAL_FIELDS,
|
| 10 |
+
DISCUSSION_CANONICAL_FIELDS,
|
| 11 |
+
DISCUSSION_DETAIL_CANONICAL_FIELDS,
|
| 12 |
+
REPO_CANONICAL_FIELDS,
|
| 13 |
+
USER_CANONICAL_FIELDS,
|
| 14 |
+
USER_LIKES_CANONICAL_FIELDS,
|
| 15 |
+
)
|
| 16 |
+
from .http_runtime import _as_int
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _allowed_field_set(allowed_fields: tuple[str, ...] | list[str] | set[str]) -> set[str]:
|
| 20 |
+
return {str(field).strip() for field in allowed_fields if str(field).strip()}
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def _project_items(
|
| 24 |
+
self: Any,
|
| 25 |
+
items: list[dict[str, Any]],
|
| 26 |
+
fields: list[str] | None,
|
| 27 |
+
*,
|
| 28 |
+
allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
|
| 29 |
+
) -> list[dict[str, Any]]:
|
| 30 |
+
if not isinstance(fields, list) or not fields:
|
| 31 |
+
return items
|
| 32 |
+
wanted = [str(field).strip() for field in fields if str(field).strip()]
|
| 33 |
+
if not wanted:
|
| 34 |
+
return items
|
| 35 |
+
if allowed_fields is not None:
|
| 36 |
+
allowed = _allowed_field_set(allowed_fields)
|
| 37 |
+
invalid = sorted(field for field in wanted if field not in allowed)
|
| 38 |
+
if invalid:
|
| 39 |
+
raise ValueError(
|
| 40 |
+
f"Unsupported fields {invalid}. Allowed fields: {sorted(allowed)}"
|
| 41 |
+
)
|
| 42 |
+
projected: list[dict[str, Any]] = []
|
| 43 |
+
for row in items:
|
| 44 |
+
out: dict[str, Any] = {}
|
| 45 |
+
for key in wanted:
|
| 46 |
+
value = row.get(key)
|
| 47 |
+
if value is None:
|
| 48 |
+
continue
|
| 49 |
+
out[key] = value
|
| 50 |
+
projected.append(out)
|
| 51 |
+
return projected
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def _project_repo_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project repo rows, validating *fields* against the repo canonical field set."""
    return _project_items(self, items, fields, allowed_fields=REPO_CANONICAL_FIELDS)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _project_collection_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project collection rows, validating *fields* against the collection canonical field set."""
    return _project_items(self, items, fields, allowed_fields=COLLECTION_CANONICAL_FIELDS)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _project_daily_paper_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project daily-paper rows, validating *fields* against the daily-paper canonical field set."""
    return _project_items(self, items, fields, allowed_fields=DAILY_PAPER_CANONICAL_FIELDS)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def _project_user_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project user rows, validating *fields* against the user canonical field set."""
    return _project_items(self, items, fields, allowed_fields=USER_CANONICAL_FIELDS)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def _project_actor_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project actor rows, validating *fields* against the actor canonical field set."""
    return _project_items(self, items, fields, allowed_fields=ACTOR_CANONICAL_FIELDS)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def _project_user_like_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project user-like rows, validating *fields* against the user-likes canonical field set."""
    return _project_items(self, items, fields, allowed_fields=USER_LIKES_CANONICAL_FIELDS)
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def _project_activity_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project activity rows, validating *fields* against the activity canonical field set."""
    return _project_items(self, items, fields, allowed_fields=ACTIVITY_CANONICAL_FIELDS)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def _project_discussion_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project discussion rows, validating *fields* against the discussion canonical field set."""
    return _project_items(self, items, fields, allowed_fields=DISCUSSION_CANONICAL_FIELDS)
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def _project_discussion_detail_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project discussion-detail rows, validating *fields* against the detail canonical field set."""
    return _project_items(self, items, fields, allowed_fields=DISCUSSION_DETAIL_CANONICAL_FIELDS)
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def _normalize_where(
|
| 121 |
+
self: Any,
|
| 122 |
+
where: dict[str, Any] | None,
|
| 123 |
+
*,
|
| 124 |
+
allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
|
| 125 |
+
) -> dict[str, Any] | None:
|
| 126 |
+
if not isinstance(where, dict) or not where:
|
| 127 |
+
return where
|
| 128 |
+
allowed = _allowed_field_set(allowed_fields) if allowed_fields is not None else None
|
| 129 |
+
normalized: dict[str, Any] = {}
|
| 130 |
+
for key, value in where.items():
|
| 131 |
+
raw_key = str(key).strip()
|
| 132 |
+
if not raw_key:
|
| 133 |
+
continue
|
| 134 |
+
if allowed is not None and raw_key not in allowed:
|
| 135 |
+
raise ValueError(
|
| 136 |
+
f"Unsupported filter fields {[raw_key]}. Allowed fields: {sorted(allowed)}"
|
| 137 |
+
)
|
| 138 |
+
normalized[raw_key] = value
|
| 139 |
+
return normalized
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def _item_matches_where(
    self: Any, item: dict[str, Any], where: dict[str, Any] | None
) -> bool:
    """Return True when *item* satisfies every condition in *where*.

    Condition forms per key:
      - dict: operator object supporting eq / in / contains / icontains /
        gte / lte; all present operators must pass (AND semantics).
      - list/tuple/set: membership test.
      - anything else: plain equality.

    An empty or non-dict *where* matches everything.
    """
    if not isinstance(where, dict) or not where:
        return True
    for key, cond in where.items():
        value = item.get(str(key))
        if isinstance(cond, dict):
            # Operator object: every operator present must hold.
            if "eq" in cond and value != cond.get("eq"):
                return False
            if "in" in cond:
                # A non-collection "in" operand is ignored rather than failing.
                allowed = cond.get("in")
                if isinstance(allowed, (list, tuple, set)) and value not in allowed:
                    return False
            if "contains" in cond:
                # Case-sensitive substring match; non-strings never match.
                needle = cond.get("contains")
                if (
                    not isinstance(value, str)
                    or not isinstance(needle, str)
                    or needle not in value
                ):
                    return False
            if "icontains" in cond:
                # Case-insensitive substring match; non-strings never match.
                needle = cond.get("icontains")
                if (
                    not isinstance(value, str)
                    or not isinstance(needle, str)
                    or needle.lower() not in value.lower()
                ):
                    return False
            if "gte" in cond:
                # Numeric comparison via _as_int; un-coercible values fail.
                left = _as_int(value)
                right = _as_int(cond.get("gte"))
                if left is None or right is None or left < right:
                    return False
            if "lte" in cond:
                left = _as_int(value)
                right = _as_int(cond.get("lte"))
                if left is None or right is None or left > right:
                    return False
            continue
        if isinstance(cond, (list, tuple, set)):
            # Bare collection means membership.
            if value not in cond:
                return False
            continue
        # Scalar condition means equality.
        if value != cond:
            return False
    return True
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def _apply_where(
    self: Any,
    items: list[dict[str, Any]],
    where: dict[str, Any] | None,
    *,
    allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
) -> list[dict[str, Any]]:
    """Filter *items* by a normalized *where* mapping.

    Returns *items* untouched when the normalized filter is empty or not a
    dict; otherwise keeps only rows matching every condition.
    """
    criteria = _normalize_where(self, where, allowed_fields=allowed_fields)
    if not isinstance(criteria, dict) or not criteria:
        return items
    return [candidate for candidate in items if _item_matches_where(self, candidate, criteria)]
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def _helper_item(self: Any, resp: dict[str, Any]) -> dict[str, Any] | None:
|
| 206 |
+
item = resp.get("item")
|
| 207 |
+
if isinstance(item, dict):
|
| 208 |
+
return item
|
| 209 |
+
items = resp.get("items")
|
| 210 |
+
if isinstance(items, list) and items and isinstance(items[0], dict):
|
| 211 |
+
return items[0]
|
| 212 |
+
return None
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
def _overview_count(self: Any, item: dict[str, Any] | None, key: str) -> int | None:
    """Read a count-like value under *key* from *item*, coerced via _as_int; None when unavailable."""
    if isinstance(item, dict):
        return _as_int(item.get(key))
    return None
|
.prod/monty_api/tool_entrypoints.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""File-based function tool entrypoints for the production Monty runtime."""
|
| 3 |
+
|
| 4 |
+
from __future__ import annotations
|
| 5 |
+
|
| 6 |
+
import sys
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Any
|
| 9 |
+
|
| 10 |
+
# Path bootstrap: when this file is run directly (outside the installed
# package), make both the package directory and its parent importable so
# the `monty_api` import below resolves.
_PACKAGE_DIR = Path(__file__).resolve().parent
_ROOT_DIR = _PACKAGE_DIR.parent
for candidate in (_ROOT_DIR, _PACKAGE_DIR):
    candidate_str = str(candidate)
    if candidate_str not in sys.path:
        # Prepend so the bundled package shadows any same-named install.
        sys.path.insert(0, candidate_str)
|
| 16 |
+
|
| 17 |
+
from monty_api import ( # noqa: E402
|
| 18 |
+
HELPER_EXTERNALS,
|
| 19 |
+
hf_hub_query as _hf_hub_query,
|
| 20 |
+
hf_hub_query_raw as _hf_hub_query_raw,
|
| 21 |
+
main,
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
async def hf_hub_query(
    query: str,
    code: str,
    max_calls: int | None = None,
    timeout_sec: int | None = None,
) -> dict[str, Any]:
    """Async pass-through to the packaged monty_api ``hf_hub_query`` entrypoint."""
    forwarded = {
        "query": query,
        "code": code,
        "max_calls": max_calls,
        "timeout_sec": timeout_sec,
    }
    return await _hf_hub_query(**forwarded)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
async def hf_hub_query_raw(
    query: str,
    code: str,
    max_calls: int | None = None,
    timeout_sec: int | None = None,
) -> Any:
    """Async pass-through to the packaged monty_api ``hf_hub_query_raw`` entrypoint."""
    forwarded = {
        "query": query,
        "code": code,
        "max_calls": max_calls,
        "timeout_sec": timeout_sec,
    }
    return await _hf_hub_query_raw(**forwarded)
|
| 51 |
+
|
| 52 |
+
__all__ = [
|
| 53 |
+
"HELPER_EXTERNALS",
|
| 54 |
+
"hf_hub_query",
|
| 55 |
+
"hf_hub_query_raw",
|
| 56 |
+
"main",
|
| 57 |
+
]
|
| 58 |
+
|
| 59 |
+
if __name__ == "__main__":
|
| 60 |
+
raise SystemExit(main())
|
.prod/monty_api/validation.py
ADDED
|
@@ -0,0 +1,322 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import ast
|
| 4 |
+
import re
|
| 5 |
+
import tokenize
|
| 6 |
+
from io import StringIO
|
| 7 |
+
from typing import Any, Callable, cast
|
| 8 |
+
|
| 9 |
+
from .constants import (
|
| 10 |
+
GRAPH_SCAN_LIMIT_CAP,
|
| 11 |
+
LIKES_SCAN_LIMIT_CAP,
|
| 12 |
+
OUTPUT_ITEMS_TRUNCATION_LIMIT,
|
| 13 |
+
SELECTIVE_ENDPOINT_RETURN_HARD_CAP,
|
| 14 |
+
TRENDING_ENDPOINT_MAX_LIMIT,
|
| 15 |
+
)
|
| 16 |
+
from .registry import (
|
| 17 |
+
ALLOWLIST_PATTERNS,
|
| 18 |
+
HELPER_EXTERNALS,
|
| 19 |
+
STRICT_ALLOWLIST_PATTERNS,
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def _resolve_helper_functions(
    namespace: dict[str, Any],
) -> dict[str, Callable[..., Any]]:
    """Look up every declared helper in *namespace*.

    Raises RuntimeError naming the first helper that is missing or not
    callable; otherwise returns a name -> callable mapping.
    """
    for helper_name in HELPER_EXTERNALS:
        if not callable(namespace.get(helper_name)):
            raise RuntimeError(f"Helper '{helper_name}' is not defined or not callable")
    return {
        name: cast(Callable[..., Any], namespace[name]) for name in HELPER_EXTERNALS
    }
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _normalize_endpoint(endpoint: str) -> str:
|
| 36 |
+
ep = (endpoint or "").strip()
|
| 37 |
+
if not ep:
|
| 38 |
+
raise ValueError("endpoint is required")
|
| 39 |
+
if "?" in ep:
|
| 40 |
+
raise ValueError("endpoint must not include query string; use params")
|
| 41 |
+
if ep.startswith("http://") or ep.startswith("https://"):
|
| 42 |
+
raise ValueError("endpoint must be path-only")
|
| 43 |
+
if not ep.startswith("/"):
|
| 44 |
+
ep = "/" + ep
|
| 45 |
+
if not ep.startswith("/api/"):
|
| 46 |
+
ep = "/api" + ep
|
| 47 |
+
if ep in {"/api/collections/search", "/api/collections/search/"}:
|
| 48 |
+
ep = "/api/collections"
|
| 49 |
+
if ".." in ep:
|
| 50 |
+
raise ValueError("path traversal not allowed")
|
| 51 |
+
return ep
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def _endpoint_allowed(endpoint: str, strict_mode: bool) -> bool:
    """Check *endpoint* (query string ignored) against the active allowlist patterns."""
    path = endpoint.split("?", 1)[0]
    if strict_mode:
        patterns = STRICT_ALLOWLIST_PATTERNS
    else:
        patterns = ALLOWLIST_PATTERNS
    for pattern in patterns:
        if re.match(pattern, path):
            return True
    return False
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _sanitize_params(endpoint: str, params: dict[str, Any] | None) -> dict[str, Any]:
    """Copy *params* and apply per-endpoint alias normalization and limit clamping.

    - /api/collections: map a `search` alias onto `q` (the `search` key is
      always removed).
    - /api/trending: normalize plural `type` values and clamp `limit` to the
      trending cap (unparseable limits become the cap).
    - everything else: clamp `limit` to the endpoint-specific cap; an
      unparseable limit is left untouched.
    """
    clean = dict(params or {})
    path = endpoint.split("?", 1)[0]

    if path == "/api/collections":
        if "q" not in clean and "search" in clean:
            clean["q"] = clean.pop("search")
        else:
            clean.pop("search", None)

    if path == "/api/trending":
        kind = str(clean.get("type") or "").strip().lower()
        plural_to_singular = {"models": "model", "datasets": "dataset", "spaces": "space"}
        if kind in plural_to_singular:
            clean["type"] = plural_to_singular[kind]
        raw_limit = clean.get("limit")
        if raw_limit is not None:
            try:
                requested = int(raw_limit)
            except Exception:
                requested = TRENDING_ENDPOINT_MAX_LIMIT
            clean["limit"] = min(max(requested, 1), TRENDING_ENDPOINT_MAX_LIMIT)
        return clean

    raw_limit = clean.get("limit")
    if raw_limit is None:
        return clean
    try:
        requested = int(raw_limit)
    except Exception:
        # Leave an unparseable limit for the server to reject.
        return clean

    cap = SELECTIVE_ENDPOINT_RETURN_HARD_CAP
    if re.match(r"^/api/users/[^/]+/(followers|following)$", path):
        cap = GRAPH_SCAN_LIMIT_CAP
    elif re.match(r"^/api/users/[^/]+/likes$", path):
        cap = LIKES_SCAN_LIMIT_CAP
    clean["limit"] = min(max(requested, 1), cap)
    return clean
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def _truncate_result_payload(output: Any) -> Any:
    """Cap an oversized `items` list in a result dict, recording the cut in `steps`.

    Non-dict outputs and outputs within the limit pass through unchanged.
    `item` is set to the sole remaining row when exactly one survives,
    otherwise None.
    """
    if not isinstance(output, dict):
        return output
    items = output.get("items")
    if not isinstance(items, list) or len(items) <= OUTPUT_ITEMS_TRUNCATION_LIMIT:
        return output

    kept = items[:OUTPUT_ITEMS_TRUNCATION_LIMIT]
    trimmed = dict(output)
    trimmed["items"] = kept
    trimmed["item"] = kept[0] if len(kept) == 1 else None
    note = f"truncated items to first {OUTPUT_ITEMS_TRUNCATION_LIMIT} rows for token efficiency"
    prior_steps = trimmed.get("steps")
    trimmed["steps"] = [*prior_steps, note] if isinstance(prior_steps, list) else [note]
    return trimmed
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def _is_helper_envelope(output: Any) -> bool:
|
| 123 |
+
return (
|
| 124 |
+
isinstance(output, dict)
|
| 125 |
+
and isinstance(output.get("ok"), bool)
|
| 126 |
+
and "items" in output
|
| 127 |
+
and "meta" in output
|
| 128 |
+
and "error" in output
|
| 129 |
+
)
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def _summarize_limit_hit(helper_name: str, result: Any) -> dict[str, Any] | None:
    """Build a compact summary when a helper envelope reports a limit hit.

    Returns None for non-envelope results and for envelopes that did not hit
    any truncation, hard-cap, or scan/page limit; otherwise returns a dict of
    the relevant `meta` fields tagged with *helper_name*.

    Fix: removed the unreachable ``not isinstance(meta, dict)`` guard — the
    conditional expression that assigns ``meta`` already guarantees a dict.
    """
    if not _is_helper_envelope(result):
        return None
    meta = result.get("meta") if isinstance(result.get("meta"), dict) else {}

    truncated_by = str(meta.get("truncated_by") or "")
    limit_hit = (
        meta.get("truncated") is True
        or meta.get("hard_cap_applied") is True
        or truncated_by in {"scan_limit", "page_limit", "multiple"}
    )
    if not limit_hit:
        return None

    summary: dict[str, Any] = {
        "helper": helper_name,
        "source": meta.get("source"),
        "returned": meta.get("returned"),
        "total": meta.get("total"),
        "truncated": meta.get("truncated"),
        "truncated_by": meta.get("truncated_by"),
        "more_available": meta.get("more_available"),
        "requested_limit": meta.get("requested_limit"),
        "applied_limit": meta.get("applied_limit"),
        "next_request_hint": meta.get("next_request_hint"),
        "limit_boundary_hit": meta.get("limit_boundary_hit"),
    }
    if meta.get("scan_limit") is not None:
        summary["scan_limit"] = meta.get("scan_limit")
    if meta.get("applied_max_pages") is not None:
        summary["applied_max_pages"] = meta.get("applied_max_pages")
    # Ranking-window metadata is optional; copy only the keys that are set.
    for key in (
        "ranking_window",
        "requested_ranking_window",
        "ranking_window_applied",
        "ranking_window_hit",
        "ranking_complete",
        "ranking_next_request_hint",
    ):
        if meta.get(key) is not None:
            summary[key] = meta.get(key)
    return summary
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
def _wrap_raw_result(
|
| 181 |
+
result: Any,
|
| 182 |
+
*,
|
| 183 |
+
ok: bool,
|
| 184 |
+
api_calls: int,
|
| 185 |
+
elapsed_ms: int,
|
| 186 |
+
limit_summaries: list[dict[str, Any]] | None = None,
|
| 187 |
+
error: str | None = None,
|
| 188 |
+
) -> dict[str, Any]:
|
| 189 |
+
hits = [dict(summary) for summary in (limit_summaries or [])[:10]]
|
| 190 |
+
meta: dict[str, Any] = {
|
| 191 |
+
"ok": ok,
|
| 192 |
+
"api_calls": api_calls,
|
| 193 |
+
"elapsed_ms": elapsed_ms,
|
| 194 |
+
"limits_reached": bool(hits),
|
| 195 |
+
"limit_summary": hits,
|
| 196 |
+
}
|
| 197 |
+
if error is not None:
|
| 198 |
+
meta["error"] = error
|
| 199 |
+
return {
|
| 200 |
+
"result": result,
|
| 201 |
+
"meta": meta,
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def _validate_generated_code(code: str) -> None:
    """Validate LLM-generated code against the `solve()` execution contract.

    Checks, in order: non-empty source; no blocked constructs (imports,
    exec/eval/open/__import__, `while true`); parses as a Python module
    (top-level await allowed); defines `async def solve(query, max_calls)`
    with exactly that signature; ends with `await solve(query, max_calls)`;
    never calls raw `call_api(...)`; and calls at least one documented
    helper. Raises ValueError on the first violation.
    """
    if not code.strip():
        raise ValueError("Generated code is empty")

    # Cheap regex screen before parsing: reject imports, dynamic execution,
    # filesystem access, and obvious infinite loops outright.
    blocked_patterns: list[tuple[str, str]] = [
        (r"(?m)^\s*import\s+\S", "import statement"),
        (r"(?m)^\s*from\s+\S+\s+import\s+\S", "from-import statement"),
        (r"\bexec\s*\(", "exec("),
        (r"\beval\s*\(", "eval("),
        (r"\bopen\s*\(", "open("),
        (r"\b__import__\b", "__import__"),
        (r"(?i)\bwhile\s+true\b", "while true"),
    ]
    for pattern, label in blocked_patterns:
        if re.search(pattern, code):
            raise ValueError(f"Generated code contains blocked pattern: {label}")

    try:
        # AST-only compile; ALLOW_TOP_LEVEL_AWAIT permits the required
        # trailing `await solve(...)` expression.
        parsed = compile(  # noqa: S102 - compile is used for AST validation only.
            code,
            "<generated-monty-code>",
            "exec",
            flags=ast.PyCF_ONLY_AST | ast.PyCF_ALLOW_TOP_LEVEL_AWAIT,
            dont_inherit=True,
        )
    except SyntaxError as e:
        message = e.msg or "invalid syntax"
        raise ValueError(f"Generated code is not valid Python: {message}") from e

    if not isinstance(parsed, ast.Module):
        raise ValueError("Generated code must be a Python module")

    solve_defs = [
        node
        for node in parsed.body
        if isinstance(node, ast.AsyncFunctionDef) and node.name == "solve"
    ]
    if not solve_defs:
        raise ValueError(
            "Generated code must define `async def solve(query, max_calls): ...`."
        )

    def _valid_solve_signature(node: ast.AsyncFunctionDef) -> bool:
        # Exactly two positional args named query/max_calls; no positional-only
        # args, *args/**kwargs, keyword-only args, or defaults.
        args = node.args
        return (
            not args.posonlyargs
            and len(args.args) == 2
            and [arg.arg for arg in args.args] == ["query", "max_calls"]
            and args.vararg is None
            and not args.kwonlyargs
            and args.kwarg is None
            and not args.defaults
            and not args.kw_defaults
        )

    if not any(_valid_solve_signature(node) for node in solve_defs):
        raise ValueError(
            "`solve` must have signature `async def solve(query, max_calls): ...`."
        )

    if not parsed.body:
        raise ValueError("Generated code is empty")

    # The module's last statement must be exactly `await solve(query, max_calls)`
    # with positional name arguments and no keywords.
    final_stmt = parsed.body[-1]
    valid_final_await = (
        isinstance(final_stmt, ast.Expr)
        and isinstance(final_stmt.value, ast.Await)
        and isinstance(final_stmt.value.value, ast.Call)
        and isinstance(final_stmt.value.value.func, ast.Name)
        and final_stmt.value.value.func.id == "solve"
        and len(final_stmt.value.value.args) == 2
        and not final_stmt.value.value.keywords
        and all(isinstance(arg, ast.Name) for arg in final_stmt.value.value.args)
        and [cast(ast.Name, arg).id for arg in final_stmt.value.value.args]
        == ["query", "max_calls"]
    )
    if not valid_final_await:
        raise ValueError(
            "Generated code must end with `await solve(query, max_calls)`."
        )

    # Raw API access is forbidden anywhere in the module.
    for node in ast.walk(parsed):
        if not isinstance(node, ast.Call):
            continue
        if isinstance(node.func, ast.Name) and node.func.id == "call_api":
            raise ValueError(
                "Generated code must use documented hf_* helpers only; raw `call_api(...)` is not part of the prompt contract."
            )

    # The code must actually use at least one documented helper.
    helper_name_set = set(HELPER_EXTERNALS)
    has_external_call = any(
        isinstance(node, ast.Call)
        and isinstance(node.func, ast.Name)
        and node.func.id in helper_name_set
        for node in ast.walk(parsed)
    )
    if not has_external_call:
        raise ValueError(
            "Generated code must call at least one documented hf_* helper."
        )
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
def _coerce_jsonish_python_literals(code: str) -> str:
|
| 308 |
+
"""Normalize common JSON literals into valid Python names in generated code."""
|
| 309 |
+
replacements = {
|
| 310 |
+
"true": "True",
|
| 311 |
+
"false": "False",
|
| 312 |
+
"null": "None",
|
| 313 |
+
}
|
| 314 |
+
|
| 315 |
+
out_tokens: list[tuple[int, str]] = []
|
| 316 |
+
for tok in tokenize.generate_tokens(StringIO(code).readline):
|
| 317 |
+
tok_type = tok.type
|
| 318 |
+
tok_str = tok.string
|
| 319 |
+
if tok_type == tokenize.NAME and tok_str in replacements:
|
| 320 |
+
tok_str = replacements[tok_str]
|
| 321 |
+
out_tokens.append((tok_type, tok_str))
|
| 322 |
+
return tokenize.untokenize(out_tokens)
|
Dockerfile
CHANGED
|
@@ -11,11 +11,13 @@ COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
|
|
| 11 |
|
| 12 |
WORKDIR /app
|
| 13 |
|
|
|
|
|
|
|
| 14 |
RUN uv pip install --system --no-cache \
|
| 15 |
-
fast-agent-mcp==0.6.1 \
|
| 16 |
-
|
| 17 |
huggingface_hub \
|
| 18 |
-
pydantic-monty
|
| 19 |
|
| 20 |
COPY --link ./ /app
|
| 21 |
RUN chown -R 1000:1000 /app
|
|
|
|
| 11 |
|
| 12 |
WORKDIR /app
|
| 13 |
|
| 14 |
+
COPY wheels /tmp/wheels
|
| 15 |
+
|
| 16 |
RUN uv pip install --system --no-cache \
|
| 17 |
+
"fast-agent-mcp==0.6.1" \
|
| 18 |
+
/tmp/wheels/prefab_ui-0.13.2.dev5+a585463-py3-none-any.whl \
|
| 19 |
huggingface_hub \
|
| 20 |
+
"pydantic-monty==0.0.8"
|
| 21 |
|
| 22 |
COPY --link ./ /app
|
| 23 |
RUN chown -R 1000:1000 /app
|
scripts/card_includes.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
_FILE_PLACEHOLDER_RE = re.compile(r"\{\{file:([^}]+)\}\}")


def expand_file_placeholders(
    text: str,
    *,
    workspace_root: Path,
    seen: set[Path] | None = None,
) -> str:
    """Recursively expand ``{{file:PATH}}`` placeholders in *text*.

    Relative paths resolve against *workspace_root*. *seen* carries the chain
    of files currently being expanded; revisiting one raises ValueError to
    break include cycles.
    """
    root = workspace_root.resolve()
    in_progress: set[Path] = set(seen) if seen is not None else set()

    def _substitute(match: re.Match[str]) -> str:
        ref = match.group(1).strip()
        target = Path(ref)
        if not target.is_absolute():
            target = root / target
        target = target.resolve()
        if target in in_progress:
            raise ValueError(f"cyclic {{file:...}} include detected at {target}")
        body = target.read_text(encoding="utf-8")
        # Recurse with this file added to the active chain so nested
        # includes of the same file are detected as cycles.
        return expand_file_placeholders(
            body,
            workspace_root=root,
            seen=in_progress | {target},
        )

    return _FILE_PLACEHOLDER_RE.sub(_substitute, text)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def materialize_expanded_card(
    card_path: Path,
    *,
    workspace_root: Path,
    out_dir: Path,
) -> Path:
    """Expand a card file's includes and write the result into *out_dir*.

    The output file is named ``.<stem>.expanded<suffix>`` and *out_dir* is
    created if missing. Returns the output path. The card itself is seeded
    into the cycle-detection set so it cannot include itself.
    """
    resolved_card = card_path.resolve()
    expanded_text = expand_file_placeholders(
        resolved_card.read_text(encoding="utf-8"),
        workspace_root=workspace_root,
        seen={resolved_card},
    )
    out_dir.mkdir(parents=True, exist_ok=True)
    target = out_dir / f".{resolved_card.stem}.expanded{resolved_card.suffix}"
    target.write_text(expanded_text, encoding="utf-8")
    return target
|
scripts/hub_search_prefab_server.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
|
|
|
|
|
|
| 3 |
import json
|
| 4 |
import os
|
| 5 |
import sys
|
|
@@ -10,6 +12,7 @@ from starlette.middleware import Middleware
|
|
| 10 |
from starlette.middleware.cors import CORSMiddleware
|
| 11 |
from starlette.responses import PlainTextResponse
|
| 12 |
|
|
|
|
| 13 |
def _discover_workspace_root() -> Path:
|
| 14 |
env_root = os.getenv("CODE_TOOLS_ROOT")
|
| 15 |
if env_root:
|
|
@@ -29,13 +32,8 @@ SCRIPTS_DIR = Path(__file__).resolve().parent
|
|
| 29 |
CARDS_DIR = PREFAB_ROOT / "agent-cards"
|
| 30 |
CONFIG_PATH = PREFAB_ROOT / "fastagent.config.yaml"
|
| 31 |
RAW_CARD_FILE = CARDS_DIR / "hub_search_raw.md"
|
| 32 |
-
|
| 33 |
-
PREFAB_LLM_RAW_CARD_FILE = CARDS_DIR / "hub_search_prefab_llm_raw.md"
|
| 34 |
-
PREFAB_LLM_CODEGEN_CARD_FILE = CARDS_DIR / "hub_search_prefab_llm_codegen.md"
|
| 35 |
-
PREFAB_LLM_CHAIN_CARD_FILE = CARDS_DIR / "hub_search_prefab_llm_chain.md"
|
| 36 |
RAW_AGENT = "hub_search_raw"
|
| 37 |
-
PREFAB_NATIVE_AGENT = "hub_search_prefab_native"
|
| 38 |
-
PREFAB_LLM_CHAIN_AGENT = "hub_search_prefab_llm_chain"
|
| 39 |
|
| 40 |
HOST = os.getenv("HOST", "0.0.0.0")
|
| 41 |
PORT = int(os.getenv("PORT", "9999"))
|
|
@@ -66,12 +64,8 @@ from fastmcp.server.dependencies import get_access_token
|
|
| 66 |
from fastmcp.tools import ToolResult
|
| 67 |
from mcp.types import TextContent
|
| 68 |
from pydantic import AnyHttpUrl
|
| 69 |
-
from
|
| 70 |
-
|
| 71 |
-
error_wire,
|
| 72 |
-
parse_passthrough_wire,
|
| 73 |
-
parse_runtime_payload,
|
| 74 |
-
)
|
| 75 |
|
| 76 |
|
| 77 |
class _RootResourceRemoteAuthProvider(RemoteAuthProvider):
|
|
@@ -82,6 +76,7 @@ class _RootResourceRemoteAuthProvider(RemoteAuthProvider):
|
|
| 82 |
return self.base_url
|
| 83 |
|
| 84 |
|
|
|
|
| 85 |
def _get_oauth_config() -> tuple[str | None, list[str], str]:
|
| 86 |
oauth_provider = os.environ.get("FAST_AGENT_SERVE_OAUTH", "").lower()
|
| 87 |
if oauth_provider in ("hf", "huggingface"):
|
|
@@ -98,16 +93,18 @@ def _get_oauth_config() -> tuple[str | None, list[str], str]:
|
|
| 98 |
return oauth_provider, oauth_scopes, resource_url
|
| 99 |
|
| 100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
fast = FastAgent(
|
| 102 |
"hub-search-prefab",
|
| 103 |
config_path=str(CONFIG_PATH),
|
| 104 |
parse_cli_args=False,
|
| 105 |
)
|
| 106 |
-
fast.load_agents(
|
| 107 |
-
fast.load_agents(PREFAB_NATIVE_CARD_FILE)
|
| 108 |
-
fast.load_agents(PREFAB_LLM_RAW_CARD_FILE)
|
| 109 |
-
fast.load_agents(PREFAB_LLM_CODEGEN_CARD_FILE)
|
| 110 |
-
fast.load_agents(PREFAB_LLM_CHAIN_CARD_FILE)
|
| 111 |
|
| 112 |
_oauth_provider, _oauth_scopes, _oauth_resource_url = _get_oauth_config()
|
| 113 |
_auth_provider = None
|
|
@@ -142,13 +139,6 @@ async def _run_raw(query: str) -> str:
|
|
| 142 |
return await _run_agent(RAW_AGENT, query)
|
| 143 |
|
| 144 |
|
| 145 |
-
async def _run_prefab_native(query: str) -> str:
|
| 146 |
-
return await _run_agent(PREFAB_NATIVE_AGENT, query)
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
async def _run_prefab_llm_chain(query: str) -> str:
|
| 150 |
-
return await _run_agent(PREFAB_LLM_CHAIN_AGENT, query)
|
| 151 |
-
|
| 152 |
|
| 153 |
def _get_request_bearer_token() -> str | None:
|
| 154 |
access_token = get_access_token()
|
|
@@ -166,6 +156,7 @@ async def _run_agent(agent_name: str, query: str) -> str:
|
|
| 166 |
request_bearer_token.reset(saved_token)
|
| 167 |
|
| 168 |
|
|
|
|
| 169 |
def _wire_tool_result(wire: dict[str, object]) -> ToolResult:
|
| 170 |
return ToolResult(
|
| 171 |
content=[TextContent(type="text", text="[Rendered Prefab UI]")],
|
|
@@ -173,23 +164,16 @@ def _wire_tool_result(wire: dict[str, object]) -> ToolResult:
|
|
| 173 |
)
|
| 174 |
|
| 175 |
|
|
|
|
| 176 |
def _render_query_wire(query: str, raw_text: str) -> dict[str, object]:
|
| 177 |
payload = parse_runtime_payload(raw_text)
|
| 178 |
return build_runtime_wire(query, payload)
|
| 179 |
|
| 180 |
|
| 181 |
-
def _render_prefab_wire(prefab_text: str) -> dict[str, object]:
|
| 182 |
-
return parse_passthrough_wire(prefab_text)
|
| 183 |
-
|
| 184 |
-
|
| 185 |
async def _build_query_wire(query: str) -> dict[str, object]:
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
except Exception:
|
| 190 |
-
traceback.print_exc()
|
| 191 |
-
raw = await _run_raw(query)
|
| 192 |
-
return _render_query_wire(query, raw)
|
| 193 |
|
| 194 |
|
| 195 |
def _missing_query_json() -> str:
|
|
@@ -206,7 +190,7 @@ def _missing_query_json() -> str:
|
|
| 206 |
|
| 207 |
@mcp.tool(app=True)
|
| 208 |
async def hub_search_prefab(query: str) -> ToolResult:
|
| 209 |
-
"""Run the Prefab UI service
|
| 210 |
try:
|
| 211 |
wire = await _build_query_wire(query)
|
| 212 |
except Exception as exc: # noqa: BLE001
|
|
@@ -215,21 +199,9 @@ async def hub_search_prefab(query: str) -> ToolResult:
|
|
| 215 |
return _wire_tool_result(wire)
|
| 216 |
|
| 217 |
|
| 218 |
-
@mcp.tool
|
| 219 |
-
async def hub_search_prefab_native_debug(query: str | None = None) -> str:
|
| 220 |
-
"""Return the one-pass native Prefab agent payload, before fallback rendering."""
|
| 221 |
-
if not query:
|
| 222 |
-
return _missing_query_json()
|
| 223 |
-
try:
|
| 224 |
-
return await _run_prefab_native(query)
|
| 225 |
-
except Exception as exc: # noqa: BLE001
|
| 226 |
-
traceback.print_exc()
|
| 227 |
-
return json.dumps({"result": None, "meta": {"ok": False, "error": str(exc)}})
|
| 228 |
-
|
| 229 |
-
|
| 230 |
@mcp.tool
|
| 231 |
async def hub_search_prefab_wire(query: str | None = None) -> str:
|
| 232 |
-
"""Return final Prefab wire JSON
|
| 233 |
if not query:
|
| 234 |
return json.dumps(error_wire("Missing required argument: query"), ensure_ascii=False)
|
| 235 |
try:
|
|
@@ -252,17 +224,6 @@ async def hub_search_raw_debug(query: str | None = None) -> str:
|
|
| 252 |
return json.dumps({"result": None, "meta": {"ok": False, "error": str(exc)}})
|
| 253 |
|
| 254 |
|
| 255 |
-
@mcp.tool
|
| 256 |
-
async def hub_search_prefab_llm_debug(query: str | None = None) -> str:
|
| 257 |
-
"""Return the two-pass LLM chain payload for comparison/debugging."""
|
| 258 |
-
if not query:
|
| 259 |
-
return _missing_query_json()
|
| 260 |
-
try:
|
| 261 |
-
return await _run_prefab_llm_chain(query)
|
| 262 |
-
except Exception as exc: # noqa: BLE001
|
| 263 |
-
traceback.print_exc()
|
| 264 |
-
return json.dumps({"result": None, "meta": {"ok": False, "error": str(exc)}})
|
| 265 |
-
|
| 266 |
|
| 267 |
def main() -> None:
|
| 268 |
mcp.run(
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
+
# ruff: noqa: E402
|
| 4 |
+
|
| 5 |
import json
|
| 6 |
import os
|
| 7 |
import sys
|
|
|
|
| 12 |
from starlette.middleware.cors import CORSMiddleware
|
| 13 |
from starlette.responses import PlainTextResponse
|
| 14 |
|
| 15 |
+
|
| 16 |
def _discover_workspace_root() -> Path:
|
| 17 |
env_root = os.getenv("CODE_TOOLS_ROOT")
|
| 18 |
if env_root:
|
|
|
|
| 32 |
CARDS_DIR = PREFAB_ROOT / "agent-cards"
|
| 33 |
CONFIG_PATH = PREFAB_ROOT / "fastagent.config.yaml"
|
| 34 |
RAW_CARD_FILE = CARDS_DIR / "hub_search_raw.md"
|
| 35 |
+
EXPANDED_CARDS_DIR = CARDS_DIR
|
|
|
|
|
|
|
|
|
|
| 36 |
RAW_AGENT = "hub_search_raw"
|
|
|
|
|
|
|
| 37 |
|
| 38 |
HOST = os.getenv("HOST", "0.0.0.0")
|
| 39 |
PORT = int(os.getenv("PORT", "9999"))
|
|
|
|
| 64 |
from fastmcp.tools import ToolResult
|
| 65 |
from mcp.types import TextContent
|
| 66 |
from pydantic import AnyHttpUrl
|
| 67 |
+
from card_includes import materialize_expanded_card
|
| 68 |
+
from prefab_hub_ui import build_runtime_wire, error_wire, parse_runtime_payload
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
|
| 71 |
class _RootResourceRemoteAuthProvider(RemoteAuthProvider):
|
|
|
|
| 76 |
return self.base_url
|
| 77 |
|
| 78 |
|
| 79 |
+
|
| 80 |
def _get_oauth_config() -> tuple[str | None, list[str], str]:
|
| 81 |
oauth_provider = os.environ.get("FAST_AGENT_SERVE_OAUTH", "").lower()
|
| 82 |
if oauth_provider in ("hf", "huggingface"):
|
|
|
|
| 93 |
return oauth_provider, oauth_scopes, resource_url
|
| 94 |
|
| 95 |
|
| 96 |
+
EXPANDED_RAW_CARD_FILE = materialize_expanded_card(
|
| 97 |
+
RAW_CARD_FILE,
|
| 98 |
+
workspace_root=WORKSPACE_ROOT,
|
| 99 |
+
out_dir=EXPANDED_CARDS_DIR,
|
| 100 |
+
)
|
| 101 |
+
|
| 102 |
fast = FastAgent(
|
| 103 |
"hub-search-prefab",
|
| 104 |
config_path=str(CONFIG_PATH),
|
| 105 |
parse_cli_args=False,
|
| 106 |
)
|
| 107 |
+
fast.load_agents(EXPANDED_RAW_CARD_FILE)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
_oauth_provider, _oauth_scopes, _oauth_resource_url = _get_oauth_config()
|
| 110 |
_auth_provider = None
|
|
|
|
| 139 |
return await _run_agent(RAW_AGENT, query)
|
| 140 |
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
def _get_request_bearer_token() -> str | None:
|
| 144 |
access_token = get_access_token()
|
|
|
|
| 156 |
request_bearer_token.reset(saved_token)
|
| 157 |
|
| 158 |
|
| 159 |
+
|
| 160 |
def _wire_tool_result(wire: dict[str, object]) -> ToolResult:
|
| 161 |
return ToolResult(
|
| 162 |
content=[TextContent(type="text", text="[Rendered Prefab UI]")],
|
|
|
|
| 164 |
)
|
| 165 |
|
| 166 |
|
| 167 |
+
|
| 168 |
def _render_query_wire(query: str, raw_text: str) -> dict[str, object]:
|
| 169 |
payload = parse_runtime_payload(raw_text)
|
| 170 |
return build_runtime_wire(query, payload)
|
| 171 |
|
| 172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
async def _build_query_wire(query: str) -> dict[str, object]:
|
| 174 |
+
raw = await _run_raw(query)
|
| 175 |
+
return _render_query_wire(query, raw)
|
| 176 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
|
| 179 |
def _missing_query_json() -> str:
|
|
|
|
| 190 |
|
| 191 |
@mcp.tool(app=True)
|
| 192 |
async def hub_search_prefab(query: str) -> ToolResult:
|
| 193 |
+
"""Run the Prefab UI service with deterministic rendering over raw Hub output."""
|
| 194 |
try:
|
| 195 |
wire = await _build_query_wire(query)
|
| 196 |
except Exception as exc: # noqa: BLE001
|
|
|
|
| 199 |
return _wire_tool_result(wire)
|
| 200 |
|
| 201 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
@mcp.tool
|
| 203 |
async def hub_search_prefab_wire(query: str | None = None) -> str:
|
| 204 |
+
"""Return final deterministic Prefab wire JSON for a Hub query."""
|
| 205 |
if not query:
|
| 206 |
return json.dumps(error_wire("Missing required argument: query"), ensure_ascii=False)
|
| 207 |
try:
|
|
|
|
| 224 |
return json.dumps({"result": None, "meta": {"ok": False, "error": str(exc)}})
|
| 225 |
|
| 226 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
def main() -> None:
|
| 229 |
mcp.run(
|
scripts/prefab_hub_ui.py
CHANGED
|
@@ -5,10 +5,11 @@ import json
|
|
| 5 |
from copy import deepcopy
|
| 6 |
from typing import Any
|
| 7 |
|
| 8 |
-
from prefab_ui.themes import
|
| 9 |
|
| 10 |
PAGE_CSS_CLASS = "w-full max-w-6xl mx-auto p-4 md:p-6 lg:px-8"
|
| 11 |
-
|
|
|
|
| 12 |
|
| 13 |
_COMPONENT_KEY_ALIASES: dict[str, str] = {
|
| 14 |
"bar_radius": "barRadius",
|
|
@@ -100,6 +101,19 @@ _PREFERRED_METRIC_KEYS: tuple[str, ...] = (
|
|
| 100 |
"normal_likers",
|
| 101 |
)
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
_URL_KEYS: tuple[str, ...] = (
|
| 104 |
"repo_url",
|
| 105 |
"url",
|
|
@@ -109,6 +123,62 @@ _URL_KEYS: tuple[str, ...] = (
|
|
| 109 |
"github_repo_url",
|
| 110 |
)
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
def _copy_default_theme() -> dict[str, Any]:
|
| 114 |
return deepcopy(DEFAULT_THEME)
|
|
@@ -457,14 +527,45 @@ def _is_scalar(value: Any) -> bool:
|
|
| 457 |
return False
|
| 458 |
|
| 459 |
|
| 460 |
-
def _normalize_cell(value: Any) -> Any:
|
| 461 |
if value is None or isinstance(value, (str, int, float, bool)):
|
| 462 |
return value
|
|
|
|
|
|
|
|
|
|
| 463 |
return _compact_text(value)
|
| 464 |
|
| 465 |
|
| 466 |
def _normalize_row(row: dict[str, Any]) -> dict[str, Any]:
|
| 467 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 468 |
|
| 469 |
|
| 470 |
def _column_rank(key: str) -> tuple[int, str]:
|
|
@@ -481,6 +582,13 @@ def _metric_rank(key: str) -> tuple[int, str]:
|
|
| 481 |
return (len(_PREFERRED_METRIC_KEYS), key)
|
| 482 |
|
| 483 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 484 |
def _build_row_click(rows: list[dict[str, Any]]) -> dict[str, Any] | None:
|
| 485 |
for key in _URL_KEYS:
|
| 486 |
if any(isinstance(row.get(key), str) and row.get(key) for row in rows):
|
|
@@ -491,6 +599,198 @@ def _build_row_click(rows: list[dict[str, Any]]) -> dict[str, Any] | None:
|
|
| 491 |
return None
|
| 492 |
|
| 493 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 494 |
def _build_table_card(
|
| 495 |
title: str,
|
| 496 |
rows: list[dict[str, Any]],
|
|
@@ -531,7 +831,13 @@ def _build_table_card(
|
|
| 531 |
|
| 532 |
normalized_rows = [_normalize_row(row) for row in rows]
|
| 533 |
all_keys = {key for row in normalized_rows for key in row}
|
| 534 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 535 |
columns: list[dict[str, Any]] = []
|
| 536 |
for key in visible_keys:
|
| 537 |
column: dict[str, Any] = {
|
|
@@ -539,8 +845,11 @@ def _build_table_card(
|
|
| 539 |
"header": _titleize(key),
|
| 540 |
"sortable": key not in {"description"},
|
| 541 |
}
|
|
|
|
|
|
|
| 542 |
if any(isinstance(row.get(key), (int, float)) for row in normalized_rows):
|
| 543 |
-
column["
|
|
|
|
| 544 |
column["format"] = "number"
|
| 545 |
if key in {"description"}:
|
| 546 |
column["maxWidth"] = "28rem"
|
|
@@ -556,7 +865,6 @@ def _build_table_card(
|
|
| 556 |
"pageSize": 10,
|
| 557 |
}
|
| 558 |
|
| 559 |
-
row_click = _build_row_click(rows)
|
| 560 |
if row_click is not None:
|
| 561 |
data_table["onRowClick"] = row_click
|
| 562 |
|
|
@@ -588,7 +896,10 @@ def _build_key_value_card(
|
|
| 588 |
*,
|
| 589 |
description: str | None = None,
|
| 590 |
) -> dict[str, Any]:
|
| 591 |
-
rows = [
|
|
|
|
|
|
|
|
|
|
| 592 |
return _build_table_card(
|
| 593 |
title,
|
| 594 |
rows,
|
|
@@ -742,12 +1053,31 @@ def _render_list(
|
|
| 742 |
|
| 743 |
if all(isinstance(item, dict) for item in value):
|
| 744 |
rows = [item for item in value if isinstance(item, dict)]
|
| 745 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 746 |
|
| 747 |
rows = [
|
| 748 |
{
|
| 749 |
"index": index + 1,
|
| 750 |
-
"value": _normalize_cell(item),
|
| 751 |
}
|
| 752 |
for index, item in enumerate(value)
|
| 753 |
]
|
|
@@ -764,6 +1094,35 @@ def _render_dict(
|
|
| 764 |
if depth > 2:
|
| 765 |
return [_build_key_value_card(title, value, description=description)]
|
| 766 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 767 |
if "results" in value or "coverage" in value:
|
| 768 |
sections: list[dict[str, Any]] = []
|
| 769 |
results = value.get("results")
|
|
@@ -909,6 +1268,19 @@ def _build_summary_card(
|
|
| 909 |
}
|
| 910 |
)
|
| 911 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 912 |
return {"type": "Card", "children": summary_children}
|
| 913 |
|
| 914 |
|
|
@@ -924,7 +1296,7 @@ def build_runtime_wire(query: str, payload: dict[str, Any]) -> dict[str, Any]:
|
|
| 924 |
helper_meta: dict[str, Any] | None = None
|
| 925 |
body_children: list[dict[str, Any]] = []
|
| 926 |
|
| 927 |
-
if _looks_like_helper_envelope(result):
|
| 928 |
helper_meta = result.get("meta") if isinstance(result.get("meta"), dict) else None
|
| 929 |
if result.get("ok") is False:
|
| 930 |
message = str(result.get("error") or "Helper query failed")
|
|
@@ -953,10 +1325,11 @@ def build_runtime_wire(query: str, payload: dict[str, Any]) -> dict[str, Any]:
|
|
| 953 |
else:
|
| 954 |
body_children.extend(_render_value("Results", result))
|
| 955 |
|
|
|
|
| 956 |
body_view = {
|
| 957 |
"type": "Column",
|
| 958 |
"gap": 6,
|
| 959 |
-
"cssClass":
|
| 960 |
"children": [
|
| 961 |
_build_summary_card(
|
| 962 |
query,
|
|
|
|
| 5 |
from copy import deepcopy
|
| 6 |
from typing import Any
|
| 7 |
|
| 8 |
+
from prefab_ui.themes import Basic
|
| 9 |
|
| 10 |
PAGE_CSS_CLASS = "w-full max-w-6xl mx-auto p-4 md:p-6 lg:px-8"
|
| 11 |
+
WIDE_PAGE_CSS_CLASS = "w-full max-w-[90rem] mx-auto p-4 md:p-6 lg:px-8"
|
| 12 |
+
DEFAULT_THEME: dict[str, Any] = Basic(accent="blue").to_json()
|
| 13 |
|
| 14 |
_COMPONENT_KEY_ALIASES: dict[str, str] = {
|
| 15 |
"bar_radius": "barRadius",
|
|
|
|
| 101 |
"normal_likers",
|
| 102 |
)
|
| 103 |
|
| 104 |
+
_PREFERRED_LABEL_KEYS: tuple[str, ...] = (
|
| 105 |
+
"label",
|
| 106 |
+
"name",
|
| 107 |
+
"title",
|
| 108 |
+
"repo_type",
|
| 109 |
+
"status",
|
| 110 |
+
"task",
|
| 111 |
+
"pipeline_tag",
|
| 112 |
+
"kind",
|
| 113 |
+
"owner",
|
| 114 |
+
"username",
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
_URL_KEYS: tuple[str, ...] = (
|
| 118 |
"repo_url",
|
| 119 |
"url",
|
|
|
|
| 123 |
"github_repo_url",
|
| 124 |
)
|
| 125 |
|
| 126 |
+
_FILTERABLE_COLUMN_KEYS: tuple[str, ...] = (
|
| 127 |
+
"repo_type",
|
| 128 |
+
"pipeline_tag",
|
| 129 |
+
"pipeline_tags",
|
| 130 |
+
"tags",
|
| 131 |
+
"status",
|
| 132 |
+
"license",
|
| 133 |
+
"author",
|
| 134 |
+
"owner",
|
| 135 |
+
"username",
|
| 136 |
+
"user",
|
| 137 |
+
"users",
|
| 138 |
+
"handle",
|
| 139 |
+
"organization",
|
| 140 |
+
"organizations",
|
| 141 |
+
)
|
| 142 |
+
|
| 143 |
+
_FILTERABLE_COLUMN_SUFFIXES: tuple[str, ...] = (
|
| 144 |
+
"_type",
|
| 145 |
+
"_tag",
|
| 146 |
+
"_tags",
|
| 147 |
+
"_status",
|
| 148 |
+
"_license",
|
| 149 |
+
"_author",
|
| 150 |
+
"_owner",
|
| 151 |
+
"_username",
|
| 152 |
+
"_user",
|
| 153 |
+
"_users",
|
| 154 |
+
"_handle",
|
| 155 |
+
"_organization",
|
| 156 |
+
"_organizations",
|
| 157 |
+
)
|
| 158 |
+
|
| 159 |
+
_USER_NAME_KEYS: tuple[str, ...] = (
|
| 160 |
+
"full_name",
|
| 161 |
+
"display_name",
|
| 162 |
+
"name",
|
| 163 |
+
"username",
|
| 164 |
+
"handle",
|
| 165 |
+
)
|
| 166 |
+
|
| 167 |
+
_USER_AVATAR_KEYS: tuple[str, ...] = (
|
| 168 |
+
"avatar_url",
|
| 169 |
+
"avatar",
|
| 170 |
+
"image_url",
|
| 171 |
+
)
|
| 172 |
+
|
| 173 |
+
_USER_SOCIAL_LINK_KEYS: tuple[tuple[str, str], ...] = (
|
| 174 |
+
("hf_url", "Hugging Face"),
|
| 175 |
+
("profile_url", "Profile"),
|
| 176 |
+
("website_url", "Website"),
|
| 177 |
+
("blog_url", "Blog"),
|
| 178 |
+
("github_url", "GitHub"),
|
| 179 |
+
("twitter_url", "Twitter"),
|
| 180 |
+
)
|
| 181 |
+
|
| 182 |
|
| 183 |
def _copy_default_theme() -> dict[str, Any]:
|
| 184 |
return deepcopy(DEFAULT_THEME)
|
|
|
|
| 527 |
return False
|
| 528 |
|
| 529 |
|
| 530 |
+
def _normalize_cell(value: Any, *, key: str) -> Any:
|
| 531 |
if value is None or isinstance(value, (str, int, float, bool)):
|
| 532 |
return value
|
| 533 |
+
if isinstance(value, list):
|
| 534 |
+
if value and all(isinstance(item, str) for item in value):
|
| 535 |
+
return [_compact_text(item, limit=40) for item in value[:8]]
|
| 536 |
return _compact_text(value)
|
| 537 |
|
| 538 |
|
| 539 |
def _normalize_row(row: dict[str, Any]) -> dict[str, Any]:
|
| 540 |
+
return {
|
| 541 |
+
str(key): _normalize_cell(value, key=str(key)) for key, value in row.items()
|
| 542 |
+
}
|
| 543 |
+
|
| 544 |
+
|
| 545 |
+
def _is_badge_friendly_key(key: str) -> bool:
|
| 546 |
+
return key in _FILTERABLE_COLUMN_KEYS or key.endswith(_FILTERABLE_COLUMN_SUFFIXES)
|
| 547 |
+
|
| 548 |
+
|
| 549 |
+
def _should_make_filterable(key: str, rows: list[dict[str, Any]]) -> bool:
|
| 550 |
+
if not _is_badge_friendly_key(key):
|
| 551 |
+
return False
|
| 552 |
+
|
| 553 |
+
values = [row.get(key) for row in rows]
|
| 554 |
+
if any(isinstance(value, list) for value in values):
|
| 555 |
+
return True
|
| 556 |
+
|
| 557 |
+
scalar_values = [
|
| 558 |
+
value
|
| 559 |
+
for value in values
|
| 560 |
+
if isinstance(value, (str, int, float, bool))
|
| 561 |
+
]
|
| 562 |
+
if not scalar_values:
|
| 563 |
+
return False
|
| 564 |
+
|
| 565 |
+
if any(isinstance(value, (int, float)) and not isinstance(value, bool) for value in scalar_values):
|
| 566 |
+
return False
|
| 567 |
+
|
| 568 |
+
return 0 < len({str(value) for value in scalar_values}) <= 12
|
| 569 |
|
| 570 |
|
| 571 |
def _column_rank(key: str) -> tuple[int, str]:
|
|
|
|
| 582 |
return (len(_PREFERRED_METRIC_KEYS), key)
|
| 583 |
|
| 584 |
|
| 585 |
+
def _label_rank(key: str) -> tuple[int, str]:
|
| 586 |
+
try:
|
| 587 |
+
return (_PREFERRED_LABEL_KEYS.index(key), key)
|
| 588 |
+
except ValueError:
|
| 589 |
+
return (len(_PREFERRED_LABEL_KEYS), key)
|
| 590 |
+
|
| 591 |
+
|
| 592 |
def _build_row_click(rows: list[dict[str, Any]]) -> dict[str, Any] | None:
|
| 593 |
for key in _URL_KEYS:
|
| 594 |
if any(isinstance(row.get(key), str) and row.get(key) for row in rows):
|
|
|
|
| 599 |
return None
|
| 600 |
|
| 601 |
|
| 602 |
+
def _select_distribution_fields(
|
| 603 |
+
rows: list[dict[str, Any]],
|
| 604 |
+
) -> tuple[str, str] | None:
|
| 605 |
+
if not 2 <= len(rows) <= 8:
|
| 606 |
+
return None
|
| 607 |
+
|
| 608 |
+
shared_keys = set(rows[0])
|
| 609 |
+
for row in rows[1:]:
|
| 610 |
+
shared_keys &= set(row)
|
| 611 |
+
if not shared_keys:
|
| 612 |
+
return None
|
| 613 |
+
|
| 614 |
+
numeric_keys = [
|
| 615 |
+
key
|
| 616 |
+
for key in shared_keys
|
| 617 |
+
if all(isinstance(row.get(key), (int, float)) for row in rows)
|
| 618 |
+
]
|
| 619 |
+
if not numeric_keys:
|
| 620 |
+
return None
|
| 621 |
+
|
| 622 |
+
count_key = sorted(numeric_keys, key=_metric_rank)[0]
|
| 623 |
+
label_candidates = [
|
| 624 |
+
key
|
| 625 |
+
for key in shared_keys
|
| 626 |
+
if key != count_key
|
| 627 |
+
and all(isinstance(row.get(key), str) and row.get(key).strip() for row in rows)
|
| 628 |
+
]
|
| 629 |
+
if not label_candidates:
|
| 630 |
+
return None
|
| 631 |
+
|
| 632 |
+
label_key = sorted(label_candidates, key=_label_rank)[0]
|
| 633 |
+
return label_key, count_key
|
| 634 |
+
|
| 635 |
+
|
| 636 |
+
def _build_distribution_card(
|
| 637 |
+
title: str,
|
| 638 |
+
rows: list[dict[str, Any]],
|
| 639 |
+
*,
|
| 640 |
+
label_key: str,
|
| 641 |
+
count_key: str,
|
| 642 |
+
) -> dict[str, Any]:
|
| 643 |
+
return {
|
| 644 |
+
"type": "Card",
|
| 645 |
+
"children": [
|
| 646 |
+
{
|
| 647 |
+
"type": "CardHeader",
|
| 648 |
+
"children": [
|
| 649 |
+
{"type": "CardTitle", "content": f"{title} distribution"},
|
| 650 |
+
{
|
| 651 |
+
"type": "CardDescription",
|
| 652 |
+
"content": f'{_titleize(count_key)} by {_titleize(label_key).lower()}',
|
| 653 |
+
},
|
| 654 |
+
],
|
| 655 |
+
},
|
| 656 |
+
{
|
| 657 |
+
"type": "CardContent",
|
| 658 |
+
"children": [
|
| 659 |
+
{
|
| 660 |
+
"type": "PieChart",
|
| 661 |
+
"data": rows,
|
| 662 |
+
"dataKey": count_key,
|
| 663 |
+
"nameKey": label_key,
|
| 664 |
+
"innerRadius": 60,
|
| 665 |
+
"paddingAngle": 2,
|
| 666 |
+
"showLegend": True,
|
| 667 |
+
"showTooltip": True,
|
| 668 |
+
"showLabel": False,
|
| 669 |
+
"height": 260,
|
| 670 |
+
}
|
| 671 |
+
],
|
| 672 |
+
},
|
| 673 |
+
],
|
| 674 |
+
}
|
| 675 |
+
|
| 676 |
+
|
| 677 |
+
def _looks_like_user_profile(values: dict[str, Any]) -> bool:
|
| 678 |
+
return any(key in values for key in ("username", "handle", "avatar_url", "hf_url", "profile_url"))
|
| 679 |
+
|
| 680 |
+
|
| 681 |
+
def _first_present(values: dict[str, Any], keys: tuple[str, ...]) -> str | None:
|
| 682 |
+
for key in keys:
|
| 683 |
+
value = values.get(key)
|
| 684 |
+
if isinstance(value, str) and value.strip():
|
| 685 |
+
return value.strip()
|
| 686 |
+
return None
|
| 687 |
+
|
| 688 |
+
|
| 689 |
+
def _user_profile_links(values: dict[str, Any]) -> list[tuple[str, str]]:
|
| 690 |
+
links: list[tuple[str, str]] = []
|
| 691 |
+
for key, label in _USER_SOCIAL_LINK_KEYS:
|
| 692 |
+
value = values.get(key)
|
| 693 |
+
if isinstance(value, str) and value.strip():
|
| 694 |
+
links.append((label, value.strip()))
|
| 695 |
+
|
| 696 |
+
username = _first_present(values, ("username", "handle"))
|
| 697 |
+
if username and not any(label == "Hugging Face" for label, _ in links):
|
| 698 |
+
links.insert(0, ("Hugging Face", f"https://huggingface.co/{username.lstrip('@')}"))
|
| 699 |
+
|
| 700 |
+
github = values.get("github")
|
| 701 |
+
if isinstance(github, str) and github.strip() and not any(label == "GitHub" for label, _ in links):
|
| 702 |
+
links.append(("GitHub", f"https://github.com/{github.strip().lstrip('@')}"))
|
| 703 |
+
|
| 704 |
+
twitter = values.get("twitter")
|
| 705 |
+
if isinstance(twitter, str) and twitter.strip() and not any(label == "Twitter" for label, _ in links):
|
| 706 |
+
links.append(("Twitter", f"https://x.com/{twitter.strip().lstrip('@')}"))
|
| 707 |
+
|
| 708 |
+
deduped: list[tuple[str, str]] = []
|
| 709 |
+
seen_urls: set[str] = set()
|
| 710 |
+
for label, url in links:
|
| 711 |
+
if url in seen_urls:
|
| 712 |
+
continue
|
| 713 |
+
seen_urls.add(url)
|
| 714 |
+
deduped.append((label, url))
|
| 715 |
+
return deduped[:4]
|
| 716 |
+
|
| 717 |
+
|
| 718 |
+
def _build_user_profile_card(title: str, values: dict[str, Any]) -> dict[str, Any] | None:
|
| 719 |
+
name = _first_present(values, _USER_NAME_KEYS)
|
| 720 |
+
if not name:
|
| 721 |
+
return None
|
| 722 |
+
|
| 723 |
+
username = _first_present(values, ("username", "handle"))
|
| 724 |
+
subtitle = f"@{username.lstrip('@')}" if username else title
|
| 725 |
+
avatar = _first_present(values, _USER_AVATAR_KEYS)
|
| 726 |
+
bio = _first_present(values, ("bio", "description", "headline"))
|
| 727 |
+
links = _user_profile_links(values)
|
| 728 |
+
|
| 729 |
+
row_children: list[dict[str, Any]] = []
|
| 730 |
+
if avatar:
|
| 731 |
+
row_children.append(
|
| 732 |
+
{
|
| 733 |
+
"type": "Image",
|
| 734 |
+
"src": avatar,
|
| 735 |
+
"alt": name,
|
| 736 |
+
"width": "64px",
|
| 737 |
+
"height": "64px",
|
| 738 |
+
"cssClass": "rounded-full border object-cover",
|
| 739 |
+
}
|
| 740 |
+
)
|
| 741 |
+
|
| 742 |
+
body_children: list[dict[str, Any]] = [
|
| 743 |
+
{"type": "H3", "content": name},
|
| 744 |
+
{"type": "Muted", "content": subtitle},
|
| 745 |
+
]
|
| 746 |
+
if bio:
|
| 747 |
+
body_children.append({"type": "Text", "content": bio})
|
| 748 |
+
if links:
|
| 749 |
+
body_children.append(
|
| 750 |
+
{
|
| 751 |
+
"type": "Row",
|
| 752 |
+
"gap": 2,
|
| 753 |
+
"cssClass": "flex-wrap",
|
| 754 |
+
"children": [
|
| 755 |
+
{
|
| 756 |
+
"type": "Button",
|
| 757 |
+
"label": "View profile" if index == 0 else label,
|
| 758 |
+
"variant": "default" if index == 0 else "outline",
|
| 759 |
+
"buttonType": "button",
|
| 760 |
+
"onClick": {"action": "openLink", "url": url},
|
| 761 |
+
}
|
| 762 |
+
for index, (label, url) in enumerate(links)
|
| 763 |
+
],
|
| 764 |
+
}
|
| 765 |
+
)
|
| 766 |
+
|
| 767 |
+
row_children.append({"type": "Column", "gap": 2, "children": body_children})
|
| 768 |
+
|
| 769 |
+
return {
|
| 770 |
+
"type": "Card",
|
| 771 |
+
"children": [
|
| 772 |
+
{
|
| 773 |
+
"type": "CardContent",
|
| 774 |
+
"cssClass": "p-6",
|
| 775 |
+
"children": [{"type": "Row", "gap": 4, "align": "center", "children": row_children}],
|
| 776 |
+
}
|
| 777 |
+
],
|
| 778 |
+
}
|
| 779 |
+
|
| 780 |
+
|
| 781 |
+
def _prefers_wide_layout(value: Any) -> bool:
|
| 782 |
+
if isinstance(value, list):
|
| 783 |
+
return bool(value) and all(isinstance(item, dict) for item in value)
|
| 784 |
+
if isinstance(value, dict):
|
| 785 |
+
items = value.get("items")
|
| 786 |
+
if isinstance(items, list) and items and all(isinstance(item, dict) for item in items):
|
| 787 |
+
return True
|
| 788 |
+
results = value.get("results")
|
| 789 |
+
if isinstance(results, list) and results and all(isinstance(item, dict) for item in results):
|
| 790 |
+
return True
|
| 791 |
+
return False
|
| 792 |
+
|
| 793 |
+
|
| 794 |
def _build_table_card(
|
| 795 |
title: str,
|
| 796 |
rows: list[dict[str, Any]],
|
|
|
|
| 831 |
|
| 832 |
normalized_rows = [_normalize_row(row) for row in rows]
|
| 833 |
all_keys = {key for row in normalized_rows for key in row}
|
| 834 |
+
row_click = _build_row_click(rows)
|
| 835 |
+
visible_keys = sorted(all_keys, key=_column_rank)
|
| 836 |
+
if row_click is not None:
|
| 837 |
+
non_url_keys = [key for key in visible_keys if key not in _URL_KEYS]
|
| 838 |
+
if non_url_keys:
|
| 839 |
+
visible_keys = non_url_keys
|
| 840 |
+
visible_keys = visible_keys[:8]
|
| 841 |
columns: list[dict[str, Any]] = []
|
| 842 |
for key in visible_keys:
|
| 843 |
column: dict[str, Any] = {
|
|
|
|
| 845 |
"header": _titleize(key),
|
| 846 |
"sortable": key not in {"description"},
|
| 847 |
}
|
| 848 |
+
if _should_make_filterable(key, normalized_rows):
|
| 849 |
+
column["filterable"] = True
|
| 850 |
if any(isinstance(row.get(key), (int, float)) for row in normalized_rows):
|
| 851 |
+
column["headerClass"] = "text-right"
|
| 852 |
+
column["cellClass"] = "text-right"
|
| 853 |
column["format"] = "number"
|
| 854 |
if key in {"description"}:
|
| 855 |
column["maxWidth"] = "28rem"
|
|
|
|
| 865 |
"pageSize": 10,
|
| 866 |
}
|
| 867 |
|
|
|
|
| 868 |
if row_click is not None:
|
| 869 |
data_table["onRowClick"] = row_click
|
| 870 |
|
|
|
|
| 896 |
*,
|
| 897 |
description: str | None = None,
|
| 898 |
) -> dict[str, Any]:
|
| 899 |
+
rows = [
|
| 900 |
+
{"field": _titleize(key), "value": _normalize_cell(value, key=str(key))}
|
| 901 |
+
for key, value in values.items()
|
| 902 |
+
]
|
| 903 |
return _build_table_card(
|
| 904 |
title,
|
| 905 |
rows,
|
|
|
|
| 1053 |
|
| 1054 |
if all(isinstance(item, dict) for item in value):
|
| 1055 |
rows = [item for item in value if isinstance(item, dict)]
|
| 1056 |
+
table_card = _build_table_card(title, rows, description=description)
|
| 1057 |
+
distribution_fields = _select_distribution_fields(rows)
|
| 1058 |
+
if distribution_fields is None:
|
| 1059 |
+
return [table_card]
|
| 1060 |
+
label_key, count_key = distribution_fields
|
| 1061 |
+
return [
|
| 1062 |
+
{
|
| 1063 |
+
"type": "Column",
|
| 1064 |
+
"gap": 4,
|
| 1065 |
+
"children": [
|
| 1066 |
+
_build_distribution_card(
|
| 1067 |
+
title,
|
| 1068 |
+
rows,
|
| 1069 |
+
label_key=label_key,
|
| 1070 |
+
count_key=count_key,
|
| 1071 |
+
),
|
| 1072 |
+
table_card,
|
| 1073 |
+
],
|
| 1074 |
+
}
|
| 1075 |
+
]
|
| 1076 |
|
| 1077 |
rows = [
|
| 1078 |
{
|
| 1079 |
"index": index + 1,
|
| 1080 |
+
"value": _normalize_cell(item, key="value"),
|
| 1081 |
}
|
| 1082 |
for index, item in enumerate(value)
|
| 1083 |
]
|
|
|
|
| 1094 |
if depth > 2:
|
| 1095 |
return [_build_key_value_card(title, value, description=description)]
|
| 1096 |
|
| 1097 |
+
if depth <= 1 and _looks_like_user_profile(value):
|
| 1098 |
+
sections: list[dict[str, Any]] = []
|
| 1099 |
+
user_card = _build_user_profile_card(title, value)
|
| 1100 |
+
if user_card is not None:
|
| 1101 |
+
sections.append(user_card)
|
| 1102 |
+
remaining = {
|
| 1103 |
+
key: item
|
| 1104 |
+
for key, item in value.items()
|
| 1105 |
+
if key
|
| 1106 |
+
not in {
|
| 1107 |
+
*_USER_NAME_KEYS,
|
| 1108 |
+
*_USER_AVATAR_KEYS,
|
| 1109 |
+
"bio",
|
| 1110 |
+
"description",
|
| 1111 |
+
"headline",
|
| 1112 |
+
"hf_url",
|
| 1113 |
+
"profile_url",
|
| 1114 |
+
"website_url",
|
| 1115 |
+
"blog_url",
|
| 1116 |
+
"github_url",
|
| 1117 |
+
"twitter_url",
|
| 1118 |
+
"github",
|
| 1119 |
+
"twitter",
|
| 1120 |
+
}
|
| 1121 |
+
}
|
| 1122 |
+
if remaining:
|
| 1123 |
+
sections.extend(_render_dict(title, remaining, description=description, depth=depth + 1))
|
| 1124 |
+
return sections
|
| 1125 |
+
|
| 1126 |
if "results" in value or "coverage" in value:
|
| 1127 |
sections: list[dict[str, Any]] = []
|
| 1128 |
results = value.get("results")
|
|
|
|
| 1268 |
}
|
| 1269 |
)
|
| 1270 |
|
| 1271 |
+
if isinstance(runtime_meta, dict) and runtime_meta.get("elapsed_ms") is not None:
|
| 1272 |
+
summary_children.append(
|
| 1273 |
+
{
|
| 1274 |
+
"type": "CardFooter",
|
| 1275 |
+
"children": [
|
| 1276 |
+
{
|
| 1277 |
+
"type": "Muted",
|
| 1278 |
+
"content": f'Runtime: {runtime_meta["elapsed_ms"]} ms',
|
| 1279 |
+
}
|
| 1280 |
+
],
|
| 1281 |
+
}
|
| 1282 |
+
)
|
| 1283 |
+
|
| 1284 |
return {"type": "Card", "children": summary_children}
|
| 1285 |
|
| 1286 |
|
|
|
|
| 1296 |
helper_meta: dict[str, Any] | None = None
|
| 1297 |
body_children: list[dict[str, Any]] = []
|
| 1298 |
|
| 1299 |
+
if isinstance(result, dict) and _looks_like_helper_envelope(result):
|
| 1300 |
helper_meta = result.get("meta") if isinstance(result.get("meta"), dict) else None
|
| 1301 |
if result.get("ok") is False:
|
| 1302 |
message = str(result.get("error") or "Helper query failed")
|
|
|
|
| 1325 |
else:
|
| 1326 |
body_children.extend(_render_value("Results", result))
|
| 1327 |
|
| 1328 |
+
page_css_class = WIDE_PAGE_CSS_CLASS if _prefers_wide_layout(result) else PAGE_CSS_CLASS
|
| 1329 |
body_view = {
|
| 1330 |
"type": "Column",
|
| 1331 |
"gap": 6,
|
| 1332 |
+
"cssClass": page_css_class,
|
| 1333 |
"children": [
|
| 1334 |
_build_summary_card(
|
| 1335 |
query,
|
wheels/.gitkeep
ADDED
|
File without changes
|
wheels/prefab_ui-0.13.2.dev5+a585463-py3-none-any.whl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20a94bcc2a2fd2bd31f2430ee7fd8f04f2ac410afb2932f03014a8609bce5fb3
|
| 3 |
+
size 896909
|