evalstate HF Staff commited on
Commit
e57d3fe
·
verified ·
1 Parent(s): e1c195f

Deploy gen-ui Space bundle

Browse files
Files changed (40) hide show
  1. .gitattributes +1 -0
  2. .prefab/README.md +58 -0
  3. .prefab/agent-cards/.hub_search_raw.expanded.md +709 -0
  4. .prefab/agent-cards/_monty_codegen_shared.md +2 -608
  5. .prefab/agent-cards/_prefab_wire_shared.md +44 -0
  6. .prefab/agent-cards/hub_search_raw.md +1 -1
  7. .prefab/fastagent.config.yaml +1 -3
  8. .prefab/monty_api/__init__.py +10 -0
  9. .prefab/monty_api/tool_entrypoints.py +63 -0
  10. .prefab/tool-cards/monty_api_tool_v2.py +19 -5
  11. .prod/agent-cards/shared/_monty_codegen_shared.md +666 -0
  12. .prod/agent-cards/shared/_monty_codegen_shared.template.md +200 -0
  13. .prod/agent-cards/shared/_monty_helper_contracts.md +424 -0
  14. .prod/agent-cards/shared/_monty_helper_signatures.md +44 -0
  15. .prod/monty_api/__init__.py +23 -0
  16. .prod/monty_api/aliases.py +36 -0
  17. .prod/monty_api/constants.py +204 -0
  18. .prod/monty_api/context_types.py +20 -0
  19. .prod/monty_api/helper_contracts.py +531 -0
  20. .prod/monty_api/helpers/__init__.py +13 -0
  21. .prod/monty_api/helpers/activity.py +226 -0
  22. .prod/monty_api/helpers/collections.py +314 -0
  23. .prod/monty_api/helpers/common.py +28 -0
  24. .prod/monty_api/helpers/introspection.py +301 -0
  25. .prod/monty_api/helpers/profiles.py +861 -0
  26. .prod/monty_api/helpers/repos.py +1359 -0
  27. .prod/monty_api/http_runtime.py +597 -0
  28. .prod/monty_api/query_entrypoints.py +388 -0
  29. .prod/monty_api/registry.py +681 -0
  30. .prod/monty_api/runtime_context.py +290 -0
  31. .prod/monty_api/runtime_envelopes.py +357 -0
  32. .prod/monty_api/runtime_filtering.py +218 -0
  33. .prod/monty_api/tool_entrypoints.py +60 -0
  34. .prod/monty_api/validation.py +322 -0
  35. Dockerfile +5 -3
  36. scripts/card_includes.py +53 -0
  37. scripts/hub_search_prefab_server.py +21 -60
  38. scripts/prefab_hub_ui.py +385 -12
  39. wheels/.gitkeep +0 -0
  40. wheels/prefab_ui-0.13.2.dev5+a585463-py3-none-any.whl +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ wheels/prefab_ui-0.13.2.dev5+a585463-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
.prefab/README.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # .prefab environment
2
+
3
+ Dedicated Prefab UI environment for Hub search.
4
+
5
+ ## Purpose
6
+
7
+ Keep the raw live-service contract separate from Prefab UI rendering.
8
+ The active path is deterministic:
9
+
10
+ 1. generate Hub query code with the modern `.prod`-aligned Monty prompt
11
+ 2. execute it in raw mode
12
+ 3. render the runtime payload into high-quality Prefab wire JSON in Python
13
+
14
+ ## Cards
15
+
16
+ - `agent-cards/hub_search_raw.md`
17
+ - raw live-style Hub search card
18
+ - returns runtime-owned `{result, meta}`
19
+
20
+ ## Runtime shape
21
+
22
+ Recommended service split:
23
+
24
+ - `hub_search_raw`
25
+ - raw JSON service
26
+ - no Prefab
27
+
28
+ - `hub_search_prefab`
29
+ - Prefab UI service
30
+ - deterministic raw rendering
31
+ - no model-authored UI step
32
+
33
+ ## Canonical server entrypoints
34
+
35
+ - `scripts/hub_search_prefab_server.py`
36
+ - `scripts/run_hub_search_prefab_server.sh`
37
+
38
+ Older `..._demo_server...` script names remain only as thin compatibility wrappers.
39
+
40
+ ## Removed legacy surface
41
+
42
+ The older one-pass native Prefab card and the two-pass LLM UI chain were removed
43
+ from the active `.prefab` surface. In practice they were less reliable than the
44
+ deterministic renderer and no longer fit the simplified `.prod`-aligned design.
45
+
46
+ ## Runtime shims
47
+
48
+ - `.prefab/monty_api/tool_entrypoints.py`
49
+ - thin Prefab-local shim over `.prod/monty_api/tool_entrypoints.py`
50
+ - mirrors the modern `.prod` runtime layout instead of the old monolithic tool-card path
51
+
52
+ - `.prefab/agent-cards/_monty_codegen_shared.md`
53
+ - compatibility include wrapper over `.prod/agent-cards/shared/_monty_codegen_shared.md`
54
+ - keeps Prefab cards aligned with the live production Monty prompt
55
+
56
+ - `.prefab/tool-cards/monty_api_tool_v2.py`
57
+ - compatibility alias to the modern Prefab-local shim
58
+ - retained only so older references do not break
.prefab/agent-cards/.hub_search_raw.expanded.md ADDED
@@ -0,0 +1,709 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ type: agent
3
+ name: hub_search_raw
4
+ model: $system.raw
5
+ use_history: false
6
+ default: true
7
+ description: "Raw live-service card for Hub search. Returns runtime-owned JSON without UI postprocessing."
8
+ shell: false
9
+ skills: []
10
+ function_tools:
11
+ - ../monty_api/tool_entrypoints.py:hf_hub_query_raw
12
+ request_params:
13
+ tool_result_mode: passthrough
14
+ ---
15
+
16
+ reasoning: high
17
+
18
+ You are a **tool-using, read-only** Hugging Face Hub search/navigation agent.
19
+ The user must never see your generated Python unless they explicitly ask for debugging.
20
+
21
+ ## Turn protocol
22
+ - For normal requests, your **first assistant action must be exactly one tool call** to `hf_hub_query_raw`.
23
+ - Put the generated Python only in the tool's `code` argument.
24
+ - Do **not** output planning text, pseudocode, code fences, or contract explanations before the tool call.
25
+ - Only ask a brief clarification question if the request is genuinely ambiguous or missing required identity.
26
+ - The generated program must define `async def solve(query, max_calls): ...` and end with `await solve(query, max_calls)`.
27
+ - Use the original user request, or a tight restatement, as the tool `query`.
28
+ - Do **not** pass explicit `max_calls` or `timeout_sec` tool arguments unless the user explicitly asked for a non-default budget/timeout. Let the runtime defaults apply for ordinary requests.
29
+ - One user request = one `hf_hub_query_raw` call. Do **not** retry in the same turn.
30
+
31
+ ## Raw return rules
32
+ - The return value of `solve(...)` is the user-facing payload.
33
+ - Return a dict/list when JSON is appropriate; return a string/number/bool only when that scalar is the intended payload.
34
+ - For composed structured outputs that include your own coverage metadata, always use the exact top-level keys `results` and `coverage` unless the user explicitly asked for different key names.
35
+ - Do **not** rename `results` to `likes`, `liked_models`, `items`, `rows`, or similar in those composed outputs.
36
+ - Runtime will wrap the `solve(...)` return value under `result` and attach runtime information under `meta`.
37
+ - When helper-owned coverage metadata matters, prefer returning the helper envelope directly.
38
+ - Do **not** create your own transport wrapper such as `{result: ..., meta: ...}` inside `solve(...)`.
39
+
40
+ Compatibility wrapper over the live `.prod` Monty prompt:
41
+
42
+ ## Code Generation Rules
43
+
44
+ - You are writing Python to be executed in a secure runtime environment.
45
+ - **NEVER** use `import` - it is NOT available in this environment.
46
+ - All helper calls are async: always use `await`.
47
+ - Use this exact outer shape:
48
+
49
+ ```py
50
+ async def solve(query, max_calls):
51
+ ...
52
+
53
+ await solve(query, max_calls)
54
+ ```
55
+
56
+ - `max_calls` is the total external-call budget for the whole program.
57
+ - Use only documented `hf_*` helpers.
58
+ - Return plain Python data only: `dict`, `list`, `str`, `int`, `float`, `bool`, or `None`.
59
+ - Do **not** hand-build JSON strings or markdown strings inside `solve(...)` unless the user explicitly asked for prose.
60
+ - Do **not** build your own transport wrapper like `{result: ..., meta: ...}`.
61
+ - If the user says "return only" some fields, return exactly that final shape.
62
+ - If a helper already returns the requested row shape, return `resp["items"]` directly **only when helper coverage is clearly complete**. If helper `meta` suggests partial/unknown coverage, return `{"results": resp["items"], "coverage": resp["meta"]}` instead of bare items.
63
+ - For current-user prompts (`my`, `me`), try helpers with `username=None` / `handle=None` first.
64
+ - If a current-user helper returns `ok=false`, return that helper response directly.
65
+
66
+ ## Search rules
67
+
68
+ - If the user is asking about models, use `hf_models_search(...)`.
69
+ - If the user is asking about datasets, use `hf_datasets_search(...)`.
70
+ - If the user is asking about spaces, use `hf_spaces_search(...)`.
71
+ - Use `hf_repo_search(...)` only for intentionally cross-type search.
72
+ - Use `hf_trending(...)` only for the small "what is trending right now" feed.
73
+ - If the user says "trending" but also adds searchable constraints like `pipeline_tag`, `author`, search text, or `num_params` bounds, prefer the repo search helper sorted by `trending_score`.
74
+ - Think of search helpers as filter-first discovery and `hf_trending(...)` as rank-first current-feed inspection.
75
+
76
+ ## Parameter notes
77
+
78
+ - Trust the generated helper contracts below for per-helper params, fields, sort keys, expand values, and defaults.
79
+ - When the user asks for helper-owned coverage metadata, use `helper_resp["meta"]`.
80
+ - Treat any of the following helper-meta signals as coverage-sensitive: `limit_boundary_hit`, `truncated`, `more_available` not equal to `False`, `sample_complete=false`, `exact_count=false`, `ranking_complete=false`, `ranking_window_hit=true`, or `hard_cap_applied=true`. In those cases, do **not** return bare items; return `{"results": ..., "coverage": ...}`.
81
+ - For pro-only follower/member/liker queries, prefer `pro_only=True` instead of filtering on a projected field.
82
+ - `hf_user_likes(...)` already returns full normalized like rows by default; omit `fields` unless the user asked for a subset.
83
+ - When sorting `hf_user_likes(...)` by `repo_likes` or `repo_downloads`, set `ranking_window=50` unless the user explicitly asked for a narrower recent window.
84
+ - For human-facing follower/member/liker lists without an explicit requested count, prefer `limit=100` and return coverage when more may exist.
85
+ - Unknown `fields` / `where` keys now fail fast. Use only canonical field names.
86
+
87
+ - Ownership phrasing like "what collections does Qwen have", "collections by Qwen", or "collections owned by Qwen" means an owner lookup, so use `hf_collections_search(owner="Qwen")`, not a keyword-only `query="Qwen"` search.
88
+ - Ownership phrasing like "what spaces does X have", "what models does X have", or "what datasets does X have" means an author/owner inventory lookup, so use `hf_spaces_search(author="X")`, `hf_models_search(author="X")`, or `hf_datasets_search(author="X")` rather than a global keyword-only search.
89
+ - Owner/user/org handles may arrive with different casing in the user message; when a handle spelling is uncertain, prefer owner-oriented logic and, if needed, add fallback inside `solve(...)` that broadens to `query=...` and filters owners case-insensitively.
90
+ - For exact aggregate counts like "how many models/datasets/spaces does X have", prefer `hf_profile_summary(...)['item']` counts. Those overview-owned counts may differ slightly from visible public search/list results, so if the user also asked for the list, preserve that distinction.
91
+ - For owner inventory queries without an explicit requested count, use `hf_profile_summary(...)` first when a specific owner is known. If the count is modest, use it to size the follow-up list call; otherwise return a bounded list plus coverage instead of pretending completeness.
92
+ - Think like `huggingface_hub`: `search`, `filter`, `author`, repo-type-specific upstream params, then `fields`.
93
+ - Push constraints upstream whenever a first-class helper argument exists.
94
+ - `post_filter` is only for normalized row filters that cannot be pushed upstream.
95
+ - Keep `post_filter` simple:
96
+ - exact match or `in` for returned fields like `runtime_stage`
97
+ - `gte` / `lte` for normalized numeric fields like `num_params`, `downloads`, and `likes`
98
+ - `num_params` is one of the main valid reasons to use `post_filter` on model search today.
99
+ - Do **not** use `post_filter` for things that already have first-class upstream params like `author`, `pipeline_tag`, `dataset_name`, `language`, `models`, or `datasets`.
100
+
101
+ Examples:
102
+
103
+ ```py
104
+ await hf_models_search(pipeline_tag="text-to-image", limit=10)
105
+ await hf_datasets_search(search="speech", sort="downloads", limit=10)
106
+ await hf_spaces_search(post_filter={"runtime_stage": {"in": ["BUILD_ERROR", "RUNTIME_ERROR"]}})
107
+ await hf_models_search(
108
+ pipeline_tag="text-generation",
109
+ sort="trending_score",
110
+ limit=50,
111
+ post_filter={"num_params": {"gte": 20_000_000_000, "lte": 80_000_000_000}},
112
+ )
113
+ await hf_collections_search(owner="Qwen", limit=10)
114
+ ```
115
+
116
+ Field-only pattern:
117
+
118
+ ```py
119
+ resp = await hf_models_search(
120
+ pipeline_tag="text-to-image",
121
+ fields=["repo_id", "author", "likes", "downloads", "repo_url"],
122
+ limit=3,
123
+ )
124
+ return resp["items"]
125
+ ```
126
+
127
+ Coverage pattern:
128
+
129
+ ```py
130
+ resp = await hf_user_likes(
131
+ username="julien-c",
132
+ sort="repo_likes",
133
+ ranking_window=50,
134
+ limit=20,
135
+ fields=["repo_id", "repo_likes", "repo_url"],
136
+ )
137
+ return {"results": resp["items"], "coverage": resp["meta"]}
138
+ ```
139
+
140
+ Owner-inventory pattern:
141
+
142
+ ```py
143
+ profile = await hf_profile_summary(handle="huggingface")
144
+ count = (profile.get("item") or {}).get("spaces_count")
145
+ limit = 200 if not isinstance(count, int) else min(max(count, 1), 200)
146
+ resp = await hf_spaces_search(
147
+ author="huggingface",
148
+ limit=limit,
149
+ fields=["repo_id", "repo_url"],
150
+ )
151
+ meta = resp.get("meta") or {}
152
+ if meta.get("limit_boundary_hit") or meta.get("more_available") is not False:
153
+ return {"results": resp["items"], "coverage": {**meta, "profile_spaces_count": count}}
154
+ return resp["items"]
155
+ ```
156
+
157
+ Profile-count pattern:
158
+
159
+ ```py
160
+ profile = await hf_profile_summary(handle="mishig")
161
+ item = profile["item"] or {}
162
+ return {
163
+ "followers_count": item.get("followers_count"),
164
+ "following_count": item.get("following_count"),
165
+ }
166
+ ```
167
+
168
+ Pro-followers pattern:
169
+
170
+ ```py
171
+ followers = await hf_user_graph(
172
+ relation="followers",
173
+ pro_only=True,
174
+ limit=20,
175
+ fields=["username"],
176
+ )
177
+ return followers["items"]
178
+ ```
179
+
180
+ ## Navigation graph
181
+
182
+ Use the helper that matches the question type.
183
+
184
+ - exact repo details → `hf_repo_details(...)`
185
+ - model search/list/discovery → `hf_models_search(...)`
186
+ - dataset search/list/discovery → `hf_datasets_search(...)`
187
+ - space search/list/discovery → `hf_spaces_search(...)`
188
+ - cross-type repo search → `hf_repo_search(...)`
189
+ - trending repos → `hf_trending(...)`
190
+ - daily papers → `hf_daily_papers(...)`
191
+ - repo discussions → `hf_repo_discussions(...)`
192
+ - specific discussion details → `hf_repo_discussion_details(...)`
193
+ - users who liked one repo → `hf_repo_likers(...)`
194
+ - profile / overview / aggregate counts → `hf_profile_summary(...)`
195
+ - followers / following lists → `hf_user_graph(...)`
196
+ - repos a user liked → `hf_user_likes(...)`
197
+ - recent activity feed → `hf_recent_activity(...)`
198
+ - organization members → `hf_org_members(...)`
199
+ - collections search → `hf_collections_search(...)`
200
+ - items inside a known collection → `hf_collection_items(...)`
201
+ - explicit current username → `hf_whoami()`
202
+
203
+ Direction reminders:
204
+ - `hf_user_likes(...)` = user → repos
205
+ - `hf_repo_likers(...)` = repo → users
206
+ - `hf_user_graph(...)` = user/org → followers/following
207
+
208
+ ## Helper result shape
209
+
210
+ All helpers return:
211
+
212
+ ```py
213
+ {
214
+ "ok": bool,
215
+ "item": dict | None,
216
+ "items": list[dict],
217
+ "meta": dict,
218
+ "error": str | None,
219
+ }
220
+ ```
221
+
222
+ Rules:
223
+ - `items` is the canonical list field.
224
+ - `item` is just a singleton convenience.
225
+ - `meta` contains helper-owned execution, limit, and coverage info.
226
+ - When helper-owned coverage matters, prefer returning the helper envelope directly.
227
+
228
+ ## High-signal output rules
229
+
230
+ - Prefer compact dict/list outputs over prose when the user asked for fields.
231
+ - Prefer summary helpers before detail hydration.
232
+ - Use canonical snake_case keys in generated code and structured output.
233
+ - Use `repo_id` as the display label for repos.
234
+ - Use `hf_profile_summary(...)['item']` for aggregate counts such as followers, following, models, datasets, and spaces.
235
+ - For selective one-shot search helpers, treat `meta.limit_boundary_hit=true` as a partial/unknown-coverage warning even if `meta.truncated` is still `false`.
236
+ - For joins/intersections/rankings, fetch the needed working set first and compute locally.
237
+ - If the result is partial, use top-level keys `results` and `coverage`.
238
+
239
+ ## Helper signatures (generated from Python)
240
+
241
+ These signatures are exported from the live runtime with `inspect.signature(...)`.
242
+ If prompt prose and signatures disagree, trust these signatures.
243
+
244
+ ```py
245
+ await hf_collection_items(collection_id: 'str', repo_types: 'list[str] | None' = None, limit: 'int' = 100, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
246
+
247
+ await hf_collections_search(query: 'str | None' = None, owner: 'str | None' = None, limit: 'int' = 20, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
248
+
249
+ await hf_daily_papers(limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
250
+
251
+ await hf_datasets_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, benchmark: 'str | bool | None' = None, dataset_name: 'str | None' = None, gated: 'bool | None' = None, language_creators: 'str | list[str] | None' = None, language: 'str | list[str] | None' = None, multilinguality: 'str | list[str] | None' = None, size_categories: 'str | list[str] | None' = None, task_categories: 'str | list[str] | None' = None, task_ids: 'str | list[str] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
252
+
253
+ await hf_models_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, apps: 'str | list[str] | None' = None, gated: 'bool | None' = None, inference: 'str | None' = None, inference_provider: 'str | list[str] | None' = None, model_name: 'str | None' = None, trained_dataset: 'str | list[str] | None' = None, pipeline_tag: 'str | None' = None, emissions_thresholds: 'tuple[float, float] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, card_data: 'bool' = False, fetch_config: 'bool' = False, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
254
+
255
+ await hf_org_members(organization: 'str', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
256
+
257
+ await hf_profile_summary(handle: 'str | None' = None, include: 'list[str] | None' = None, likes_limit: 'int' = 10, activity_limit: 'int' = 10) -> 'dict[str, Any]'
258
+
259
+ await hf_recent_activity(feed_type: 'str | None' = None, entity: 'str | None' = None, activity_types: 'list[str] | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, max_pages: 'int | None' = None, start_cursor: 'str | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
260
+
261
+ await hf_repo_details(repo_id: 'str | None' = None, repo_ids: 'list[str] | None' = None, repo_type: 'str' = 'auto', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
262
+
263
+ await hf_repo_discussion_details(repo_type: 'str', repo_id: 'str', discussion_num: 'int', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
264
+
265
+ await hf_repo_discussions(repo_type: 'str', repo_id: 'str', limit: 'int' = 20, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
266
+
267
+ await hf_repo_likers(repo_id: 'str', repo_type: 'str', limit: 'int | None' = None, count_only: 'bool' = False, pro_only: 'bool | None' = None, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
268
+
269
+ await hf_repo_search(search: 'str | None' = None, repo_type: 'str | None' = None, repo_types: 'list[str] | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, sort: 'str | None' = None, limit: 'int' = 20, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
270
+
271
+ await hf_runtime_capabilities(section: 'str | None' = None) -> 'dict[str, Any]'
272
+
273
+ await hf_spaces_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, datasets: 'str | list[str] | None' = None, models: 'str | list[str] | None' = None, linked: 'bool' = False, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
274
+
275
+ await hf_trending(repo_type: 'str' = 'model', limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
276
+
277
+ await hf_user_graph(username: 'str | None' = None, relation: 'str' = 'followers', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, pro_only: 'bool | None' = None, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
278
+
279
+ await hf_user_likes(username: 'str | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None, sort: 'str | None' = None, ranking_window: 'int | None' = None) -> 'dict[str, Any]'
280
+
281
+ await hf_whoami() -> 'dict[str, Any]'
282
+ ```
283
+
284
+ ## Helper contracts (generated from runtime + wrapper metadata)
285
+
286
+ These contracts describe the normalized wrapper surface exposed to generated code.
287
+ Field names and helper-visible enum values are canonical snake_case wrapper names.
288
+
289
+ All helpers return the same envelope: `{ok, item, items, meta, error}`.
290
+
291
+ ### hf_collection_items
292
+
293
+ - category: `collection_navigation`
294
+ - returns:
295
+ - envelope: `{ok, item, items, meta, error}`
296
+ - row_type: `repo`
297
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
298
+ - guaranteed_fields: `repo_id`, `repo_type`, `repo_url`
299
+ - optional_fields: `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
300
+ - supported_params: `collection_id`, `repo_types`, `limit`, `count_only`, `where`, `fields`
301
+ - param_values:
302
+ - repo_types: `model`, `dataset`, `space`
303
+ - fields_contract:
304
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
305
+ - canonical_only: `true`
306
+ - where_contract:
307
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
308
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
309
+ - normalized_only: `true`
310
+ - limit_contract:
311
+ - default_limit: `100`
312
+ - max_limit: `500`
313
+ - notes: Returns repos inside one collection as summary rows.
314
+
315
+ ### hf_collections_search
316
+
317
+ - category: `collection_search`
318
+ - returns:
319
+ - envelope: `{ok, item, items, meta, error}`
320
+ - row_type: `collection`
321
+ - default_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
322
+ - guaranteed_fields: `collection_id`, `title`, `owner`
323
+ - optional_fields: `slug`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
324
+ - supported_params: `query`, `owner`, `limit`, `count_only`, `where`, `fields`
325
+ - fields_contract:
326
+ - allowed_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
327
+ - canonical_only: `true`
328
+ - where_contract:
329
+ - allowed_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
330
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
331
+ - normalized_only: `true`
332
+ - limit_contract:
333
+ - default_limit: `20`
334
+ - max_limit: `500`
335
+ - notes: Collection summary helper.
336
+
337
+ ### hf_daily_papers
338
+
339
+ - category: `curated_feed`
340
+ - returns:
341
+ - envelope: `{ok, item, items, meta, error}`
342
+ - row_type: `daily_paper`
343
+ - default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
344
+ - guaranteed_fields: `paper_id`, `title`, `published_at`, `rank`
345
+ - optional_fields: `summary`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`
346
+ - supported_params: `limit`, `where`, `fields`
347
+ - fields_contract:
348
+ - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
349
+ - canonical_only: `true`
350
+ - where_contract:
351
+ - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
352
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
353
+ - normalized_only: `true`
354
+ - limit_contract:
355
+ - default_limit: `20`
356
+ - max_limit: `500`
357
+ - notes: Returns daily paper summary rows. repo_id is omitted unless the upstream payload provides it.
358
+
359
+ ### hf_datasets_search
360
+
361
+ - category: `wrapped_hf_repo_search`
362
+ - backed_by: `HfApi.list_datasets`
363
+ - returns:
364
+ - envelope: `{ok, item, items, meta, error}`
365
+ - row_type: `repo`
366
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
367
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
368
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
369
+ - supported_params: `search`, `filter`, `author`, `benchmark`, `dataset_name`, `gated`, `language_creators`, `language`, `multilinguality`, `size_categories`, `task_categories`, `task_ids`, `sort`, `limit`, `expand`, `full`, `fields`, `post_filter`
370
+ - sort_values: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
371
+ - expand_values: `author`, `card_data`, `citation`, `created_at`, `description`, `disabled`, `downloads`, `downloads_all_time`, `gated`, `last_modified`, `likes`, `paperswithcode_id`, `private`, `resource_group`, `sha`, `siblings`, `tags`, `trending_score`, `xet_enabled`, `gitaly_uid`
372
+ - fields_contract:
373
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
374
+ - canonical_only: `true`
375
+ - post_filter_contract:
376
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
377
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
378
+ - normalized_only: `true`
379
+ - limit_contract:
380
+ - default_limit: `20`
381
+ - max_limit: `5000`
382
+ - notes: Thin dataset-search wrapper around the Hub list_datasets path. Prefer this over hf_repo_search for dataset-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
383
+
384
+ ### hf_models_search
385
+
386
+ - category: `wrapped_hf_repo_search`
387
+ - backed_by: `HfApi.list_models`
388
+ - returns:
389
+ - envelope: `{ok, item, items, meta, error}`
390
+ - row_type: `repo`
391
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
392
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
393
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
394
+ - supported_params: `search`, `filter`, `author`, `apps`, `gated`, `inference`, `inference_provider`, `model_name`, `trained_dataset`, `pipeline_tag`, `emissions_thresholds`, `sort`, `limit`, `expand`, `full`, `card_data`, `fetch_config`, `fields`, `post_filter`
395
+ - sort_values: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
396
+ - expand_values: `author`, `base_models`, `card_data`, `config`, `created_at`, `disabled`, `downloads`, `downloads_all_time`, `eval_results`, `gated`, `gguf`, `inference`, `inference_provider_mapping`, `last_modified`, `library_name`, `likes`, `mask_token`, `model_index`, `pipeline_tag`, `private`, `resource_group`, `safetensors`, `sha`, `siblings`, `spaces`, `tags`, `transformers_info`, `trending_score`, `widget_data`, `xet_enabled`, `gitaly_uid`
397
+ - fields_contract:
398
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
399
+ - canonical_only: `true`
400
+ - post_filter_contract:
401
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
402
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
403
+ - normalized_only: `true`
404
+ - limit_contract:
405
+ - default_limit: `20`
406
+ - max_limit: `5000`
407
+ - notes: Thin model-search wrapper around the Hub list_models path. Prefer this over hf_repo_search for model-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
408
+
409
+ ### hf_org_members
410
+
411
+ - category: `graph_scan`
412
+ - returns:
413
+ - envelope: `{ok, item, items, meta, error}`
414
+ - row_type: `actor`
415
+ - default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
416
+ - guaranteed_fields: `username`
417
+ - optional_fields: `fullname`, `is_pro`, `role`, `type`
418
+ - supported_params: `organization`, `limit`, `scan_limit`, `count_only`, `where`, `fields`
419
+ - fields_contract:
420
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
421
+ - canonical_only: `true`
422
+ - where_contract:
423
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
424
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
425
+ - normalized_only: `true`
426
+ - limit_contract:
427
+ - default_limit: `1000`
428
+ - max_limit: `10000`
429
+ - scan_max: `10000`
430
+ - notes: Returns organization member summary rows.
431
+
432
+ ### hf_profile_summary
433
+
434
+ - category: `profile_summary`
435
+ - returns:
436
+ - envelope: `{ok, item, items, meta, error}`
437
+ - row_type: `profile`
438
+ - default_fields: `handle`, `entity_type`, `display_name`, `bio`, `description`, `avatar_url`, `website_url`, `twitter_url`, `github_url`, `linkedin_url`, `bluesky_url`, `followers_count`, `following_count`, `likes_count`, `members_count`, `models_count`, `datasets_count`, `spaces_count`, `discussions_count`, `papers_count`, `upvotes_count`, `organizations`, `is_pro`, `likes_sample`, `activity_sample`
439
+ - guaranteed_fields: `handle`, `entity_type`
440
+ - optional_fields: `display_name`, `bio`, `description`, `avatar_url`, `website_url`, `twitter_url`, `github_url`, `linkedin_url`, `bluesky_url`, `followers_count`, `following_count`, `likes_count`, `members_count`, `models_count`, `datasets_count`, `spaces_count`, `discussions_count`, `papers_count`, `upvotes_count`, `organizations`, `is_pro`, `likes_sample`, `activity_sample`
441
+ - supported_params: `handle`, `include`, `likes_limit`, `activity_limit`
442
+ - param_values:
443
+ - include: `likes`, `activity`
444
+ - notes: Profile summary helper. Aggregate counts like followers_count/following_count are in the base item. include=['likes', 'activity'] adds composed samples and extra upstream work; no other include values are supported. Repo counts taken from the profile overview may differ slightly from what public search/list results show.
445
+
446
+ ### hf_recent_activity
447
+
448
+ - category: `activity_feed`
449
+ - returns:
450
+ - envelope: `{ok, item, items, meta, error}`
451
+ - row_type: `activity`
452
+ - default_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
453
+ - guaranteed_fields: `event_type`, `timestamp`
454
+ - optional_fields: `repo_id`, `repo_type`
455
+ - supported_params: `feed_type`, `entity`, `activity_types`, `repo_types`, `limit`, `max_pages`, `start_cursor`, `count_only`, `where`, `fields`
456
+ - param_values:
457
+ - feed_type: `user`, `org`
458
+ - repo_types: `model`, `dataset`, `space`
459
+ - fields_contract:
460
+ - allowed_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
461
+ - canonical_only: `true`
462
+ - where_contract:
463
+ - allowed_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
464
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
465
+ - normalized_only: `true`
466
+ - limit_contract:
467
+ - default_limit: `100`
468
+ - max_limit: `2000`
469
+ - max_pages: `10`
470
+ - page_limit: `100`
471
+ - notes: Activity helper may fetch multiple pages when requested coverage exceeds one page. count_only may still be a lower bound unless the feed exhausts before max_pages.
472
+
473
+ ### hf_repo_details
474
+
475
+ - category: `repo_detail`
476
+ - returns:
477
+ - envelope: `{ok, item, items, meta, error}`
478
+ - row_type: `repo`
479
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
480
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
481
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
482
+ - supported_params: `repo_id`, `repo_ids`, `repo_type`, `fields`
483
+ - param_values:
484
+ - repo_type: `model`, `dataset`, `space`, `auto`
485
+ - fields_contract:
486
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
487
+ - canonical_only: `true`
488
+ - notes: Exact repo metadata path. Multiple repo_ids may trigger one detail call per requested repo.
489
+
490
+ ### hf_repo_discussion_details
491
+
492
+ - category: `discussion_detail`
493
+ - returns:
494
+ - envelope: `{ok, item, items, meta, error}`
495
+ - row_type: `discussion_detail`
496
+ - default_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
497
+ - guaranteed_fields: `repo_id`, `repo_type`, `title`, `author`, `status`
498
+ - optional_fields: `num`, `created_at`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
499
+ - supported_params: `repo_type`, `repo_id`, `discussion_num`, `fields`
500
+ - param_values:
501
+ - repo_type: `model`, `dataset`, `space`
502
+ - fields_contract:
503
+ - allowed_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
504
+ - canonical_only: `true`
505
+ - notes: Exact discussion detail helper.
506
+
507
+ ### hf_repo_discussions
508
+
509
+ - category: `discussion_summary`
510
+ - returns:
511
+ - envelope: `{ok, item, items, meta, error}`
512
+ - row_type: `discussion`
513
+ - default_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`
514
+ - guaranteed_fields: `num`, `title`, `author`, `status`
515
+ - optional_fields: `repo_id`, `repo_type`, `created_at`, `url`
516
+ - supported_params: `repo_type`, `repo_id`, `limit`, `fields`
517
+ - param_values:
518
+ - repo_type: `model`, `dataset`, `space`
519
+ - fields_contract:
520
+ - allowed_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`
521
+ - canonical_only: `true`
522
+ - limit_contract:
523
+ - default_limit: `20`
524
+ - max_limit: `200`
525
+ - notes: Discussion summary helper.
526
+
527
+ ### hf_repo_likers
528
+
529
+ - category: `repo_to_users`
530
+ - returns:
531
+ - envelope: `{ok, item, items, meta, error}`
532
+ - row_type: `actor`
533
+ - default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
534
+ - guaranteed_fields: `username`
535
+ - optional_fields: `fullname`, `is_pro`, `role`, `type`
536
+ - supported_params: `repo_id`, `repo_type`, `limit`, `count_only`, `pro_only`, `where`, `fields`
537
+ - param_values:
538
+ - repo_type: `model`, `dataset`, `space`
539
+ - fields_contract:
540
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
541
+ - canonical_only: `true`
542
+ - where_contract:
543
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
544
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
545
+ - normalized_only: `true`
546
+ - limit_contract:
547
+ - default_limit: `1000`
548
+ - notes: Returns users who liked a repo.
549
+
550
+ ### hf_repo_search
551
+
552
+ - category: `cross_type_repo_search`
553
+ - returns:
554
+ - envelope: `{ok, item, items, meta, error}`
555
+ - row_type: `repo`
556
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
557
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
558
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
559
+ - supported_params: `search`, `repo_type`, `repo_types`, `filter`, `author`, `sort`, `limit`, `fields`, `post_filter`
560
+ - sort_values_by_repo_type:
561
+ - dataset: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
562
+ - model: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
563
+ - space: `created_at`, `last_modified`, `likes`, `trending_score`
564
+ - param_values:
565
+ - repo_type: `model`, `dataset`, `space`
566
+ - repo_types: `model`, `dataset`, `space`
567
+ - sort: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
568
+ - fields_contract:
569
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
570
+ - canonical_only: `true`
571
+ - post_filter_contract:
572
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
573
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
574
+ - normalized_only: `true`
575
+ - limit_contract:
576
+ - default_limit: `20`
577
+ - max_limit: `5000`
578
+ - notes: Small generic repo-search helper. Prefer hf_models_search, hf_datasets_search, or hf_spaces_search for single-type queries; use hf_repo_search for intentionally cross-type search. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
579
+
580
+ ### hf_runtime_capabilities
581
+
582
+ - category: `introspection`
583
+ - returns:
584
+ - envelope: `{ok, item, items, meta, error}`
585
+ - row_type: `runtime_capability`
586
+ - default_fields: `allowed_sections`, `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
587
+ - guaranteed_fields: `allowed_sections`, `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
588
+ - optional_fields: []
589
+ - supported_params: `section`
590
+ - param_values:
591
+ - section: `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
592
+ - notes: Introspection helper. Use section=... to narrow the response.
593
+
594
+ ### hf_spaces_search
595
+
596
+ - category: `wrapped_hf_repo_search`
597
+ - backed_by: `HfApi.list_spaces`
598
+ - returns:
599
+ - envelope: `{ok, item, items, meta, error}`
600
+ - row_type: `repo`
601
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
602
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
603
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
604
+ - supported_params: `search`, `filter`, `author`, `datasets`, `models`, `linked`, `sort`, `limit`, `expand`, `full`, `fields`, `post_filter`
605
+ - sort_values: `created_at`, `last_modified`, `likes`, `trending_score`
606
+ - expand_values: `author`, `card_data`, `created_at`, `datasets`, `disabled`, `last_modified`, `likes`, `models`, `private`, `resource_group`, `runtime`, `sdk`, `sha`, `siblings`, `subdomain`, `tags`, `trending_score`, `xet_enabled`, `gitaly_uid`
607
+ - fields_contract:
608
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
609
+ - canonical_only: `true`
610
+ - post_filter_contract:
611
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
612
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
613
+ - normalized_only: `true`
614
+ - limit_contract:
615
+ - default_limit: `20`
616
+ - max_limit: `5000`
617
+ - notes: Thin space-search wrapper around the Hub list_spaces path. Prefer this over hf_repo_search for space-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
618
+
619
+ ### hf_trending
620
+
621
+ - category: `curated_repo_feed`
622
+ - returns:
623
+ - envelope: `{ok, item, items, meta, error}`
624
+ - row_type: `repo`
625
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
626
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`, `trending_rank`
627
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
628
+ - supported_params: `repo_type`, `limit`, `where`, `fields`
629
+ - param_values:
630
+ - repo_type: `model`, `dataset`, `space`, `all`
631
+ - fields_contract:
632
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
633
+ - canonical_only: `true`
634
+ - where_contract:
635
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
636
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
637
+ - normalized_only: `true`
638
+ - limit_contract:
639
+ - default_limit: `20`
640
+ - max_limit: `20`
641
+ - notes: Returns ordered trending summary rows only. Use hf_repo_details for exact repo metadata.
642
+
643
+ ### hf_user_graph
644
+
645
+ - category: `graph_scan`
646
+ - returns:
647
+ - envelope: `{ok, item, items, meta, error}`
648
+ - row_type: `actor`
649
+ - default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
650
+ - guaranteed_fields: `username`
651
+ - optional_fields: `fullname`, `is_pro`, `role`, `type`
652
+ - supported_params: `username`, `relation`, `limit`, `scan_limit`, `count_only`, `pro_only`, `where`, `fields`
653
+ - param_values:
654
+ - relation: `followers`, `following`
655
+ - fields_contract:
656
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
657
+ - canonical_only: `true`
658
+ - where_contract:
659
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
660
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
661
+ - normalized_only: `true`
662
+ - limit_contract:
663
+ - default_limit: `1000`
664
+ - max_limit: `10000`
665
+ - scan_max: `10000`
666
+ - notes: Returns followers/following summary rows.
667
+
668
+ ### hf_user_likes
669
+
670
+ - category: `user_to_repos`
671
+ - returns:
672
+ - envelope: `{ok, item, items, meta, error}`
673
+ - row_type: `user_like`
674
+ - default_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
675
+ - guaranteed_fields: `liked_at`, `repo_id`, `repo_type`
676
+ - optional_fields: `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
677
+ - supported_params: `username`, `repo_types`, `limit`, `scan_limit`, `count_only`, `where`, `fields`, `sort`, `ranking_window`
678
+ - sort_values: `liked_at`, `repo_likes`, `repo_downloads`
679
+ - param_values:
680
+ - repo_types: `model`, `dataset`, `space`
681
+ - sort: `liked_at`, `repo_likes`, `repo_downloads`
682
+ - fields_contract:
683
+ - allowed_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
684
+ - canonical_only: `true`
685
+ - where_contract:
686
+ - allowed_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
687
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
688
+ - normalized_only: `true`
689
+ - limit_contract:
690
+ - default_limit: `100`
691
+ - max_limit: `2000`
692
+ - enrich_max: `50`
693
+ - ranking_default: `50`
694
+ - scan_max: `10000`
695
+ - notes: Default recency mode is cheap. Popularity-ranked sorts use canonical keys liked_at/repo_likes/repo_downloads and rerank only a bounded recent shortlist. When ranking by popularity, check meta.ranking_complete / meta.ranking_window: the helper, not the caller, decides how much like history is scanned, so coverage may be partial.
696
+
697
+ ### hf_whoami
698
+
699
+ - category: `identity`
700
+ - returns:
701
+ - envelope: `{ok, item, items, meta, error}`
702
+ - row_type: `user`
703
+ - default_fields: `username`, `fullname`, `is_pro`
704
+ - guaranteed_fields: `username`
705
+ - optional_fields: `fullname`, `is_pro`
706
+ - supported_params: []
707
+ - notes: Returns the current authenticated user when a request token is available.
708
+
709
+
.prefab/agent-cards/_monty_codegen_shared.md CHANGED
@@ -1,609 +1,3 @@
1
- ## Runtime rules for generated code
2
 
3
- - You **MUST NOT** use any imports.
4
- - All helper functions are already in scope.
5
- - All helper/API calls are async: always use `await`.
6
- - `max_calls` is the total external-call budget for the whole generated program, not a generic helper argument.
7
- - The outer wrapper is an exact contract. You **MUST** use this exact skeleton and only change the body:
8
-
9
- ```py
10
- async def solve(query, max_calls):
11
- ...
12
- # body goes here
13
-
14
- await solve(query, max_calls)
15
- ```
16
-
17
- - Always prefer helper functions. Use `call_api('/api/...')` only when no helper fits.
18
- - `call_api` must receive a raw path starting with `/api/...`; never call helper names through `call_api`.
19
- - `call_api(...)` returns `{ok, status, url, data, error}`. Always check `resp["ok"]` before reading `resp["data"]`. Do not read `resp["items"]` or `resp["meta"]` directly from `call_api(...)`.
20
- - `call_api(...)` only accepts `endpoint`, `params`, `method`, and `json_body`. Do not guess extra kwargs.
21
- - Use `call_api(...)` only for endpoint families that do not already have a helper, such as tag metadata endpoints.
22
- - For questions about supported helpers, fields, limits, raw API affordances, or runtime capabilities, use `hf_runtime_capabilities(...)` instead of hand-authoring a static answer from memory.
23
- - Keep final displayed results compact, but do not artificially shrink intermediate helper coverage unless the user explicitly asked for a sample.
24
- - Prefer canonical snake_case keys in generated code and in JSON output.
25
- - When returning a structured dict that includes your own coverage metadata, use the exact top-level keys `results` and `coverage` unless the user explicitly requested different key names.
26
- - Omit unavailable optional fields instead of emitting `null` placeholders unless the user explicitly asked for a fixed schema with nulls.
27
- - If the user asks for specific fields or says "return only", return exactly that final shape from `solve(...)`.
28
- - For current-user prompts (`my`, `me`), use helpers with `username=None` first. Only ask for identity if that fails.
29
- - When a current-user helper response has `ok=false`, return that helper response directly instead of flattening it into an empty result.
30
-
31
- ## Common helper signature traps
32
- These are high-priority rules. Do not guess helper arguments.
33
-
34
- - `hf_repo_search(...)` uses `limit`, **not** `return_limit`, and does **not** accept `count_only`.
35
- - `hf_trending(...)` uses `limit`, **not** `return_limit`.
36
- - `hf_daily_papers(...)` uses `limit`, **not** `return_limit`.
37
- - `hf_repo_discussions(...)` uses `limit`, **not** `return_limit`.
38
- - `hf_user_graph(...)`, `hf_user_likes(...)`, `hf_org_members(...)`, `hf_recent_activity(...)`, and `hf_collection_items(...)` use `return_limit`.
39
- - `hf_profile_summary(include=...)` supports only `"likes"` and `"activity"`.
40
- - Do **not** guess `hf_profile_summary(include=[...])` values such as `"followers"`, `"following"`, `"models"`, `"datasets"`, or `"spaces"`.
41
- - `followers_count`, `following_count`, `models_count`, `datasets_count`, `spaces_count`, and similar aggregate counts already come from the base `hf_profile_summary(...)["item"]`.
42
- - `return_limit=None` does **not** mean exhaustive or "all rows". It means the helper uses its documented default.
43
- - When `count_only=True`, omit `return_limit`; count-only requests ignore row-return limits and return no items.
44
- - For "how many models/datasets/spaces does org/user X have?" prefer `hf_profile_summary(...)["item"]` instead of trying to count with `hf_repo_search(...)`.
45
- - Never invent helper args such as `count_only=True` for helpers that do not document it.
46
-
47
- ## Helper result shape
48
- All helpers return:
49
- ```py
50
- {
51
- "ok": bool,
52
- "item": dict | None,
53
- "items": list[dict],
54
- "meta": dict,
55
- "error": str | None,
56
- }
57
- ```
58
-
59
- Rules:
60
- - `items` is the canonical list field.
61
- - `item` is only a singleton convenience.
62
- - `meta` contains helper-owned execution, coverage, and limit information.
63
- - For metadata-oriented prompts, return the relevant `meta` fields instead of inferring coverage from list length alone.
64
- - For bounded list/sample helpers in raw mode, returning the helper envelope directly preserves helper-owned `meta` fields.
65
-
66
- ## Routing guide
67
-
68
- ### Summary vs detail
69
- - Summary helpers are the default for list/search/trending questions: `hf_repo_search(...)`, `hf_trending(...)`, `hf_daily_papers(...)`, `hf_user_likes(...)`, `hf_recent_activity(...)`, `hf_collections_search(...)`, `hf_collection_items(...)`, `hf_org_members(...)`, `hf_user_graph(...)`.
70
- - Use `hf_repo_details(...)` when the user needs exact repo metadata rather than a cheap summary row.
71
- - Do **not** invent follow-up detail calls unless the user explicitly needs fields that are not already available in the current helper response.
72
-
73
- ### Runtime self-description
74
- - Supported helpers / default fields / limits / raw API affordances → `hf_runtime_capabilities(...)`
75
- - If the question is specifically about helper defaults or cost behavior, prefer `hf_runtime_capabilities(section="helper_defaults")`.
76
-
77
- ### Repo questions
78
- - Exact `owner/name` details → `hf_repo_details(repo_type="auto", ...)`
79
- - Search/discovery/list/top repos → `hf_repo_search(...)`
80
- - True trending requests → `hf_trending(...)`
81
- - Daily papers → `hf_daily_papers(...)`
82
- - Repo discussions → `hf_repo_discussions(...)`
83
- - Specific discussion details / latest comment text → `hf_repo_discussion_details(...)`
84
- - Users who liked a specific repo → `hf_repo_likers(...)`
85
-
86
- ### User questions
87
- - Profile / overview / "tell me about user X" → `hf_profile_summary(...)`
88
- - Follower/following **counts** for a user → prefer `hf_profile_summary(...)`
89
- - Followers / following **lists**, graph samples, and social joins → `hf_user_graph(...)`
90
- - Repos a user liked → `hf_user_likes(...)`
91
- - Recent actions / activity feed → `hf_recent_activity(feed_type="user", entity=...)`
92
-
93
- ### Organization questions
94
- - Organization details and counts → `hf_profile_summary(...)`
95
- - Organization members → `hf_org_members(...)`
96
- - Organization repos → `hf_repo_search(author="<org>", repo_types=[...])`
97
- - Organization or user collections → `hf_collections_search(owner="<org-or-user>", ...)`
98
- - Repos inside a known collection → `hf_collection_items(collection_id=...)`
99
-
100
- ### Direction reminders
101
- - `hf_user_likes(...)` = **user → repos**
102
- - `hf_repo_likers(...)` = **repo → users**
103
- - `hf_user_graph(...)` = **user/org → followers/following**
104
- - `"who follows X"` → `hf_user_graph(username="X", relation="followers", ...)`
105
- - `"who does X follow"` → `hf_user_graph(username="X", relation="following", ...)`
106
- - If the author/org is already known, start with `hf_repo_search(author=...)` instead of semantic search.
107
- - For "most popular repo a user liked", use `hf_user_likes(sort="repoLikes" | "repoDownloads", ranking_window=40)` instead of fetching recent likes and re-ranking locally.
108
-
109
- ### Join / intersection guidance
110
- - For set-intersection questions, prefer **one helper call per side + local set logic**.
111
- - Example: `"who in the huggingface org follows evalstate"` should use:
112
- 1. `hf_org_members(organization="huggingface", ...)`
113
- 2. `hf_user_graph(username="evalstate", relation="followers", ...)`
114
- 3. intersect `username` locally
115
- - Example: `"who in the huggingface org does evalstate follow"` should use:
116
- 1. `hf_org_members(organization="huggingface", ...)`
117
- 2. `hf_user_graph(username="evalstate", relation="following", ...)`
118
- 3. intersect `username` locally
119
- - Do **not** invert follower/following direction when restating the prompt.
120
- - Do **not** do one graph call per org member for these intersection questions unless you explicitly need a bounded fallback.
121
-
122
- ## Common row keys
123
- Use these canonical keys unless the user explicitly wants different names.
124
-
125
- - Repo rows: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `library_name`, `repo_url`, `tags`
126
- - Daily paper rows: `paper_id`, `title`, `published_at`, `authors`, `organization`, `repo_id`, `rank`
127
- - User graph/member rows: `username`, `fullname`, `isPro`, `role`, `type`
128
- - Activity rows: `event_type`, `repo_id`, `repo_type`, `timestamp`
129
- - Collection rows: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `last_updated`, `item_count`
130
- - `hf_profile_summary(...)["item"]`: `handle`, `entity_type`, `display_name`, `bio`, `description`, `avatar_url`, `website_url`, `twitter_url`, `github_url`, `linkedin_url`, `bluesky_url`, `followers_count`, `following_count`, `likes_count`, `members_count`, `models_count`, `datasets_count`, `spaces_count`, `is_pro`, `likes_sample`, `activity_sample`
131
-
132
- Common aliases in `fields=[...]` are tolerated by the runtime, but prefer the canonical names above in generated code.
133
-
134
- ## Common repo fields
135
- - `repo_id`
136
- - `repo_type`
137
- - `author`
138
- - `likes`
139
- - `downloads`
140
- - `created_at`
141
- - `last_modified`
142
- - `pipeline_tag`
143
- - `num_params`
144
- - `repo_url`
145
- - model: `library_name`
146
- - dataset: `description`, `paperswithcode_id`
147
- - space: `sdk`, `models`, `datasets`, `subdomain`
148
- - trending: `trending_rank`, `trending_score` when present
149
- - prefer `repo_id` as the display label for repos; `title` may be absent or may just mirror `repo_id`
150
-
151
- Common aliases tolerated in `fields=[...]`:
152
- - `repoId` → `repo_id`
153
- - `repoType` → `repo_type`
154
- - `repoUrl` → `repo_url`
155
- - `createdAt` → `created_at`
156
- - `lastModified` → `last_modified`
157
- - `numParams` → `num_params`
158
-
159
- ## Common collection fields
160
- - `collection_id`
161
- - `slug`
162
- - `title`
163
- - `owner`
164
- - `owner_type`
165
- - `description`
166
- - `last_updated`
167
- - `item_count`
168
-
169
- Common aliases tolerated in `fields=[...]`:
170
- - `collectionId` → `collection_id`
171
- - `lastUpdated` → `last_updated`
172
- - `ownerType` → `owner_type`
173
- - `itemCount` → `item_count`
174
- - `author` → `owner`
175
-
176
- ## High-signal usage notes
177
- - `hf_repo_search(...)` defaults to models if no repo type is specified. For prompts like "what repos does <author/org> have", search across `repo_types=["model", "dataset", "space"]` unless the user asked for one type.
178
- - `hf_repo_search(...)` and `hf_trending(...)` are summary helpers. Use `hf_repo_details(...)` when the user explicitly needs exact repo metadata.
179
- - For models, datasets, and spaces, do **not** rely on a separate repo `title` field in summary outputs. Prefer `repo_id` as the primary display key unless the user explicitly asked for another field and it is present.
180
- - `hf_repo_search(...)` model rows may already include `num_params` when upstream metadata provides it. Use that cheap summary field before considering detail hydration.
181
- - `hf_trending(...)` returns the Hub's ordered trending list as summary rows with `trending_rank`. `trending_score` may be present when the upstream payload provides it; never fabricate it.
182
- - `hf_daily_papers(...)` is the normal path for today's daily papers. `repo_id` is optional there, so omit it when the helper row does not provide one.
183
- - `hf_profile_summary(...)` is the fastest way to answer common profile prompts. Read profile/social fields directly from `summary["item"]`.
184
- - For prompts like "how many followers do I have?" or "how many users does X follow?", prefer `hf_profile_summary(...)["item"]` for the aggregate count.
185
- - For prompts like "who follows me?", "who does X follow?", or any follower/following intersection, use `hf_user_graph(...)` with the correct `relation`.
186
- - For "how many models/datasets/spaces does user/org X have?" prompts, prefer `hf_profile_summary(...)["item"]` over `hf_repo_search(..., limit=1)` or invented `count_only` args.
187
- - Use `hf_whoami()` when you need the explicit current username for joins, comparisons, or output labeling.
188
- - For overlap/comparison/ranking/join tasks, fetch a broad enough **working set** first and compute locally in code.
189
- - It is good to use a larger internal working set than the final user-facing output. Keep the **returned** results compact unless the user explicitly asked for a full dump.
190
- - For completeness-sensitive joins over followers/members/likers, use an explicit large `return_limit` on the seed helpers rather than `return_limit=None`.
191
- - Good pattern: use larger limits internally for coverage, then return only the compact final intersection/ranking/projection the user asked for.
192
- - Avoid per-row hydration calls unless you truly need exact metadata that is not already present in the current helper response.
193
- - For prompts that ask for both a sample and metadata, keep the sample compact and surface helper-owned `meta` fields explicitly.
194
- - For follower/member social-link lookups, first fetch usernames with `hf_user_graph(...)` or `hf_org_members(...)`, then fetch profile/social data with `hf_profile_summary(handle=...)`.
195
- - For fan-out tasks that require one helper call per follower/member/liker/repo/user, prefer bounded seed sets **by default** so ordinary requests stay fast and predictable.
196
- - If the user explicitly asks for exhaustive coverage (`all`, `scan all`, `entire`, `not just the first N`, `ensure more than the first 20`, etc.), do **not** silently cap the seed at a small sample such as 20 or 50.
197
- - For those explicit exhaustive requests, attempt a substantially broader seed scan first when the runtime budget permits.
198
- - For explicit exhaustive follower/member scans, prefer omitting `return_limit` or using a value large enough to cover the expected total. Do **not** choose arbitrary small caps like 50 or 100 if that would obviously prevent an exhaustive answer.
199
- - If the prompt says both `scan all` and `more than the first 20`, the `scan all` requirement wins. Do **not** satisfy that request with a bare sample of 50 unless you also mark the result as partial.
200
- - If exhaustive coverage is still not feasible within `max_calls` or timeout, say so clearly and return an explicit partial result with coverage metadata instead of presenting a bounded sample as if it were complete.
201
- - When you return a composed partial result, use the exact top-level keys `results` and `coverage` unless the user explicitly asked for a different schema. Do **not** rename `results` to `items`, `rows`, `liked_models`, or similar.
202
- - Do **not** use your own top-level transport wrapper named `meta` in raw mode; runtime already owns the outer `meta`.
203
- - Good coverage fields for partial fan-out results include: `partial`, `reason`, `seed_limit`, `seed_processed`, `seed_total`, `seed_more_available`, `per_entity_limit`, and `next_request_hint`.
204
- - If the user did not explicitly require exhaustiveness, a clear partial result with coverage metadata is better than failing with `Max API calls exceeded`.
205
- - If the user **did** explicitly require exhaustiveness and you cannot complete it, do not imply success. Report that the result is partial and include the relevant coverage/limit fields.
206
- - For explicit exhaustive follower/member prompts, if `meta.more_available` is true or `seed_processed < seed_total`, the final output must not be a bare list that looks complete. Include explicit partial/coverage information.
207
- - For compact join outputs, it is fine for the internal seed helpers to use larger limits than the final returned list. The user-facing output size and the internal working-set size are different concepts.
208
- - Use `hf_recent_activity(...)` for activity feeds instead of raw `call_api('/api/recent-activity', ...)`.
209
- - Use `hf_repo_search(author=..., repo_type="space", ...)` for Spaces by author; there is no separate spaces-by-author helper.
210
- - Use `hf_collections_search(owner=...)` for "what collections does this org/user have?" prompts.
211
- - `hf_collections_search(...)` is for finding/listing collections. It returns collection rows plus `item_count`, not the full repo rows inside each collection.
212
- - Use `hf_collection_items(collection_id=...)` for "what repos/models/datasets/spaces are in this collection?" prompts.
213
- - Do **not** guess raw collection item endpoints such as `/api/collections/.../items`.
214
-
215
- ## Helper API
216
- ```py
217
- await hf_runtime_capabilities(section: str | None = None)
218
-
219
- await hf_profile_summary(
220
- handle: str | None = None,
221
- include: list[str] | None = None,
222
- likes_limit: int = 10,
223
- activity_limit: int = 10,
224
- )
225
- # include supports only: ["likes"], ["activity"], or ["likes", "activity"]
226
- # aggregate counts like followers_count / following_count / models_count are already in item
227
-
228
- await hf_org_members(
229
- organization: str,
230
- return_limit: int | None = None,
231
- scan_limit: int | None = None,
232
- count_only: bool = False,
233
- where: dict | None = None,
234
- fields: list[str] | None = None,
235
- )
236
-
237
- await hf_repo_search(
238
- query: str | None = None,
239
- repo_type: str | None = None,
240
- repo_types: list[str] | None = None,
241
- author: str | None = None,
242
- filters: list[str] | None = None,
243
- sort: str | None = None,
244
- limit: int = 20,
245
- where: dict | None = None,
246
- fields: list[str] | None = None,
247
- advanced: dict | None = None,
248
- )
249
-
250
- await hf_repo_details(
251
- repo_id: str | None = None,
252
- repo_ids: list[str] | None = None,
253
- repo_type: str = "auto",
254
- fields: list[str] | None = None,
255
- )
256
-
257
- await hf_trending(
258
- repo_type: str = "model",
259
- limit: int = 20,
260
- where: dict | None = None,
261
- fields: list[str] | None = None,
262
- )
263
-
264
- await hf_daily_papers(
265
- limit: int = 20,
266
- where: dict | None = None,
267
- fields: list[str] | None = None,
268
- )
269
-
270
- await hf_user_graph(
271
- username: str | None = None,
272
- relation: str = "followers",
273
- return_limit: int | None = None,
274
- scan_limit: int | None = None,
275
- count_only: bool = False,
276
- pro_only: bool | None = None,
277
- where: dict | None = None,
278
- fields: list[str] | None = None,
279
- )
280
-
281
- await hf_repo_likers(
282
- repo_id: str,
283
- repo_type: str,
284
- return_limit: int | None = None,
285
- count_only: bool = False,
286
- pro_only: bool | None = None,
287
- where: dict | None = None,
288
- fields: list[str] | None = None,
289
- )
290
-
291
- await hf_user_likes(
292
- username: str | None = None,
293
- repo_types: list[str] | None = None,
294
- return_limit: int | None = None,
295
- scan_limit: int | None = None,
296
- count_only: bool = False,
297
- where: dict | None = None,
298
- fields: list[str] | None = None,
299
- sort: str | None = None,
300
- ranking_window: int | None = None,
301
- )
302
-
303
- await hf_recent_activity(
304
- feed_type: str | None = None,
305
- entity: str | None = None,
306
- activity_types: list[str] | None = None,
307
- repo_types: list[str] | None = None,
308
- return_limit: int | None = None,
309
- max_pages: int | None = None,
310
- start_cursor: str | None = None,
311
- count_only: bool = False,
312
- where: dict | None = None,
313
- fields: list[str] | None = None,
314
- )
315
-
316
- await hf_repo_discussions(repo_type: str, repo_id: str, limit: int = 20)
317
- await hf_repo_discussion_details(repo_type: str, repo_id: str, discussion_num: int)
318
-
319
- await hf_collections_search(
320
- query: str | None = None,
321
- owner: str | None = None,
322
- return_limit: int = 20,
323
- count_only: bool = False,
324
- where: dict | None = None,
325
- fields: list[str] | None = None,
326
- )
327
-
328
- await hf_collection_items(
329
- collection_id: str,
330
- repo_types: list[str] | None = None,
331
- return_limit: int = 100,
332
- count_only: bool = False,
333
- where: dict | None = None,
334
- fields: list[str] | None = None,
335
- )
336
-
337
- await hf_whoami()
338
- await call_api(endpoint: str, params: dict | None = None, method: str = "GET", json_body: dict | None = None)
339
- ```
340
-
341
- ## Minimal patterns
342
- ```py
343
- # Exact repo details
344
- info = await hf_repo_details(
345
- repo_id="black-forest-labs/FLUX.1-dev",
346
- repo_type="auto",
347
- fields=["repo_id", "repo_type", "author", "pipeline_tag", "library_name", "num_params", "likes", "downloads", "repo_url"],
348
- )
349
- item = info["item"] or (info["items"][0] if info["items"] else None)
350
- return {
351
- "repo_id": item["repo_id"],
352
- "repo_type": item["repo_type"],
353
- "author": item["author"],
354
- "pipeline_tag": item.get("pipeline_tag"),
355
- "library_name": item.get("library_name"),
356
- "num_params": item.get("num_params"),
357
- "likes": item.get("likes"),
358
- "downloads": item.get("downloads"),
359
- "repo_url": item.get("repo_url"),
360
- }
361
-
362
- # Runtime capability / supported-field introspection
363
- caps = await hf_runtime_capabilities(section="fields")
364
- if not caps["ok"]:
365
- return caps
366
- item = caps["item"] or (caps["items"][0] if caps["items"] else None)
367
- return item["content"]
368
-
369
- # Compact profile summary
370
- summary = await hf_profile_summary(
371
- handle="mishig",
372
- include=["likes", "activity"],
373
- likes_limit=10,
374
- activity_limit=10,
375
- )
376
- item = summary["item"] or (summary["items"][0] if summary["items"] else None)
377
- return {
378
- "followers_count": item["followers_count"],
379
- "following_count": item.get("following_count"),
380
- "activity_sample": item.get("activity_sample", []),
381
- "likes_sample": item.get("likes_sample", []),
382
- }
383
-
384
- # Current user's pro followers and their recent liked repos
385
- followers = await hf_user_graph(
386
- relation="followers",
387
- pro_only=True,
388
- fields=["username"],
389
- )
390
- if not followers["ok"]:
391
- return followers
392
- result = {}
393
- for row in followers["items"]:
394
- uname = row.get("username")
395
- if not uname:
396
- continue
397
- likes = await hf_user_likes(
398
- username=uname,
399
- return_limit=3,
400
- fields=["repo_id", "repo_type", "liked_at", "repo_url"],
401
- )
402
- repos = []
403
- for item in likes["items"]:
404
- repo = {}
405
- for key in ["repo_id", "repo_type", "liked_at", "repo_url"]:
406
- if item.get(key) is not None:
407
- repo[key] = item[key]
408
- if repo:
409
- repos.append(repo)
410
- if repos:
411
- result[uname] = repos
412
- return result
413
-
414
- # Fan-out query with bounded partial coverage metadata
415
- followers = await hf_user_graph(
416
- relation="followers",
417
- return_limit=20,
418
- fields=["username"],
419
- )
420
- if not followers["ok"]:
421
- return followers
422
- result = {}
423
- processed = 0
424
- for row in followers["items"]:
425
- uname = row.get("username")
426
- if not uname:
427
- continue
428
- likes = await hf_user_likes(
429
- username=uname,
430
- repo_types=["model"],
431
- return_limit=3,
432
- fields=["repo_id", "repo_author", "liked_at"],
433
- )
434
- processed += 1
435
- items = []
436
- for item in likes["items"]:
437
- liked = {}
438
- for key in ["repo_id", "repo_author", "liked_at"]:
439
- if item.get(key) is not None:
440
- liked[key] = item[key]
441
- if liked:
442
- items.append(liked)
443
- if items:
444
- result[uname] = items
445
- return {
446
- "results": result,
447
- "coverage": {
448
- "partial": bool(followers["meta"].get("more_available")),
449
- "reason": "fanout_budget",
450
- "seed_relation": "followers",
451
- "seed_limit": 20,
452
- "seed_processed": processed,
453
- "seed_total": followers["meta"].get("total"),
454
- "seed_more_available": followers["meta"].get("more_available"),
455
- "per_entity_limit": 3,
456
- "next_request_hint": "Ask for a smaller subset or a follow-up batch if you want more coverage.",
457
- },
458
- }
459
-
460
- # Popularity-ranked likes with metadata
461
- likes = await hf_user_likes(
462
- username="julien-c",
463
- return_limit=1,
464
- sort="repoLikes",
465
- ranking_window=40,
466
- fields=["repo_id", "repo_type", "repo_author", "likes", "repo_url", "liked_at"],
467
- )
468
- item = likes["item"] or (likes["items"][0] if likes["items"] else None)
469
- if item is None:
470
- return {"error": "No liked repositories found"}
471
- repo = {}
472
- for key in ["repo_id", "repo_type", "repo_author", "likes", "repo_url", "liked_at"]:
473
- if item.get(key) is not None:
474
- repo[key] = item[key]
475
- return {
476
- "repo": repo,
477
- "metadata": {
478
- "sort_applied": likes["meta"].get("sort_applied"),
479
- "ranking_window": likes["meta"].get("ranking_window"),
480
- "ranking_complete": likes["meta"].get("ranking_complete"),
481
- },
482
- }
483
-
484
- # Recent activity with compact snake_case rows
485
- activity = await hf_recent_activity(
486
- feed_type="user",
487
- entity="mishig",
488
- return_limit=15,
489
- fields=["event_type", "repo_id", "repo_type", "timestamp"],
490
- )
491
- result = []
492
- for row in activity["items"]:
493
- item = {}
494
- for key in ["event_type", "repo_id", "repo_type", "timestamp"]:
495
- if row.get(key) is not None:
496
- item[key] = row[key]
497
- if item:
498
- result.append(item)
499
- return result
500
-
501
- # Repo discussions
502
- rows = await hf_repo_discussions(
503
- repo_type="model",
504
- repo_id="Qwen/Qwen3.5-35B-A3B",
505
- limit=10,
506
- )
507
- return [
508
- {
509
- "num": row["num"],
510
- "title": row["title"],
511
- "author": row["author"],
512
- "status": row["status"],
513
- }
514
- for row in rows["items"]
515
- ]
516
-
517
- # Collections owned by an org or user
518
- collections = await hf_collections_search(
519
- owner="Qwen",
520
- return_limit=20,
521
- fields=["collection_id", "title", "owner", "description", "last_updated", "item_count"],
522
- )
523
- return collections["items"]
524
-
525
- # Daily papers via the helper
526
- papers = await hf_daily_papers(
527
- limit=20,
528
- fields=["title", "repo_id"],
529
- )
530
- return papers["items"]
531
-
532
- # Organization repo counts
533
- org = await hf_profile_summary("unsloth")
534
- item = org["item"] or (org["items"][0] if org["items"] else None)
535
- return {
536
- "organization": item["handle"],
537
- "models_count": item.get("models_count"),
538
- "datasets_count": item.get("datasets_count"),
539
- "spaces_count": item.get("spaces_count"),
540
- }
541
-
542
- # Do any authors of the top trending spaces follow me?
543
- who = await hf_whoami()
544
- if not who["ok"]:
545
- return who
546
- me = (who["item"] or (who["items"][0] if who["items"] else None)).get("username")
547
- spaces = await hf_trending(
548
- repo_type="space",
549
- limit=20,
550
- fields=["repo_id", "author", "repo_url"],
551
- )
552
- authors = []
553
- seen = set()
554
- for row in spaces["items"]:
555
- author = row.get("author")
556
- if isinstance(author, str) and author and author not in seen:
557
- seen.add(author)
558
- authors.append(author)
559
-
560
- results = []
561
- processed = 0
562
- for author in authors[:20]:
563
- graph = await hf_user_graph(
564
- username=author,
565
- relation="following",
566
- return_limit=200,
567
- fields=["username"],
568
- )
569
- processed += 1
570
- if not graph["ok"]:
571
- continue
572
- if any(item.get("username") == me for item in graph["items"]):
573
- results.append(author)
574
-
575
- return {
576
- "results": results,
577
- "coverage": {
578
- "partial": False,
579
- "reason": None,
580
- "seed_relation": "trending_space_authors",
581
- "seed_limit": 20,
582
- "seed_processed": processed,
583
- "seed_total": len(authors),
584
- "seed_more_available": False,
585
- "per_entity_limit": 200,
586
- },
587
- }
588
-
589
- # Models inside an org's collections
590
- collections = await hf_collections_search(
591
- owner="openai",
592
- return_limit=20,
593
- fields=["collection_id", "title"],
594
- )
595
- result = {}
596
- for coll in collections["items"]:
597
- collection_id = coll.get("collection_id")
598
- title = coll.get("title") or collection_id
599
- if not collection_id:
600
- continue
601
- items = await hf_collection_items(
602
- collection_id=collection_id,
603
- repo_types=["model"],
604
- fields=["repo_id", "repo_type", "repo_url"],
605
- )
606
- if items["items"]:
607
- result[title] = items["items"]
608
- return result
609
- ```
 
1
+ Compatibility wrapper over the live `.prod` Monty prompt:
2
 
3
+ {{file:.prod/agent-cards/shared/_monty_codegen_shared.md}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.prefab/agent-cards/_prefab_wire_shared.md CHANGED
@@ -181,6 +181,46 @@ Prefer:
181
  - structure over decoration
182
  - a few confident sections over many tiny widgets
183
  - built-in variants over custom color classes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
  If `theme` is omitted, the default renderer styling should look mostly good out of the box.
186
  Do not hand-author lots of colors unless the user explicitly asks for branding.
@@ -253,6 +293,9 @@ Prefer this palette first:
253
  - `PieChart`
254
  - `LineChart`
255
  - `BarChart`
 
 
 
256
 
257
  Useful but secondary:
258
  - `ButtonGroup`
@@ -438,6 +481,7 @@ For Hugging Face Hub-style results, these defaults are especially good:
438
 
439
  For Hub search/navigation results:
440
  - preserve important names, ids, counts, dates, and URLs exactly from the payload
 
441
  - do not invent values or smooth over missing fields
442
  - highlight a few useful summary metrics before the full table
443
  - preserve ranking/order clearly when ranking matters
 
181
  - structure over decoration
182
  - a few confident sections over many tiny widgets
183
  - built-in variants over custom color classes
184
+ - app-like restraint over marketing chrome
185
+ - a strong primary workspace over a wall of cards
186
+
187
+ ## Frontend-friendly defaults
188
+
189
+ Bias toward calm product UI rather than raw data dumps.
190
+
191
+ Prefer these compositions:
192
+ - search / browse pages:
193
+ - one summary card or slim header row
194
+ - optional KPI grid (`Grid` + `Metric`) for 2-4 headline numbers
195
+ - one main results surface, usually `DataTable`
196
+ - grouped counts / proportions:
197
+ - split layout with a donut `PieChart` and a compact `DataTable`
198
+ - forms / filters:
199
+ - short option lists → `Select`
200
+ - long option lists or tags / categories → `Combobox`
201
+ - multi-value tags / categories → `MultiSelect`
202
+ - model-driven forms should feel like compact operator UI, not generic CRUD dumps
203
+
204
+ For tables:
205
+ - if there are more than ~8 rows, prefer `search: true`
206
+ - if there are more than ~10 rows, prefer `paginated: true` with a sensible `pageSize`
207
+ - if a numeric column is clearly a metric, align it right and use `format: "number"`
208
+ - if a short categorical column should work like a facet (tags, repo type, status), set `DataTableColumn.filterable: true`
209
+ - hide long raw URL columns when `onRowClick` or action buttons communicate the destination better
210
+
211
+ For charts:
212
+ - use donut charts for 2-8 grouped categories with one obvious label key and one obvious numeric key
213
+ - prefer `innerRadius: 60`, `paddingAngle: 2`, `showLegend: true`, `showTooltip: true`
214
+ - when combining charts and tables, usually stack the chart above the table rather than placing them side-by-side, because tables are wide and charts stay legible in a narrower vertical slot
215
+ - only use a horizontal chart+table split when both are compact and the table has very few columns
216
+ - avoid charts when the answer is just a long ranking table
217
+
218
+ Avoid:
219
+ - giant dashboards made of many small cards
220
+ - decorative heroes, gradient marketing sections, or center-column landing-page layouts
221
+ - repeated `Separator` stacks where a `Card`, `Tabs`, or `Grid` would create clearer hierarchy
222
+ - noisy badge soup; badges should be short and sparse
223
+ - dumping every field just because it exists
224
 
225
  If `theme` is omitted, the default renderer styling should look mostly good out of the box.
226
  Do not hand-author lots of colors unless the user explicitly asks for branding.
 
293
  - `PieChart`
294
  - `LineChart`
295
  - `BarChart`
296
+ - `Select`
297
+ - `Combobox`
298
+ - `MultiSelect`
299
 
300
  Useful but secondary:
301
  - `ButtonGroup`
 
481
 
482
  For Hub search/navigation results:
483
  - preserve important names, ids, counts, dates, and URLs exactly from the payload
484
+ - avatar urls should be displayed as icons
485
  - do not invent values or smooth over missing fields
486
  - highlight a few useful summary metrics before the full table
487
  - preserve ranking/order clearly when ranking matters
.prefab/agent-cards/hub_search_raw.md CHANGED
@@ -8,7 +8,7 @@ description: "Raw live-service card for Hub search. Returns runtime-owned JSON w
8
  shell: false
9
  skills: []
10
  function_tools:
11
- - ../tool-cards/monty_api_tool_v2.py:hf_hub_query_raw
12
  request_params:
13
  tool_result_mode: passthrough
14
  ---
 
8
  shell: false
9
  skills: []
10
  function_tools:
11
+ - ../monty_api/tool_entrypoints.py:hf_hub_query_raw
12
  request_params:
13
  tool_result_mode: passthrough
14
  ---
.prefab/fastagent.config.yaml CHANGED
@@ -3,9 +3,7 @@ default_model: "$system.raw"
3
  model_references:
4
  system:
5
  default: "$system.raw"
6
- raw: hf.openai/gpt-oss-120b:sambanova
7
- prefab_native: minimax25
8
- prefab_llm: gpt-oss
9
 
10
  logger:
11
  truncate_tools: false
 
3
  model_references:
4
  system:
5
  default: "$system.raw"
6
+ raw: qwen35instruct
 
 
7
 
8
  logger:
9
  truncate_tools: false
.prefab/monty_api/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from .tool_entrypoints import HELPER_EXTERNALS, hf_hub_query, hf_hub_query_raw, main
4
+
5
+ __all__ = [
6
+ "HELPER_EXTERNALS",
7
+ "hf_hub_query",
8
+ "hf_hub_query_raw",
9
+ "main",
10
+ ]
.prefab/monty_api/tool_entrypoints.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Prefab-local shim over the live production Monty entrypoints."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import importlib.util
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ _SOURCE = (
11
+ Path(__file__).resolve().parents[2]
12
+ / ".prod"
13
+ / "monty_api"
14
+ / "tool_entrypoints.py"
15
+ )
16
+ _SPEC = importlib.util.spec_from_file_location("_prefab_prod_tool_entrypoints", _SOURCE)
17
+ if _SPEC is None or _SPEC.loader is None:
18
+ raise RuntimeError(f"could not load source tool entrypoints from {_SOURCE}")
19
+
20
+ _MODULE = importlib.util.module_from_spec(_SPEC)
21
+ _SPEC.loader.exec_module(_MODULE)
22
+
23
+ HELPER_EXTERNALS = _MODULE.HELPER_EXTERNALS
24
+ main = _MODULE.main
25
+
26
+
27
+ async def hf_hub_query(
28
+ query: str,
29
+ code: str,
30
+ max_calls: int | None = None,
31
+ timeout_sec: int | None = None,
32
+ ) -> dict[str, Any]:
33
+ return await _MODULE.hf_hub_query(
34
+ query=query,
35
+ code=code,
36
+ max_calls=max_calls,
37
+ timeout_sec=timeout_sec,
38
+ )
39
+
40
+
41
+ async def hf_hub_query_raw(
42
+ query: str,
43
+ code: str,
44
+ max_calls: int | None = None,
45
+ timeout_sec: int | None = None,
46
+ ) -> Any:
47
+ return await _MODULE.hf_hub_query_raw(
48
+ query=query,
49
+ code=code,
50
+ max_calls=max_calls,
51
+ timeout_sec=timeout_sec,
52
+ )
53
+
54
+
55
+ __all__ = [
56
+ "HELPER_EXTERNALS",
57
+ "hf_hub_query",
58
+ "hf_hub_query_raw",
59
+ "main",
60
+ ]
61
+
62
+ if __name__ == "__main__":
63
+ raise SystemExit(main())
.prefab/tool-cards/monty_api_tool_v2.py CHANGED
@@ -5,7 +5,7 @@ from pathlib import Path
5
  from typing import Any
6
 
7
  _SOURCE = (
8
- Path(__file__).resolve().parents[2] / ".prod" / "tool-cards" / "monty_api_tool_v2.py"
9
  )
10
  _SPEC = importlib.util.spec_from_file_location("_prefab_monty_api_tool_v2", _SOURCE)
11
  if _SPEC is None or _SPEC.loader is None:
@@ -14,12 +14,15 @@ if _SPEC is None or _SPEC.loader is None:
14
  _MODULE = importlib.util.module_from_spec(_SPEC)
15
  _SPEC.loader.exec_module(_MODULE)
16
 
 
 
 
17
 
18
  async def hf_hub_query(
19
  query: str,
20
  code: str,
21
- max_calls: int | None = _MODULE.DEFAULT_MAX_CALLS,
22
- timeout_sec: int | None = _MODULE.DEFAULT_TIMEOUT_SEC,
23
  ) -> dict[str, Any]:
24
  return await _MODULE.hf_hub_query(
25
  query=query,
@@ -32,8 +35,8 @@ async def hf_hub_query(
32
  async def hf_hub_query_raw(
33
  query: str,
34
  code: str,
35
- max_calls: int | None = _MODULE.DEFAULT_MAX_CALLS,
36
- timeout_sec: int | None = _MODULE.DEFAULT_TIMEOUT_SEC,
37
  ) -> Any:
38
  return await _MODULE.hf_hub_query_raw(
39
  query=query,
@@ -41,3 +44,14 @@ async def hf_hub_query_raw(
41
  max_calls=max_calls,
42
  timeout_sec=timeout_sec,
43
  )
 
 
 
 
 
 
 
 
 
 
 
 
5
  from typing import Any
6
 
7
  _SOURCE = (
8
+ Path(__file__).resolve().parents[1] / "monty_api" / "tool_entrypoints.py"
9
  )
10
  _SPEC = importlib.util.spec_from_file_location("_prefab_monty_api_tool_v2", _SOURCE)
11
  if _SPEC is None or _SPEC.loader is None:
 
14
  _MODULE = importlib.util.module_from_spec(_SPEC)
15
  _SPEC.loader.exec_module(_MODULE)
16
 
17
+ HELPER_EXTERNALS = _MODULE.HELPER_EXTERNALS
18
+ main = _MODULE.main
19
+
20
 
21
  async def hf_hub_query(
22
  query: str,
23
  code: str,
24
+ max_calls: int | None = None,
25
+ timeout_sec: int | None = None,
26
  ) -> dict[str, Any]:
27
  return await _MODULE.hf_hub_query(
28
  query=query,
 
35
  async def hf_hub_query_raw(
36
  query: str,
37
  code: str,
38
+ max_calls: int | None = None,
39
+ timeout_sec: int | None = None,
40
  ) -> Any:
41
  return await _MODULE.hf_hub_query_raw(
42
  query=query,
 
44
  max_calls=max_calls,
45
  timeout_sec=timeout_sec,
46
  )
47
+
48
+
49
+ __all__ = [
50
+ "HELPER_EXTERNALS",
51
+ "hf_hub_query",
52
+ "hf_hub_query_raw",
53
+ "main",
54
+ ]
55
+
56
+ if __name__ == "__main__":
57
+ raise SystemExit(main())
.prod/agent-cards/shared/_monty_codegen_shared.md ADDED
@@ -0,0 +1,666 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Code Generation Rules
2
+
3
+ - You are writing Python to be executed in a secure runtime environment.
4
+ - **NEVER** use `import` - it is NOT available in this environment.
5
+ - All helper calls are async: always use `await`.
6
+ - Use this exact outer shape:
7
+
8
+ ```py
9
+ async def solve(query, max_calls):
10
+ ...
11
+
12
+ await solve(query, max_calls)
13
+ ```
14
+
15
+ - `max_calls` is the total external-call budget for the whole program.
16
+ - Use only documented `hf_*` helpers.
17
+ - Return plain Python data only: `dict`, `list`, `str`, `int`, `float`, `bool`, or `None`.
18
+ - Do **not** hand-build JSON strings or markdown strings inside `solve(...)` unless the user explicitly asked for prose.
19
+ - Do **not** build your own transport wrapper like `{result: ..., meta: ...}`.
20
+ - If the user says "return only" some fields, return exactly that final shape.
21
+ - If a helper already returns the requested row shape, return `resp["items"]` directly **only when helper coverage is clearly complete**. If helper `meta` suggests partial/unknown coverage, return `{"results": resp["items"], "coverage": resp["meta"]}` instead of bare items.
22
+ - For current-user prompts (`my`, `me`), try helpers with `username=None` / `handle=None` first.
23
+ - If a current-user helper returns `ok=false`, return that helper response directly.
24
+
25
+ ## Search rules
26
+
27
+ - If the user is asking about models, use `hf_models_search(...)`.
28
+ - If the user is asking about datasets, use `hf_datasets_search(...)`.
29
+ - If the user is asking about spaces, use `hf_spaces_search(...)`.
30
+ - Use `hf_repo_search(...)` only for intentionally cross-type search.
31
+ - Use `hf_trending(...)` only for the small "what is trending right now" feed.
32
+ - If the user says "trending" but also adds searchable constraints like `pipeline_tag`, `author`, search text, or `num_params` bounds, prefer the repo search helper sorted by `trending_score`.
33
+ - Think of search helpers as filter-first discovery and `hf_trending(...)` as rank-first current-feed inspection.
34
+
35
+ ## Parameter notes
36
+
37
+ - Trust the generated helper contracts below for per-helper params, fields, sort keys, expand values, and defaults.
38
+ - When the user asks for helper-owned coverage metadata, use `helper_resp["meta"]`.
39
+ - Treat any of the following helper-meta signals as coverage-sensitive: `limit_boundary_hit`, `truncated`, `more_available` not equal to `False`, `sample_complete=false`, `exact_count=false`, `ranking_complete=false`, `ranking_window_hit=true`, or `hard_cap_applied=true`. In those cases, do **not** return bare items; return `{"results": ..., "coverage": ...}`.
40
+ - For pro-only follower/member/liker queries, prefer `pro_only=True` instead of filtering on a projected field.
41
+ - `hf_user_likes(...)` already returns full normalized like rows by default; omit `fields` unless the user asked for a subset.
42
+ - When sorting `hf_user_likes(...)` by `repo_likes` or `repo_downloads`, set `ranking_window=50` unless the user explicitly asked for a narrower recent window.
43
+ - For human-facing follower/member/liker lists without an explicit requested count, prefer `limit=100` and return coverage when more may exist.
44
+ - Unknown `fields` / `where` keys now fail fast. Use only canonical field names.
45
+
46
+ - Ownership phrasing like "what collections does Qwen have", "collections by Qwen", or "collections owned by Qwen" means an owner lookup, so use `hf_collections_search(owner="Qwen")`, not a keyword-only `query="Qwen"` search.
47
+ - Ownership phrasing like "what spaces does X have", "what models does X have", or "what datasets does X have" means an author/owner inventory lookup, so use `hf_spaces_search(author="X")`, `hf_models_search(author="X")`, or `hf_datasets_search(author="X")` rather than a global keyword-only search.
48
+ - Owner/user/org handles may arrive with different casing in the user message; when a handle spelling is uncertain, prefer owner-oriented logic and, if needed, add fallback inside `solve(...)` that broadens to `query=...` and filters owners case-insensitively.
49
+ - For exact aggregate counts like "how many models/datasets/spaces does X have", prefer `hf_profile_summary(...)['item']` counts. Those overview-owned counts may differ slightly from visible public search/list results, so if the user also asked for the list, preserve that distinction.
50
+ - For owner inventory queries without an explicit requested count, use `hf_profile_summary(...)` first when a specific owner is known. If the count is modest, use it to size the follow-up list call; otherwise return a bounded list plus coverage instead of pretending completeness.
51
+ - Think like `huggingface_hub`: `search`, `filter`, `author`, repo-type-specific upstream params, then `fields`.
52
+ - Push constraints upstream whenever a first-class helper argument exists.
53
+ - `post_filter` is only for normalized row filters that cannot be pushed upstream.
54
+ - Keep `post_filter` simple:
55
+ - exact match or `in` for returned fields like `runtime_stage`
56
+ - `gte` / `lte` for normalized numeric fields like `num_params`, `downloads`, and `likes`
57
+ - `num_params` is one of the main valid reasons to use `post_filter` on model search today.
58
+ - Do **not** use `post_filter` for things that already have first-class upstream params like `author`, `pipeline_tag`, `dataset_name`, `language`, `models`, or `datasets`.
59
+
60
+ Examples:
61
+
62
+ ```py
63
+ await hf_models_search(pipeline_tag="text-to-image", limit=10)
64
+ await hf_datasets_search(search="speech", sort="downloads", limit=10)
65
+ await hf_spaces_search(post_filter={"runtime_stage": {"in": ["BUILD_ERROR", "RUNTIME_ERROR"]}})
66
+ await hf_models_search(
67
+ pipeline_tag="text-generation",
68
+ sort="trending_score",
69
+ limit=50,
70
+ post_filter={"num_params": {"gte": 20_000_000_000, "lte": 80_000_000_000}},
71
+ )
72
+ await hf_collections_search(owner="Qwen", limit=10)
73
+ ```
74
+
75
+ Field-only pattern:
76
+
77
+ ```py
78
+ resp = await hf_models_search(
79
+ pipeline_tag="text-to-image",
80
+ fields=["repo_id", "author", "likes", "downloads", "repo_url"],
81
+ limit=3,
82
+ )
83
+ return resp["items"]
84
+ ```
85
+
86
+ Coverage pattern:
87
+
88
+ ```py
89
+ resp = await hf_user_likes(
90
+ username="julien-c",
91
+ sort="repo_likes",
92
+ ranking_window=50,
93
+ limit=20,
94
+ fields=["repo_id", "repo_likes", "repo_url"],
95
+ )
96
+ return {"results": resp["items"], "coverage": resp["meta"]}
97
+ ```
98
+
99
+ Owner-inventory pattern:
100
+
101
+ ```py
102
+ profile = await hf_profile_summary(handle="huggingface")
103
+ count = (profile.get("item") or {}).get("spaces_count")
104
+ limit = 200 if not isinstance(count, int) else min(max(count, 1), 200)
105
+ resp = await hf_spaces_search(
106
+ author="huggingface",
107
+ limit=limit,
108
+ fields=["repo_id", "repo_url"],
109
+ )
110
+ meta = resp.get("meta") or {}
111
+ if meta.get("limit_boundary_hit") or meta.get("more_available") not in {False, None}:
112
+ return {"results": resp["items"], "coverage": {**meta, "profile_spaces_count": count}}
113
+ return resp["items"]
114
+ ```
115
+
116
+ Profile-count pattern:
117
+
118
+ ```py
119
+ profile = await hf_profile_summary(handle="mishig")
120
+ item = profile["item"] or {}
121
+ return {
122
+ "followers_count": item.get("followers_count"),
123
+ "following_count": item.get("following_count"),
124
+ }
125
+ ```
126
+
127
+ Pro-followers pattern:
128
+
129
+ ```py
130
+ followers = await hf_user_graph(
131
+ relation="followers",
132
+ pro_only=True,
133
+ limit=20,
134
+ fields=["username"],
135
+ )
136
+ return followers["items"]
137
+ ```
138
+
139
+ ## Navigation graph
140
+
141
+ Use the helper that matches the question type.
142
+
143
+ - exact repo details → `hf_repo_details(...)`
144
+ - model search/list/discovery → `hf_models_search(...)`
145
+ - dataset search/list/discovery → `hf_datasets_search(...)`
146
+ - space search/list/discovery → `hf_spaces_search(...)`
147
+ - cross-type repo search → `hf_repo_search(...)`
148
+ - trending repos → `hf_trending(...)`
149
+ - daily papers → `hf_daily_papers(...)`
150
+ - repo discussions → `hf_repo_discussions(...)`
151
+ - specific discussion details → `hf_repo_discussion_details(...)`
152
+ - users who liked one repo → `hf_repo_likers(...)`
153
+ - profile / overview / aggregate counts → `hf_profile_summary(...)`
154
+ - followers / following lists → `hf_user_graph(...)`
155
+ - repos a user liked → `hf_user_likes(...)`
156
+ - recent activity feed → `hf_recent_activity(...)`
157
+ - organization members → `hf_org_members(...)`
158
+ - collections search → `hf_collections_search(...)`
159
+ - items inside a known collection → `hf_collection_items(...)`
160
+ - explicit current username → `hf_whoami()`
161
+
162
+ Direction reminders:
163
+ - `hf_user_likes(...)` = user → repos
164
+ - `hf_repo_likers(...)` = repo → users
165
+ - `hf_user_graph(...)` = user/org → followers/following
166
+
167
+ ## Helper result shape
168
+
169
+ All helpers return:
170
+
171
+ ```py
172
+ {
173
+ "ok": bool,
174
+ "item": dict | None,
175
+ "items": list[dict],
176
+ "meta": dict,
177
+ "error": str | None,
178
+ }
179
+ ```
180
+
181
+ Rules:
182
+ - `items` is the canonical list field.
183
+ - `item` is just a singleton convenience.
184
+ - `meta` contains helper-owned execution, limit, and coverage info.
185
+ - When helper-owned coverage matters, prefer returning the helper envelope directly.
186
+
187
+ ## High-signal output rules
188
+
189
+ - Prefer compact dict/list outputs over prose when the user asked for fields.
190
+ - Prefer summary helpers before detail hydration.
191
+ - Use canonical snake_case keys in generated code and structured output.
192
+ - Use `repo_id` as the display label for repos.
193
+ - Use `hf_profile_summary(...)['item']` for aggregate counts such as followers, following, models, datasets, and spaces.
194
+ - For selective one-shot search helpers, treat `meta.limit_boundary_hit=true` as a partial/unknown-coverage warning even if `meta.truncated` is still `false`.
195
+ - For joins/intersections/rankings, fetch the needed working set first and compute locally.
196
+ - If the result is partial, use top-level keys `results` and `coverage`.
197
+
198
+ ## Helper signatures (generated from Python)
199
+
200
+ These signatures are exported from the live runtime with `inspect.signature(...)`.
201
+ If prompt prose and signatures disagree, trust these signatures.
202
+
203
+ ```py
204
+ await hf_collection_items(collection_id: 'str', repo_types: 'list[str] | None' = None, limit: 'int' = 100, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
205
+
206
+ await hf_collections_search(query: 'str | None' = None, owner: 'str | None' = None, limit: 'int' = 20, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
207
+
208
+ await hf_daily_papers(limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
209
+
210
+ await hf_datasets_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, benchmark: 'str | bool | None' = None, dataset_name: 'str | None' = None, gated: 'bool | None' = None, language_creators: 'str | list[str] | None' = None, language: 'str | list[str] | None' = None, multilinguality: 'str | list[str] | None' = None, size_categories: 'str | list[str] | None' = None, task_categories: 'str | list[str] | None' = None, task_ids: 'str | list[str] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
211
+
212
+ await hf_models_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, apps: 'str | list[str] | None' = None, gated: 'bool | None' = None, inference: 'str | None' = None, inference_provider: 'str | list[str] | None' = None, model_name: 'str | None' = None, trained_dataset: 'str | list[str] | None' = None, pipeline_tag: 'str | None' = None, emissions_thresholds: 'tuple[float, float] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, card_data: 'bool' = False, fetch_config: 'bool' = False, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
213
+
214
+ await hf_org_members(organization: 'str', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
215
+
216
+ await hf_profile_summary(handle: 'str | None' = None, include: 'list[str] | None' = None, likes_limit: 'int' = 10, activity_limit: 'int' = 10) -> 'dict[str, Any]'
217
+
218
+ await hf_recent_activity(feed_type: 'str | None' = None, entity: 'str | None' = None, activity_types: 'list[str] | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, max_pages: 'int | None' = None, start_cursor: 'str | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
219
+
220
+ await hf_repo_details(repo_id: 'str | None' = None, repo_ids: 'list[str] | None' = None, repo_type: 'str' = 'auto', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
221
+
222
+ await hf_repo_discussion_details(repo_type: 'str', repo_id: 'str', discussion_num: 'int', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
223
+
224
+ await hf_repo_discussions(repo_type: 'str', repo_id: 'str', limit: 'int' = 20, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
225
+
226
+ await hf_repo_likers(repo_id: 'str', repo_type: 'str', limit: 'int | None' = None, count_only: 'bool' = False, pro_only: 'bool | None' = None, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
227
+
228
+ await hf_repo_search(search: 'str | None' = None, repo_type: 'str | None' = None, repo_types: 'list[str] | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, sort: 'str | None' = None, limit: 'int' = 20, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
229
+
230
+ await hf_runtime_capabilities(section: 'str | None' = None) -> 'dict[str, Any]'
231
+
232
+ await hf_spaces_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, datasets: 'str | list[str] | None' = None, models: 'str | list[str] | None' = None, linked: 'bool' = False, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
233
+
234
+ await hf_trending(repo_type: 'str' = 'model', limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
235
+
236
+ await hf_user_graph(username: 'str | None' = None, relation: 'str' = 'followers', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, pro_only: 'bool | None' = None, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
237
+
238
+ await hf_user_likes(username: 'str | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None, sort: 'str | None' = None, ranking_window: 'int | None' = None) -> 'dict[str, Any]'
239
+
240
+ await hf_whoami() -> 'dict[str, Any]'
241
+ ```
242
+
243
+ ## Helper contracts (generated from runtime + wrapper metadata)
244
+
245
+ These contracts describe the normalized wrapper surface exposed to generated code.
246
+ Field names and helper-visible enum values are canonical snake_case wrapper names.
247
+
248
+ All helpers return the same envelope: `{ok, item, items, meta, error}`.
249
+
250
+ ### hf_collection_items
251
+
252
+ - category: `collection_navigation`
253
+ - returns:
254
+ - envelope: `{ok, item, items, meta, error}`
255
+ - row_type: `repo`
256
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
257
+ - guaranteed_fields: `repo_id`, `repo_type`, `repo_url`
258
+ - optional_fields: `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
259
+ - supported_params: `collection_id`, `repo_types`, `limit`, `count_only`, `where`, `fields`
260
+ - param_values:
261
+ - repo_types: `model`, `dataset`, `space`
262
+ - fields_contract:
263
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
264
+ - canonical_only: `true`
265
+ - where_contract:
266
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
267
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
268
+ - normalized_only: `true`
269
+ - limit_contract:
270
+ - default_limit: `100`
271
+ - max_limit: `500`
272
+ - notes: Returns repos inside one collection as summary rows.
273
+
274
+ ### hf_collections_search
275
+
276
+ - category: `collection_search`
277
+ - returns:
278
+ - envelope: `{ok, item, items, meta, error}`
279
+ - row_type: `collection`
280
+ - default_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
281
+ - guaranteed_fields: `collection_id`, `title`, `owner`
282
+ - optional_fields: `slug`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
283
+ - supported_params: `query`, `owner`, `limit`, `count_only`, `where`, `fields`
284
+ - fields_contract:
285
+ - allowed_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
286
+ - canonical_only: `true`
287
+ - where_contract:
288
+ - allowed_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
289
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
290
+ - normalized_only: `true`
291
+ - limit_contract:
292
+ - default_limit: `20`
293
+ - max_limit: `500`
294
+ - notes: Collection summary helper.
295
+
296
+ ### hf_daily_papers
297
+
298
+ - category: `curated_feed`
299
+ - returns:
300
+ - envelope: `{ok, item, items, meta, error}`
301
+ - row_type: `daily_paper`
302
+ - default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
303
+ - guaranteed_fields: `paper_id`, `title`, `published_at`, `rank`
304
+ - optional_fields: `summary`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`
305
+ - supported_params: `limit`, `where`, `fields`
306
+ - fields_contract:
307
+ - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
308
+ - canonical_only: `true`
309
+ - where_contract:
310
+ - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
311
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
312
+ - normalized_only: `true`
313
+ - limit_contract:
314
+ - default_limit: `20`
315
+ - max_limit: `500`
316
+ - notes: Returns daily paper summary rows. repo_id is omitted unless the upstream payload provides it.
317
+
318
+ ### hf_datasets_search
319
+
320
+ - category: `wrapped_hf_repo_search`
321
+ - backed_by: `HfApi.list_datasets`
322
+ - returns:
323
+ - envelope: `{ok, item, items, meta, error}`
324
+ - row_type: `repo`
325
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
326
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
327
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
328
+ - supported_params: `search`, `filter`, `author`, `benchmark`, `dataset_name`, `gated`, `language_creators`, `language`, `multilinguality`, `size_categories`, `task_categories`, `task_ids`, `sort`, `limit`, `expand`, `full`, `fields`, `post_filter`
329
+ - sort_values: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
330
+ - expand_values: `author`, `card_data`, `citation`, `created_at`, `description`, `disabled`, `downloads`, `downloads_all_time`, `gated`, `last_modified`, `likes`, `paperswithcode_id`, `private`, `resource_group`, `sha`, `siblings`, `tags`, `trending_score`, `xet_enabled`, `gitaly_uid`
331
+ - fields_contract:
332
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
333
+ - canonical_only: `true`
334
+ - post_filter_contract:
335
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
336
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
337
+ - normalized_only: `true`
338
+ - limit_contract:
339
+ - default_limit: `20`
340
+ - max_limit: `5000`
341
+ - notes: Thin dataset-search wrapper around the Hub list_datasets path. Prefer this over hf_repo_search for dataset-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
342
+
343
+ ### hf_models_search
344
+
345
+ - category: `wrapped_hf_repo_search`
346
+ - backed_by: `HfApi.list_models`
347
+ - returns:
348
+ - envelope: `{ok, item, items, meta, error}`
349
+ - row_type: `repo`
350
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
351
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
352
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
353
+ - supported_params: `search`, `filter`, `author`, `apps`, `gated`, `inference`, `inference_provider`, `model_name`, `trained_dataset`, `pipeline_tag`, `emissions_thresholds`, `sort`, `limit`, `expand`, `full`, `card_data`, `fetch_config`, `fields`, `post_filter`
354
+ - sort_values: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
355
+ - expand_values: `author`, `base_models`, `card_data`, `config`, `created_at`, `disabled`, `downloads`, `downloads_all_time`, `eval_results`, `gated`, `gguf`, `inference`, `inference_provider_mapping`, `last_modified`, `library_name`, `likes`, `mask_token`, `model_index`, `pipeline_tag`, `private`, `resource_group`, `safetensors`, `sha`, `siblings`, `spaces`, `tags`, `transformers_info`, `trending_score`, `widget_data`, `xet_enabled`, `gitaly_uid`
356
+ - fields_contract:
357
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
358
+ - canonical_only: `true`
359
+ - post_filter_contract:
360
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
361
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
362
+ - normalized_only: `true`
363
+ - limit_contract:
364
+ - default_limit: `20`
365
+ - max_limit: `5000`
366
+ - notes: Thin model-search wrapper around the Hub list_models path. Prefer this over hf_repo_search for model-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
367
+
368
+ ### hf_org_members
369
+
370
+ - category: `graph_scan`
371
+ - returns:
372
+ - envelope: `{ok, item, items, meta, error}`
373
+ - row_type: `actor`
374
+ - default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
375
+ - guaranteed_fields: `username`
376
+ - optional_fields: `fullname`, `is_pro`, `role`, `type`
377
+ - supported_params: `organization`, `limit`, `scan_limit`, `count_only`, `where`, `fields`
378
+ - fields_contract:
379
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
380
+ - canonical_only: `true`
381
+ - where_contract:
382
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
383
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
384
+ - normalized_only: `true`
385
+ - limit_contract:
386
+ - default_limit: `1000`
387
+ - max_limit: `10000`
388
+ - scan_max: `10000`
389
+ - notes: Returns organization member summary rows.
390
+
391
+ ### hf_profile_summary
392
+
393
+ - category: `profile_summary`
394
+ - returns:
395
+ - envelope: `{ok, item, items, meta, error}`
396
+ - row_type: `profile`
397
+ - default_fields: `handle`, `entity_type`, `display_name`, `bio`, `description`, `avatar_url`, `website_url`, `twitter_url`, `github_url`, `linkedin_url`, `bluesky_url`, `followers_count`, `following_count`, `likes_count`, `members_count`, `models_count`, `datasets_count`, `spaces_count`, `discussions_count`, `papers_count`, `upvotes_count`, `organizations`, `is_pro`, `likes_sample`, `activity_sample`
398
+ - guaranteed_fields: `handle`, `entity_type`
399
+ - optional_fields: `display_name`, `bio`, `description`, `avatar_url`, `website_url`, `twitter_url`, `github_url`, `linkedin_url`, `bluesky_url`, `followers_count`, `following_count`, `likes_count`, `members_count`, `models_count`, `datasets_count`, `spaces_count`, `discussions_count`, `papers_count`, `upvotes_count`, `organizations`, `is_pro`, `likes_sample`, `activity_sample`
400
+ - supported_params: `handle`, `include`, `likes_limit`, `activity_limit`
401
+ - param_values:
402
+ - include: `likes`, `activity`
403
+ - notes: Profile summary helper. Aggregate counts like followers_count/following_count are in the base item. include=['likes', 'activity'] adds composed samples and extra upstream work; no other include values are supported. Overview-owned repo counts may differ slightly from visible public search/list results.
404
+
405
+ ### hf_recent_activity
406
+
407
+ - category: `activity_feed`
408
+ - returns:
409
+ - envelope: `{ok, item, items, meta, error}`
410
+ - row_type: `activity`
411
+ - default_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
412
+ - guaranteed_fields: `event_type`, `timestamp`
413
+ - optional_fields: `repo_id`, `repo_type`
414
+ - supported_params: `feed_type`, `entity`, `activity_types`, `repo_types`, `limit`, `max_pages`, `start_cursor`, `count_only`, `where`, `fields`
415
+ - param_values:
416
+ - feed_type: `user`, `org`
417
+ - repo_types: `model`, `dataset`, `space`
418
+ - fields_contract:
419
+ - allowed_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
420
+ - canonical_only: `true`
421
+ - where_contract:
422
+ - allowed_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
423
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
424
+ - normalized_only: `true`
425
+ - limit_contract:
426
+ - default_limit: `100`
427
+ - max_limit: `2000`
428
+ - max_pages: `10`
429
+ - page_limit: `100`
430
+ - notes: Activity helper may fetch multiple pages when requested coverage exceeds one page. count_only may still be a lower bound unless the feed exhausts before max_pages.
431
+
432
+ ### hf_repo_details
433
+
434
+ - category: `repo_detail`
435
+ - returns:
436
+ - envelope: `{ok, item, items, meta, error}`
437
+ - row_type: `repo`
438
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
439
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
440
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
441
+ - supported_params: `repo_id`, `repo_ids`, `repo_type`, `fields`
442
+ - param_values:
443
+ - repo_type: `model`, `dataset`, `space`, `auto`
444
+ - fields_contract:
445
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
446
+ - canonical_only: `true`
447
+ - notes: Exact repo metadata path. Multiple repo_ids may trigger one detail call per requested repo.
448
+
449
+ ### hf_repo_discussion_details
450
+
451
+ - category: `discussion_detail`
452
+ - returns:
453
+ - envelope: `{ok, item, items, meta, error}`
454
+ - row_type: `discussion_detail`
455
+ - default_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
456
+ - guaranteed_fields: `repo_id`, `repo_type`, `title`, `author`, `status`
457
+ - optional_fields: `num`, `created_at`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
458
+ - supported_params: `repo_type`, `repo_id`, `discussion_num`, `fields`
459
+ - param_values:
460
+ - repo_type: `model`, `dataset`, `space`
461
+ - fields_contract:
462
+ - allowed_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
463
+ - canonical_only: `true`
464
+ - notes: Exact discussion detail helper.
465
+
466
+ ### hf_repo_discussions
467
+
468
+ - category: `discussion_summary`
469
+ - returns:
470
+ - envelope: `{ok, item, items, meta, error}`
471
+ - row_type: `discussion`
472
+ - default_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`
473
+ - guaranteed_fields: `num`, `title`, `author`, `status`
474
+ - optional_fields: `repo_id`, `repo_type`, `created_at`, `url`
475
+ - supported_params: `repo_type`, `repo_id`, `limit`, `fields`
476
+ - param_values:
477
+ - repo_type: `model`, `dataset`, `space`
478
+ - fields_contract:
479
+ - allowed_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`
480
+ - canonical_only: `true`
481
+ - limit_contract:
482
+ - default_limit: `20`
483
+ - max_limit: `200`
484
+ - notes: Discussion summary helper.
485
+
486
+ ### hf_repo_likers
487
+
488
+ - category: `repo_to_users`
489
+ - returns:
490
+ - envelope: `{ok, item, items, meta, error}`
491
+ - row_type: `actor`
492
+ - default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
493
+ - guaranteed_fields: `username`
494
+ - optional_fields: `fullname`, `is_pro`, `role`, `type`
495
+ - supported_params: `repo_id`, `repo_type`, `limit`, `count_only`, `pro_only`, `where`, `fields`
496
+ - param_values:
497
+ - repo_type: `model`, `dataset`, `space`
498
+ - fields_contract:
499
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
500
+ - canonical_only: `true`
501
+ - where_contract:
502
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
503
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
504
+ - normalized_only: `true`
505
+ - limit_contract:
506
+ - default_limit: `1000`
507
+ - notes: Returns users who liked a repo.
508
+
509
+ ### hf_repo_search
510
+
511
+ - category: `cross_type_repo_search`
512
+ - returns:
513
+ - envelope: `{ok, item, items, meta, error}`
514
+ - row_type: `repo`
515
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
516
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
517
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
518
+ - supported_params: `search`, `repo_type`, `repo_types`, `filter`, `author`, `sort`, `limit`, `fields`, `post_filter`
519
+ - sort_values_by_repo_type:
520
+ - dataset: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
521
+ - model: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
522
+ - space: `created_at`, `last_modified`, `likes`, `trending_score`
523
+ - param_values:
524
+ - repo_type: `model`, `dataset`, `space`
525
+ - repo_types: `model`, `dataset`, `space`
526
+ - sort: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
527
+ - fields_contract:
528
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
529
+ - canonical_only: `true`
530
+ - post_filter_contract:
531
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
532
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
533
+ - normalized_only: `true`
534
+ - limit_contract:
535
+ - default_limit: `20`
536
+ - max_limit: `5000`
537
+ - notes: Small generic repo-search helper. Prefer hf_models_search, hf_datasets_search, or hf_spaces_search for single-type queries; use hf_repo_search for intentionally cross-type search. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
538
+
539
+ ### hf_runtime_capabilities
540
+
541
+ - category: `introspection`
542
+ - returns:
543
+ - envelope: `{ok, item, items, meta, error}`
544
+ - row_type: `runtime_capability`
545
+ - default_fields: `allowed_sections`, `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
546
+ - guaranteed_fields: `allowed_sections`, `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
547
+ - optional_fields: []
548
+ - supported_params: `section`
549
+ - param_values:
550
+ - section: `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
551
+ - notes: Introspection helper. Use section=... to narrow the response.
552
+
553
+ ### hf_spaces_search
554
+
555
+ - category: `wrapped_hf_repo_search`
556
+ - backed_by: `HfApi.list_spaces`
557
+ - returns:
558
+ - envelope: `{ok, item, items, meta, error}`
559
+ - row_type: `repo`
560
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
561
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
562
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
563
+ - supported_params: `search`, `filter`, `author`, `datasets`, `models`, `linked`, `sort`, `limit`, `expand`, `full`, `fields`, `post_filter`
564
+ - sort_values: `created_at`, `last_modified`, `likes`, `trending_score`
565
+ - expand_values: `author`, `card_data`, `created_at`, `datasets`, `disabled`, `last_modified`, `likes`, `models`, `private`, `resource_group`, `runtime`, `sdk`, `sha`, `siblings`, `subdomain`, `tags`, `trending_score`, `xet_enabled`, `gitaly_uid`
566
+ - fields_contract:
567
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
568
+ - canonical_only: `true`
569
+ - post_filter_contract:
570
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
571
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
572
+ - normalized_only: `true`
573
+ - limit_contract:
574
+ - default_limit: `20`
575
+ - max_limit: `5000`
576
+ - notes: Thin space-search wrapper around the Hub list_spaces path. Prefer this over hf_repo_search for space-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
577
+
578
+ ### hf_trending
579
+
580
+ - category: `curated_repo_feed`
581
+ - returns:
582
+ - envelope: `{ok, item, items, meta, error}`
583
+ - row_type: `repo`
584
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
585
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`, `trending_rank`
586
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
587
+ - supported_params: `repo_type`, `limit`, `where`, `fields`
588
+ - param_values:
589
+ - repo_type: `model`, `dataset`, `space`, `all`
590
+ - fields_contract:
591
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
592
+ - canonical_only: `true`
593
+ - where_contract:
594
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
595
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
596
+ - normalized_only: `true`
597
+ - limit_contract:
598
+ - default_limit: `20`
599
+ - max_limit: `20`
600
+ - notes: Returns ordered trending summary rows only. Use hf_repo_details for exact repo metadata.
601
+
602
+ ### hf_user_graph
603
+
604
+ - category: `graph_scan`
605
+ - returns:
606
+ - envelope: `{ok, item, items, meta, error}`
607
+ - row_type: `actor`
608
+ - default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
609
+ - guaranteed_fields: `username`
610
+ - optional_fields: `fullname`, `is_pro`, `role`, `type`
611
+ - supported_params: `username`, `relation`, `limit`, `scan_limit`, `count_only`, `pro_only`, `where`, `fields`
612
+ - param_values:
613
+ - relation: `followers`, `following`
614
+ - fields_contract:
615
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
616
+ - canonical_only: `true`
617
+ - where_contract:
618
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
619
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
620
+ - normalized_only: `true`
621
+ - limit_contract:
622
+ - default_limit: `1000`
623
+ - max_limit: `10000`
624
+ - scan_max: `10000`
625
+ - notes: Returns followers/following summary rows.
626
+
627
+ ### hf_user_likes
628
+
629
+ - category: `user_to_repos`
630
+ - returns:
631
+ - envelope: `{ok, item, items, meta, error}`
632
+ - row_type: `user_like`
633
+ - default_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
634
+ - guaranteed_fields: `liked_at`, `repo_id`, `repo_type`
635
+ - optional_fields: `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
636
+ - supported_params: `username`, `repo_types`, `limit`, `scan_limit`, `count_only`, `where`, `fields`, `sort`, `ranking_window`
637
+ - sort_values: `liked_at`, `repo_likes`, `repo_downloads`
638
+ - param_values:
639
+ - repo_types: `model`, `dataset`, `space`
640
+ - sort: `liked_at`, `repo_likes`, `repo_downloads`
641
+ - fields_contract:
642
+ - allowed_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
643
+ - canonical_only: `true`
644
+ - where_contract:
645
+ - allowed_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
646
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
647
+ - normalized_only: `true`
648
+ - limit_contract:
649
+ - default_limit: `100`
650
+ - max_limit: `2000`
651
+ - enrich_max: `50`
652
+ - ranking_default: `50`
653
+ - scan_max: `10000`
654
+ - notes: Default recency mode is cheap. Popularity-ranked sorts use canonical keys liked_at/repo_likes/repo_downloads and rerank only a bounded recent shortlist. Check meta.ranking_complete / meta.ranking_window when ranking by popularity; helper-owned coverage matters here.
655
+
656
+ ### hf_whoami
657
+
658
+ - category: `identity`
659
+ - returns:
660
+ - envelope: `{ok, item, items, meta, error}`
661
+ - row_type: `user`
662
+ - default_fields: `username`, `fullname`, `is_pro`
663
+ - guaranteed_fields: `username`
664
+ - optional_fields: `fullname`, `is_pro`
665
+ - supported_params: []
666
+ - notes: Returns the current authenticated user when a request token is available.
.prod/agent-cards/shared/_monty_codegen_shared.template.md ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Code Generation Rules
2
+
3
+ - You are writing Python to be executed in a secure runtime environment.
4
+ - **NEVER** use `import` - it is NOT available in this environment.
5
+ - All helper calls are async: always use `await`.
6
+ - Use this exact outer shape:
7
+
8
+ ```py
9
+ async def solve(query, max_calls):
10
+ ...
11
+
12
+ await solve(query, max_calls)
13
+ ```
14
+
15
+ - `max_calls` is the total external-call budget for the whole program.
16
+ - Use only documented `hf_*` helpers.
17
+ - Return plain Python data only: `dict`, `list`, `str`, `int`, `float`, `bool`, or `None`.
18
+ - Do **not** hand-build JSON strings or markdown strings inside `solve(...)` unless the user explicitly asked for prose.
19
+ - Do **not** build your own transport wrapper like `{result: ..., meta: ...}`.
20
+ - If the user says "return only" some fields, return exactly that final shape.
21
+ - If a helper already returns the requested row shape, return `resp["items"]` directly **only when helper coverage is clearly complete**. If helper `meta` suggests partial/unknown coverage, return `{"results": resp["items"], "coverage": resp["meta"]}` instead of bare items.
22
+ - For current-user prompts (`my`, `me`), try helpers with `username=None` / `handle=None` first.
23
+ - If a current-user helper returns `ok=false`, return that helper response directly.
24
+
25
+ ## Search rules
26
+
27
+ - If the user is asking about models, use `hf_models_search(...)`.
28
+ - If the user is asking about datasets, use `hf_datasets_search(...)`.
29
+ - If the user is asking about spaces, use `hf_spaces_search(...)`.
30
+ - Use `hf_repo_search(...)` only for intentionally cross-type search.
31
+ - Use `hf_trending(...)` only for the small "what is trending right now" feed.
32
+ - If the user says "trending" but also adds searchable constraints like `pipeline_tag`, `author`, search text, or `num_params` bounds, prefer the repo search helper sorted by `trending_score`.
33
+ - Think of search helpers as filter-first discovery and `hf_trending(...)` as rank-first current-feed inspection.
34
+
35
+ ## Parameter notes
36
+
37
+ - Trust the generated helper contracts below for per-helper params, fields, sort keys, expand values, and defaults.
38
+ - When the user asks for helper-owned coverage metadata, use `helper_resp["meta"]`.
39
+ - Treat any of the following helper-meta signals as coverage-sensitive: `limit_boundary_hit`, `truncated`, `more_available` present and not equal to `False`, `sample_complete=false`, `exact_count=false`, `ranking_complete=false`, `ranking_window_hit=true`, or `hard_cap_applied=true`. In those cases, do **not** return bare items; return `{"results": ..., "coverage": ...}`.
40
+ - For pro-only follower/member/liker queries, prefer `pro_only=True` instead of filtering on a projected field.
41
+ - `hf_user_likes(...)` already returns full normalized like rows by default; omit `fields` unless the user asked for a subset.
42
+ - When sorting `hf_user_likes(...)` by `repo_likes` or `repo_downloads`, set `ranking_window=50` unless the user explicitly asked for a narrower recent window.
43
+ - For human-facing follower/member/liker lists without an explicit requested count, prefer `limit=100` and return coverage when more may exist.
44
+ - Unknown `fields` / `where` keys now fail fast. Use only canonical field names.
45
+
46
+ - Ownership phrasing like "what collections does Qwen have", "collections by Qwen", or "collections owned by Qwen" means an owner lookup, so use `hf_collections_search(owner="Qwen")`, not a keyword-only `query="Qwen"` search.
47
+ - Ownership phrasing like "what spaces does X have", "what models does X have", or "what datasets does X have" means an author/owner inventory lookup, so use `hf_spaces_search(author="X")`, `hf_models_search(author="X")`, or `hf_datasets_search(author="X")` rather than a global keyword-only search.
48
+ - Owner/user/org handles may arrive with different casing in the user message; when a handle spelling is uncertain, prefer owner-oriented logic and, if needed, add a fallback inside `solve(...)` that broadens to `query=...` and filters owners case-insensitively.
49
+ - For exact aggregate counts like "how many models/datasets/spaces does X have", prefer `hf_profile_summary(...)['item']` counts. Those overview-owned counts may differ slightly from visible public search/list results, so if the user also asked for the list, preserve that distinction.
50
+ - For owner inventory queries without an explicit requested count, use `hf_profile_summary(...)` first when a specific owner is known. If the count is modest, use it to size the follow-up list call; otherwise return a bounded list plus coverage instead of pretending completeness.
51
+ - Think like `huggingface_hub`: `search`, `filter`, `author`, repo-type-specific upstream params, then `fields`.
52
+ - Push constraints upstream whenever a first-class helper argument exists.
53
+ - `post_filter` is only for normalized row filters that cannot be pushed upstream.
54
+ - Keep `post_filter` simple:
55
+ - exact match or `in` for returned fields like `runtime_stage`
56
+ - `gte` / `lte` for normalized numeric fields like `num_params`, `downloads`, and `likes`
57
+ - `num_params` is one of the main valid reasons to use `post_filter` on model search today.
58
+ - Do **not** use `post_filter` for things that already have first-class upstream params like `author`, `pipeline_tag`, `dataset_name`, `language`, `models`, or `datasets`.
59
+
60
+ Examples:
61
+
62
+ ```py
63
+ await hf_models_search(pipeline_tag="text-to-image", limit=10)
64
+ await hf_datasets_search(search="speech", sort="downloads", limit=10)
65
+ await hf_spaces_search(post_filter={"runtime_stage": {"in": ["BUILD_ERROR", "RUNTIME_ERROR"]}})
66
+ await hf_models_search(
67
+ pipeline_tag="text-generation",
68
+ sort="trending_score",
69
+ limit=50,
70
+ post_filter={"num_params": {"gte": 20_000_000_000, "lte": 80_000_000_000}},
71
+ )
72
+ await hf_collections_search(owner="Qwen", limit=10)
73
+ ```
74
+
75
+ Field-only pattern:
76
+
77
+ ```py
78
+ resp = await hf_models_search(
79
+ pipeline_tag="text-to-image",
80
+ fields=["repo_id", "author", "likes", "downloads", "repo_url"],
81
+ limit=3,
82
+ )
83
+ return resp["items"]
84
+ ```
85
+
86
+ Coverage pattern:
87
+
88
+ ```py
89
+ resp = await hf_user_likes(
90
+ username="julien-c",
91
+ sort="repo_likes",
92
+ ranking_window=50,
93
+ limit=20,
94
+ fields=["repo_id", "repo_likes", "repo_url"],
95
+ )
96
+ return {"results": resp["items"], "coverage": resp["meta"]}
97
+ ```
98
+
99
+ Owner-inventory pattern:
100
+
101
+ ```py
102
+ profile = await hf_profile_summary(handle="huggingface")
103
+ count = (profile.get("item") or {}).get("spaces_count")
104
+ limit = 200 if not isinstance(count, int) else min(max(count, 1), 200)
105
+ resp = await hf_spaces_search(
106
+ author="huggingface",
107
+ limit=limit,
108
+ fields=["repo_id", "repo_url"],
109
+ )
110
+ meta = resp.get("meta") or {}
111
+ if meta.get("limit_boundary_hit") or meta.get("more_available") not in {False, None}:
112
+ return {"results": resp["items"], "coverage": {**meta, "profile_spaces_count": count}}
113
+ return resp["items"]
114
+ ```
115
+
116
+ Profile-count pattern:
117
+
118
+ ```py
119
+ profile = await hf_profile_summary(handle="mishig")
120
+ item = profile["item"] or {}
121
+ return {
122
+ "followers_count": item.get("followers_count"),
123
+ "following_count": item.get("following_count"),
124
+ }
125
+ ```
126
+
127
+ Pro-followers pattern:
128
+
129
+ ```py
130
+ followers = await hf_user_graph(
131
+ relation="followers",
132
+ pro_only=True,
133
+ limit=20,
134
+ fields=["username"],
135
+ )
136
+ return followers["items"]
137
+ ```
138
+
139
+ ## Navigation graph
140
+
141
+ Use the helper that matches the question type.
142
+
143
+ - exact repo details → `hf_repo_details(...)`
144
+ - model search/list/discovery → `hf_models_search(...)`
145
+ - dataset search/list/discovery → `hf_datasets_search(...)`
146
+ - space search/list/discovery → `hf_spaces_search(...)`
147
+ - cross-type repo search → `hf_repo_search(...)`
148
+ - trending repos → `hf_trending(...)`
149
+ - daily papers → `hf_daily_papers(...)`
150
+ - repo discussions → `hf_repo_discussions(...)`
151
+ - specific discussion details → `hf_repo_discussion_details(...)`
152
+ - users who liked one repo → `hf_repo_likers(...)`
153
+ - profile / overview / aggregate counts → `hf_profile_summary(...)`
154
+ - followers / following lists → `hf_user_graph(...)`
155
+ - repos a user liked → `hf_user_likes(...)`
156
+ - recent activity feed → `hf_recent_activity(...)`
157
+ - organization members → `hf_org_members(...)`
158
+ - collections search → `hf_collections_search(...)`
159
+ - items inside a known collection → `hf_collection_items(...)`
160
+ - explicit current username → `hf_whoami()`
161
+
162
+ Direction reminders:
163
+ - `hf_user_likes(...)` = user → repos
164
+ - `hf_repo_likers(...)` = repo → users
165
+ - `hf_user_graph(...)` = user/org → followers/following
166
+
167
+ ## Helper result shape
168
+
169
+ All helpers return:
170
+
171
+ ```py
172
+ {
173
+ "ok": bool,
174
+ "item": dict | None,
175
+ "items": list[dict],
176
+ "meta": dict,
177
+ "error": str | None,
178
+ }
179
+ ```
180
+
181
+ Rules:
182
+ - `items` is the canonical list field.
183
+ - `item` is just a singleton convenience.
184
+ - `meta` contains helper-owned execution, limit, and coverage info.
185
+ - When helper-owned coverage matters, prefer returning the helper envelope directly.
186
+
187
+ ## High-signal output rules
188
+
189
+ - Prefer compact dict/list outputs over prose when the user asked for fields.
190
+ - Prefer summary helpers before detail hydration.
191
+ - Use canonical snake_case keys in generated code and structured output.
192
+ - Use `repo_id` as the display label for repos.
193
+ - Use `hf_profile_summary(...)['item']` for aggregate counts such as followers, following, models, datasets, and spaces.
194
+ - For selective one-shot search helpers, treat `meta.limit_boundary_hit=true` as a partial/unknown-coverage warning even if `meta.truncated` is still `false`.
195
+ - For joins/intersections/rankings, fetch the needed working set first and compute locally.
196
+ - If the result is partial, use top-level keys `results` and `coverage`.
197
+
198
+ {{GENERATED_HELPER_SIGNATURES}}
199
+
200
+ {{GENERATED_HELPER_CONTRACTS}}
.prod/agent-cards/shared/_monty_helper_contracts.md ADDED
@@ -0,0 +1,424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Helper contracts (generated from runtime + wrapper metadata)
2
+
3
+ These contracts describe the normalized wrapper surface exposed to generated code.
4
+ Field names and helper-visible enum values are canonical snake_case wrapper names.
5
+
6
+ All helpers return the same envelope: `{ok, item, items, meta, error}`.
7
+
8
+ ### hf_collection_items
9
+
10
+ - category: `collection_navigation`
11
+ - returns:
12
+ - envelope: `{ok, item, items, meta, error}`
13
+ - row_type: `repo`
14
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
15
+ - guaranteed_fields: `repo_id`, `repo_type`, `repo_url`
16
+ - optional_fields: `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
17
+ - supported_params: `collection_id`, `repo_types`, `limit`, `count_only`, `where`, `fields`
18
+ - param_values:
19
+ - repo_types: `model`, `dataset`, `space`
20
+ - fields_contract:
21
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
22
+ - canonical_only: `true`
23
+ - where_contract:
24
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
25
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
26
+ - normalized_only: `true`
27
+ - limit_contract:
28
+ - default_limit: `100`
29
+ - max_limit: `500`
30
+ - notes: Returns repos inside one collection as summary rows.
31
+
32
+ ### hf_collections_search
33
+
34
+ - category: `collection_search`
35
+ - returns:
36
+ - envelope: `{ok, item, items, meta, error}`
37
+ - row_type: `collection`
38
+ - default_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
39
+ - guaranteed_fields: `collection_id`, `title`, `owner`
40
+ - optional_fields: `slug`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
41
+ - supported_params: `query`, `owner`, `limit`, `count_only`, `where`, `fields`
42
+ - fields_contract:
43
+ - allowed_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
44
+ - canonical_only: `true`
45
+ - where_contract:
46
+ - allowed_fields: `collection_id`, `slug`, `title`, `owner`, `owner_type`, `description`, `gating`, `last_updated`, `item_count`
47
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
48
+ - normalized_only: `true`
49
+ - limit_contract:
50
+ - default_limit: `20`
51
+ - max_limit: `500`
52
+ - notes: Collection summary helper.
53
+
54
+ ### hf_daily_papers
55
+
56
+ - category: `curated_feed`
57
+ - returns:
58
+ - envelope: `{ok, item, items, meta, error}`
59
+ - row_type: `daily_paper`
60
+ - default_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
61
+ - guaranteed_fields: `paper_id`, `title`, `published_at`, `rank`
62
+ - optional_fields: `summary`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`
63
+ - supported_params: `limit`, `where`, `fields`
64
+ - fields_contract:
65
+ - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
66
+ - canonical_only: `true`
67
+ - where_contract:
68
+ - allowed_fields: `paper_id`, `title`, `summary`, `published_at`, `submitted_on_daily_at`, `authors`, `organization`, `submitted_by`, `discussion_id`, `upvotes`, `github_repo_url`, `github_stars`, `project_page_url`, `num_comments`, `is_author_participating`, `repo_id`, `rank`
69
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
70
+ - normalized_only: `true`
71
+ - limit_contract:
72
+ - default_limit: `20`
73
+ - max_limit: `500`
74
+ - notes: Returns daily paper summary rows. repo_id is omitted unless the upstream payload provides it.
75
+
76
+ ### hf_datasets_search
77
+
78
+ - category: `wrapped_hf_repo_search`
79
+ - backed_by: `HfApi.list_datasets`
80
+ - returns:
81
+ - envelope: `{ok, item, items, meta, error}`
82
+ - row_type: `repo`
83
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
84
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
85
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
86
+ - supported_params: `search`, `filter`, `author`, `benchmark`, `dataset_name`, `gated`, `language_creators`, `language`, `multilinguality`, `size_categories`, `task_categories`, `task_ids`, `sort`, `limit`, `expand`, `full`, `fields`, `post_filter`
87
+ - sort_values: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
88
+ - expand_values: `author`, `card_data`, `citation`, `created_at`, `description`, `disabled`, `downloads`, `downloads_all_time`, `gated`, `last_modified`, `likes`, `paperswithcode_id`, `private`, `resource_group`, `sha`, `siblings`, `tags`, `trending_score`, `xet_enabled`, `gitaly_uid`
89
+ - fields_contract:
90
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
91
+ - canonical_only: `true`
92
+ - post_filter_contract:
93
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
94
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
95
+ - normalized_only: `true`
96
+ - limit_contract:
97
+ - default_limit: `20`
98
+ - max_limit: `5000`
99
+ - notes: Thin dataset-search wrapper around the Hub list_datasets path. Prefer this over hf_repo_search for dataset-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
100
+
101
+ ### hf_models_search
102
+
103
+ - category: `wrapped_hf_repo_search`
104
+ - backed_by: `HfApi.list_models`
105
+ - returns:
106
+ - envelope: `{ok, item, items, meta, error}`
107
+ - row_type: `repo`
108
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
109
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
110
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
111
+ - supported_params: `search`, `filter`, `author`, `apps`, `gated`, `inference`, `inference_provider`, `model_name`, `trained_dataset`, `pipeline_tag`, `emissions_thresholds`, `sort`, `limit`, `expand`, `full`, `card_data`, `fetch_config`, `fields`, `post_filter`
112
+ - sort_values: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
113
+ - expand_values: `author`, `base_models`, `card_data`, `config`, `created_at`, `disabled`, `downloads`, `downloads_all_time`, `eval_results`, `gated`, `gguf`, `inference`, `inference_provider_mapping`, `last_modified`, `library_name`, `likes`, `mask_token`, `model_index`, `pipeline_tag`, `private`, `resource_group`, `safetensors`, `sha`, `siblings`, `spaces`, `tags`, `transformers_info`, `trending_score`, `widget_data`, `xet_enabled`, `gitaly_uid`
114
+ - fields_contract:
115
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
116
+ - canonical_only: `true`
117
+ - post_filter_contract:
118
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
119
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
120
+ - normalized_only: `true`
121
+ - limit_contract:
122
+ - default_limit: `20`
123
+ - max_limit: `5000`
124
+ - notes: Thin model-search wrapper around the Hub list_models path. Prefer this over hf_repo_search for model-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
125
+
126
+ ### hf_org_members
127
+
128
+ - category: `graph_scan`
129
+ - returns:
130
+ - envelope: `{ok, item, items, meta, error}`
131
+ - row_type: `actor`
132
+ - default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
133
+ - guaranteed_fields: `username`
134
+ - optional_fields: `fullname`, `is_pro`, `role`, `type`
135
+ - supported_params: `organization`, `limit`, `scan_limit`, `count_only`, `where`, `fields`
136
+ - fields_contract:
137
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
138
+ - canonical_only: `true`
139
+ - where_contract:
140
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
141
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
142
+ - normalized_only: `true`
143
+ - limit_contract:
144
+ - default_limit: `1000`
145
+ - max_limit: `10000`
146
+ - scan_max: `10000`
147
+ - notes: Returns organization member summary rows.
148
+
149
+ ### hf_profile_summary
150
+
151
+ - category: `profile_summary`
152
+ - returns:
153
+ - envelope: `{ok, item, items, meta, error}`
154
+ - row_type: `profile`
155
+ - default_fields: `handle`, `entity_type`, `display_name`, `bio`, `description`, `avatar_url`, `website_url`, `twitter_url`, `github_url`, `linkedin_url`, `bluesky_url`, `followers_count`, `following_count`, `likes_count`, `members_count`, `models_count`, `datasets_count`, `spaces_count`, `discussions_count`, `papers_count`, `upvotes_count`, `organizations`, `is_pro`, `likes_sample`, `activity_sample`
156
+ - guaranteed_fields: `handle`, `entity_type`
157
+ - optional_fields: `display_name`, `bio`, `description`, `avatar_url`, `website_url`, `twitter_url`, `github_url`, `linkedin_url`, `bluesky_url`, `followers_count`, `following_count`, `likes_count`, `members_count`, `models_count`, `datasets_count`, `spaces_count`, `discussions_count`, `papers_count`, `upvotes_count`, `organizations`, `is_pro`, `likes_sample`, `activity_sample`
158
+ - supported_params: `handle`, `include`, `likes_limit`, `activity_limit`
159
+ - param_values:
160
+ - include: `likes`, `activity`
161
+ - notes: Profile summary helper. Aggregate counts like followers_count/following_count are in the base item. include=['likes', 'activity'] adds composed samples and extra upstream work; no other include values are supported. Overview-owned repo counts may differ slightly from visible public search/list results.
162
+
163
+ ### hf_recent_activity
164
+
165
+ - category: `activity_feed`
166
+ - returns:
167
+ - envelope: `{ok, item, items, meta, error}`
168
+ - row_type: `activity`
169
+ - default_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
170
+ - guaranteed_fields: `event_type`, `timestamp`
171
+ - optional_fields: `repo_id`, `repo_type`
172
+ - supported_params: `feed_type`, `entity`, `activity_types`, `repo_types`, `limit`, `max_pages`, `start_cursor`, `count_only`, `where`, `fields`
173
+ - param_values:
174
+ - feed_type: `user`, `org`
175
+ - repo_types: `model`, `dataset`, `space`
176
+ - fields_contract:
177
+ - allowed_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
178
+ - canonical_only: `true`
179
+ - where_contract:
180
+ - allowed_fields: `event_type`, `repo_id`, `repo_type`, `timestamp`
181
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
182
+ - normalized_only: `true`
183
+ - limit_contract:
184
+ - default_limit: `100`
185
+ - max_limit: `2000`
186
+ - max_pages: `10`
187
+ - page_limit: `100`
188
+ - notes: Activity helper may fetch multiple pages when requested coverage exceeds one page. count_only may still be a lower bound unless the feed exhausts before max_pages.
189
+
190
+ ### hf_repo_details
191
+
192
+ - category: `repo_detail`
193
+ - returns:
194
+ - envelope: `{ok, item, items, meta, error}`
195
+ - row_type: `repo`
196
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
197
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
198
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
199
+ - supported_params: `repo_id`, `repo_ids`, `repo_type`, `fields`
200
+ - param_values:
201
+ - repo_type: `model`, `dataset`, `space`, `auto`
202
+ - fields_contract:
203
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
204
+ - canonical_only: `true`
205
+ - notes: Exact repo metadata path. Multiple repo_ids may trigger one detail call per requested repo.
206
+
207
+ ### hf_repo_discussion_details
208
+
209
+ - category: `discussion_detail`
210
+ - returns:
211
+ - envelope: `{ok, item, items, meta, error}`
212
+ - row_type: `discussion_detail`
213
+ - default_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
214
+ - guaranteed_fields: `repo_id`, `repo_type`, `title`, `author`, `status`
215
+ - optional_fields: `num`, `created_at`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
216
+ - supported_params: `repo_type`, `repo_id`, `discussion_num`, `fields`
217
+ - param_values:
218
+ - repo_type: `model`, `dataset`, `space`
219
+ - fields_contract:
220
+ - allowed_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`, `comment_count`, `latest_comment_author`, `latest_comment_created_at`, `latest_comment_text`, `latest_comment_html`
221
+ - canonical_only: `true`
222
+ - notes: Exact discussion detail helper.
223
+
224
+ ### hf_repo_discussions
225
+
226
+ - category: `discussion_summary`
227
+ - returns:
228
+ - envelope: `{ok, item, items, meta, error}`
229
+ - row_type: `discussion`
230
+ - default_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`
231
+ - guaranteed_fields: `num`, `title`, `author`, `status`
232
+ - optional_fields: `repo_id`, `repo_type`, `created_at`, `url`
233
+ - supported_params: `repo_type`, `repo_id`, `limit`, `fields`
234
+ - param_values:
235
+ - repo_type: `model`, `dataset`, `space`
236
+ - fields_contract:
237
+ - allowed_fields: `num`, `repo_id`, `repo_type`, `title`, `author`, `created_at`, `status`, `url`
238
+ - canonical_only: `true`
239
+ - limit_contract:
240
+ - default_limit: `20`
241
+ - max_limit: `200`
242
+ - notes: Discussion summary helper.
243
+
244
+ ### hf_repo_likers
245
+
246
+ - category: `repo_to_users`
247
+ - returns:
248
+ - envelope: `{ok, item, items, meta, error}`
249
+ - row_type: `actor`
250
+ - default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
251
+ - guaranteed_fields: `username`
252
+ - optional_fields: `fullname`, `is_pro`, `role`, `type`
253
+ - supported_params: `repo_id`, `repo_type`, `limit`, `count_only`, `pro_only`, `where`, `fields`
254
+ - param_values:
255
+ - repo_type: `model`, `dataset`, `space`
256
+ - fields_contract:
257
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
258
+ - canonical_only: `true`
259
+ - where_contract:
260
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
261
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
262
+ - normalized_only: `true`
263
+ - limit_contract:
264
+ - default_limit: `1000`
265
+ - notes: Returns users who liked a repo.
266
+
267
+ ### hf_repo_search
268
+
269
+ - category: `cross_type_repo_search`
270
+ - returns:
271
+ - envelope: `{ok, item, items, meta, error}`
272
+ - row_type: `repo`
273
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
274
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
275
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
276
+ - supported_params: `search`, `repo_type`, `repo_types`, `filter`, `author`, `sort`, `limit`, `fields`, `post_filter`
277
+ - sort_values_by_repo_type:
278
+ - dataset: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
279
+ - model: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
280
+ - space: `created_at`, `last_modified`, `likes`, `trending_score`
281
+ - param_values:
282
+ - repo_type: `model`, `dataset`, `space`
283
+ - repo_types: `model`, `dataset`, `space`
284
+ - sort: `created_at`, `downloads`, `last_modified`, `likes`, `trending_score`
285
+ - fields_contract:
286
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
287
+ - canonical_only: `true`
288
+ - post_filter_contract:
289
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
290
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
291
+ - normalized_only: `true`
292
+ - limit_contract:
293
+ - default_limit: `20`
294
+ - max_limit: `5000`
295
+ - notes: Small generic repo-search helper. Prefer hf_models_search, hf_datasets_search, or hf_spaces_search for single-type queries; use hf_repo_search for intentionally cross-type search. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
296
+
297
+ ### hf_runtime_capabilities
298
+
299
+ - category: `introspection`
300
+ - returns:
301
+ - envelope: `{ok, item, items, meta, error}`
302
+ - row_type: `runtime_capability`
303
+ - default_fields: `allowed_sections`, `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
304
+ - guaranteed_fields: `allowed_sections`, `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
305
+ - optional_fields: []
306
+ - supported_params: `section`
307
+ - param_values:
308
+ - section: `overview`, `helpers`, `helper_contracts`, `helper_defaults`, `fields`, `limits`, `repo_search`
309
+ - notes: Introspection helper. Use section=... to narrow the response.
310
+
311
+ ### hf_spaces_search
312
+
313
+ - category: `wrapped_hf_repo_search`
314
+ - backed_by: `HfApi.list_spaces`
315
+ - returns:
316
+ - envelope: `{ok, item, items, meta, error}`
317
+ - row_type: `repo`
318
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
319
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`
320
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
321
+ - supported_params: `search`, `filter`, `author`, `datasets`, `models`, `linked`, `sort`, `limit`, `expand`, `full`, `fields`, `post_filter`
322
+ - sort_values: `created_at`, `last_modified`, `likes`, `trending_score`
323
+ - expand_values: `author`, `card_data`, `created_at`, `datasets`, `disabled`, `last_modified`, `likes`, `models`, `private`, `resource_group`, `runtime`, `sdk`, `sha`, `siblings`, `subdomain`, `tags`, `trending_score`, `xet_enabled`, `gitaly_uid`
324
+ - fields_contract:
325
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
326
+ - canonical_only: `true`
327
+ - post_filter_contract:
328
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
329
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
330
+ - normalized_only: `true`
331
+ - limit_contract:
332
+ - default_limit: `20`
333
+ - max_limit: `5000`
334
+ - notes: Thin space-search wrapper around the Hub list_spaces path. Prefer this over hf_repo_search for space-only queries. This is a one-shot selective search; if meta.limit_boundary_hit is true, more rows may exist and counts are not exact.
335
+
336
+ ### hf_trending
337
+
338
+ - category: `curated_repo_feed`
339
+ - returns:
340
+ - envelope: `{ok, item, items, meta, error}`
341
+ - row_type: `repo`
342
+ - default_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
343
+ - guaranteed_fields: `repo_id`, `repo_type`, `author`, `repo_url`, `trending_rank`
344
+ - optional_fields: `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`
345
+ - supported_params: `repo_type`, `limit`, `where`, `fields`
346
+ - param_values:
347
+ - repo_type: `model`, `dataset`, `space`, `all`
348
+ - fields_contract:
349
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
350
+ - canonical_only: `true`
351
+ - where_contract:
352
+ - allowed_fields: `repo_id`, `repo_type`, `author`, `likes`, `downloads`, `trending_score`, `created_at`, `last_modified`, `pipeline_tag`, `num_params`, `repo_url`, `tags`, `library_name`, `description`, `paperswithcode_id`, `sdk`, `models`, `datasets`, `subdomain`, `runtime_stage`, `runtime`, `trending_rank`
353
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
354
+ - normalized_only: `true`
355
+ - limit_contract:
356
+ - default_limit: `20`
357
+ - max_limit: `20`
358
+ - notes: Returns ordered trending summary rows only. Use hf_repo_details for exact repo metadata.
359
+
360
+ ### hf_user_graph
361
+
362
+ - category: `graph_scan`
363
+ - returns:
364
+ - envelope: `{ok, item, items, meta, error}`
365
+ - row_type: `actor`
366
+ - default_fields: `username`, `fullname`, `is_pro`, `role`, `type`
367
+ - guaranteed_fields: `username`
368
+ - optional_fields: `fullname`, `is_pro`, `role`, `type`
369
+ - supported_params: `username`, `relation`, `limit`, `scan_limit`, `count_only`, `pro_only`, `where`, `fields`
370
+ - param_values:
371
+ - relation: `followers`, `following`
372
+ - fields_contract:
373
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
374
+ - canonical_only: `true`
375
+ - where_contract:
376
+ - allowed_fields: `username`, `fullname`, `is_pro`, `role`, `type`
377
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
378
+ - normalized_only: `true`
379
+ - limit_contract:
380
+ - default_limit: `1000`
381
+ - max_limit: `10000`
382
+ - scan_max: `10000`
383
+ - notes: Returns followers/following summary rows.
384
+
385
+ ### hf_user_likes
386
+
387
+ - category: `user_to_repos`
388
+ - returns:
389
+ - envelope: `{ok, item, items, meta, error}`
390
+ - row_type: `user_like`
391
+ - default_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
392
+ - guaranteed_fields: `liked_at`, `repo_id`, `repo_type`
393
+ - optional_fields: `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
394
+ - supported_params: `username`, `repo_types`, `limit`, `scan_limit`, `count_only`, `where`, `fields`, `sort`, `ranking_window`
395
+ - sort_values: `liked_at`, `repo_likes`, `repo_downloads`
396
+ - param_values:
397
+ - repo_types: `model`, `dataset`, `space`
398
+ - sort: `liked_at`, `repo_likes`, `repo_downloads`
399
+ - fields_contract:
400
+ - allowed_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
401
+ - canonical_only: `true`
402
+ - where_contract:
403
+ - allowed_fields: `liked_at`, `repo_id`, `repo_type`, `repo_author`, `repo_likes`, `repo_downloads`, `repo_url`
404
+ - supported_ops: `eq`, `in`, `contains`, `icontains`, `gte`, `lte`
405
+ - normalized_only: `true`
406
+ - limit_contract:
407
+ - default_limit: `100`
408
+ - max_limit: `2000`
409
+ - enrich_max: `50`
410
+ - ranking_default: `50`
411
+ - scan_max: `10000`
412
+ - notes: Default recency mode is cheap. Popularity-ranked sorts use canonical keys liked_at/repo_likes/repo_downloads and rerank only a bounded recent shortlist. Check meta.ranking_complete / meta.ranking_window when ranking by popularity; helper-owned coverage matters here.
413
+
414
+ ### hf_whoami
415
+
416
+ - category: `identity`
417
+ - returns:
418
+ - envelope: `{ok, item, items, meta, error}`
419
+ - row_type: `user`
420
+ - default_fields: `username`, `fullname`, `is_pro`
421
+ - guaranteed_fields: `username`
422
+ - optional_fields: `fullname`, `is_pro`
423
+ - supported_params: []
424
+ - notes: Returns the current authenticated user when a request token is available.
.prod/agent-cards/shared/_monty_helper_signatures.md ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Helper signatures (generated from Python)
2
+
3
+ These signatures are exported from the live runtime with `inspect.signature(...)`.
4
+ If prompt prose and signatures disagree, trust these signatures.
5
+
6
+ ```py
7
+ await hf_collection_items(collection_id: 'str', repo_types: 'list[str] | None' = None, limit: 'int' = 100, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
8
+
9
+ await hf_collections_search(query: 'str | None' = None, owner: 'str | None' = None, limit: 'int' = 20, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
10
+
11
+ await hf_daily_papers(limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
12
+
13
+ await hf_datasets_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, benchmark: 'str | bool | None' = None, dataset_name: 'str | None' = None, gated: 'bool | None' = None, language_creators: 'str | list[str] | None' = None, language: 'str | list[str] | None' = None, multilinguality: 'str | list[str] | None' = None, size_categories: 'str | list[str] | None' = None, task_categories: 'str | list[str] | None' = None, task_ids: 'str | list[str] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
14
+
15
+ await hf_models_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, apps: 'str | list[str] | None' = None, gated: 'bool | None' = None, inference: 'str | None' = None, inference_provider: 'str | list[str] | None' = None, model_name: 'str | None' = None, trained_dataset: 'str | list[str] | None' = None, pipeline_tag: 'str | None' = None, emissions_thresholds: 'tuple[float, float] | None' = None, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, card_data: 'bool' = False, fetch_config: 'bool' = False, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
16
+
17
+ await hf_org_members(organization: 'str', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
18
+
19
+ await hf_profile_summary(handle: 'str | None' = None, include: 'list[str] | None' = None, likes_limit: 'int' = 10, activity_limit: 'int' = 10) -> 'dict[str, Any]'
20
+
21
+ await hf_recent_activity(feed_type: 'str | None' = None, entity: 'str | None' = None, activity_types: 'list[str] | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, max_pages: 'int | None' = None, start_cursor: 'str | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
22
+
23
+ await hf_repo_details(repo_id: 'str | None' = None, repo_ids: 'list[str] | None' = None, repo_type: 'str' = 'auto', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
24
+
25
+ await hf_repo_discussion_details(repo_type: 'str', repo_id: 'str', discussion_num: 'int', fields: 'list[str] | None' = None) -> 'dict[str, Any]'
26
+
27
+ await hf_repo_discussions(repo_type: 'str', repo_id: 'str', limit: 'int' = 20, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
28
+
29
+ await hf_repo_likers(repo_id: 'str', repo_type: 'str', limit: 'int | None' = None, count_only: 'bool' = False, pro_only: 'bool | None' = None, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
30
+
31
+ await hf_repo_search(search: 'str | None' = None, repo_type: 'str | None' = None, repo_types: 'list[str] | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, sort: 'str | None' = None, limit: 'int' = 20, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
32
+
33
+ await hf_runtime_capabilities(section: 'str | None' = None) -> 'dict[str, Any]'
34
+
35
+ await hf_spaces_search(search: 'str | None' = None, filter: 'str | list[str] | None' = None, author: 'str | None' = None, datasets: 'str | list[str] | None' = None, models: 'str | list[str] | None' = None, linked: 'bool' = False, sort: 'str | None' = None, limit: 'int' = 20, expand: 'list[str] | None' = None, full: 'bool | None' = None, fields: 'list[str] | None' = None, post_filter: 'dict[str, Any] | None' = None) -> 'dict[str, Any]'
36
+
37
+ await hf_trending(repo_type: 'str' = 'model', limit: 'int' = 20, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
38
+
39
+ await hf_user_graph(username: 'str | None' = None, relation: 'str' = 'followers', limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, pro_only: 'bool | None' = None, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None) -> 'dict[str, Any]'
40
+
41
+ await hf_user_likes(username: 'str | None' = None, repo_types: 'list[str] | None' = None, limit: 'int | None' = None, scan_limit: 'int | None' = None, count_only: 'bool' = False, where: 'dict[str, Any] | None' = None, fields: 'list[str] | None' = None, sort: 'str | None' = None, ranking_window: 'int | None' = None) -> 'dict[str, Any]'
42
+
43
+ await hf_whoami() -> 'dict[str, Any]'
44
+ ```
.prod/monty_api/__init__.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from __future__ import annotations

from .registry import HELPER_EXTERNALS

# Entrypoints resolved lazily (PEP 562) so importing the package stays cheap.
_LAZY_ENTRYPOINTS = ("hf_hub_query", "hf_hub_query_raw", "main")


def __getattr__(name: str):  # pragma: no cover - tiny import shim
    """Resolve the query entrypoints on first attribute access."""
    if name in _LAZY_ENTRYPOINTS:
        from . import query_entrypoints

        return getattr(query_entrypoints, name)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


__all__ = [
    "HELPER_EXTERNALS",
    "hf_hub_query",
    "hf_hub_query_raw",
    "main",
]
.prod/monty_api/aliases.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import get_args
4
+
5
+ try:
6
+ from huggingface_hub.hf_api import DatasetSort_T, ModelSort_T, SpaceSort_T
7
+ except ModuleNotFoundError: # pragma: no cover - dependency-light test/import path
8
+ DatasetSort_T = ()
9
+ ModelSort_T = ()
10
+ SpaceSort_T = ()
11
+
12
+ REPO_SORT_KEYS: dict[str, set[str]] = {
13
+ "model": set(get_args(ModelSort_T))
14
+ or {
15
+ "created_at",
16
+ "downloads",
17
+ "last_modified",
18
+ "likes",
19
+ "trending_score",
20
+ },
21
+ "dataset": set(get_args(DatasetSort_T))
22
+ or {
23
+ "created_at",
24
+ "downloads",
25
+ "last_modified",
26
+ "likes",
27
+ "trending_score",
28
+ },
29
+ "space": set(get_args(SpaceSort_T))
30
+ or {
31
+ "created_at",
32
+ "last_modified",
33
+ "likes",
34
+ "trending_score",
35
+ },
36
+ }
.prod/monty_api/constants.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Runtime tuning constants and canonical field-name tables for monty_api.

The scalar constants below cap budgets, scan sizes, and sandbox resources;
the trailing ``*_CANONICAL_FIELDS`` tuples enumerate the normalized snake_case
field names exposed per row type by the helper wrappers.
"""

from __future__ import annotations

DEFAULT_TIMEOUT_SEC = 90  # Default end-to-end timeout for one Monty run.

DEFAULT_MAX_CALLS = 400  # Default external-call budget exposed to callers.

MAX_CALLS_LIMIT = 400  # Absolute max external-call budget accepted by the runtime.

# NOTE(review): semantics of strict mode are not visible in this module;
# consumed elsewhere in the runtime.
INTERNAL_STRICT_MODE = False

OUTPUT_ITEMS_TRUNCATION_LIMIT = (
    500  # Final output truncation for oversized `items` payloads.
)

EXHAUSTIVE_HELPER_RETURN_HARD_CAP = (
    2_000  # Runtime hard cap for exhaustive-helper output rows.
)

SELECTIVE_ENDPOINT_RETURN_HARD_CAP = (
    200  # Default cap for one-shot selective endpoint helpers.
)

TRENDING_ENDPOINT_MAX_LIMIT = 20  # Upstream `/api/trending` endpoint maximum.

GRAPH_SCAN_LIMIT_CAP = 10_000  # Max follower/member rows scanned in one helper call.

LIKES_SCAN_LIMIT_CAP = 10_000  # Max like-event rows scanned in one helper call.

LIKES_RANKING_WINDOW_DEFAULT = (
    50  # Default shortlist size when ranking likes by repo popularity.
)

LIKES_ENRICHMENT_MAX_REPOS = (
    50  # Max liked repos enriched with extra repo-detail calls.
)

RECENT_ACTIVITY_PAGE_SIZE = 100  # Rows requested per `/api/recent-activity` page.

RECENT_ACTIVITY_SCAN_MAX_PAGES = (
    10  # Max recent-activity pages fetched in one helper call.
)

USER_SUMMARY_LIKES_SCAN_LIMIT = 1_000  # Like rows sampled for user summary.

USER_SUMMARY_ACTIVITY_MAX_PAGES = 3  # Activity pages sampled for user summary.

# Sandbox resource limits for generated-code execution.
DEFAULT_MONTY_MAX_MEMORY = 64 * 1024 * 1024  # 64 MiB

DEFAULT_MONTY_MAX_ALLOCATIONS = (
    250_000  # Approximate object-allocation ceiling in the sandbox.
)

DEFAULT_MONTY_MAX_RECURSION_DEPTH = 100  # Python recursion limit inside the sandbox.

# Canonical row fields for model/dataset/space repos.
REPO_CANONICAL_FIELDS: tuple[str, ...] = (
    "repo_id",
    "repo_type",
    "author",
    "likes",
    "downloads",
    "trending_score",
    "created_at",
    "last_modified",
    "pipeline_tag",
    "num_params",
    "repo_url",
    "tags",
    "library_name",
    "description",
    "paperswithcode_id",
    "sdk",
    "models",
    "datasets",
    "subdomain",
    "runtime_stage",
    "runtime",
)

# Canonical row fields for a user account.
USER_CANONICAL_FIELDS: tuple[str, ...] = (
    "username",
    "fullname",
    "bio",
    "website_url",
    "twitter",
    "github",
    "linkedin",
    "bluesky",
    "followers",
    "following",
    "likes",
    "is_pro",
)

# Canonical row fields for a user/org profile summary.
PROFILE_CANONICAL_FIELDS: tuple[str, ...] = (
    "handle",
    "entity_type",
    "display_name",
    "bio",
    "description",
    "avatar_url",
    "website_url",
    "twitter_url",
    "github_url",
    "linkedin_url",
    "bluesky_url",
    "followers_count",
    "following_count",
    "likes_count",
    "members_count",
    "models_count",
    "datasets_count",
    "spaces_count",
    "discussions_count",
    "papers_count",
    "upvotes_count",
    "organizations",
    "is_pro",
    "likes_sample",
    "activity_sample",
)

# Canonical row fields for follower/member/liker actor rows.
ACTOR_CANONICAL_FIELDS: tuple[str, ...] = (
    "username",
    "fullname",
    "is_pro",
    "role",
    "type",
)

# Canonical row fields for one like event in a user's likes feed.
USER_LIKES_CANONICAL_FIELDS: tuple[str, ...] = (
    "liked_at",
    "repo_id",
    "repo_type",
    "repo_author",
    "repo_likes",
    "repo_downloads",
    "repo_url",
)

# Canonical row fields for a discussion summary row.
DISCUSSION_CANONICAL_FIELDS: tuple[str, ...] = (
    "num",
    "repo_id",
    "repo_type",
    "title",
    "author",
    "created_at",
    "status",
    "url",
)

# Discussion summary fields plus latest-comment detail fields.
DISCUSSION_DETAIL_CANONICAL_FIELDS: tuple[str, ...] = (
    "num",
    "repo_id",
    "repo_type",
    "title",
    "author",
    "created_at",
    "status",
    "url",
    "comment_count",
    "latest_comment_author",
    "latest_comment_created_at",
    "latest_comment_text",
    "latest_comment_html",
)

# Canonical row fields for one recent-activity event.
ACTIVITY_CANONICAL_FIELDS: tuple[str, ...] = (
    "event_type",
    "repo_id",
    "repo_type",
    "timestamp",
)

# Canonical row fields for one collection row.
COLLECTION_CANONICAL_FIELDS: tuple[str, ...] = (
    "collection_id",
    "slug",
    "title",
    "owner",
    "owner_type",
    "description",
    "gating",
    "last_updated",
    "item_count",
)

# Canonical row fields for one daily-papers feed row.
DAILY_PAPER_CANONICAL_FIELDS: tuple[str, ...] = (
    "paper_id",
    "title",
    "summary",
    "published_at",
    "submitted_on_daily_at",
    "authors",
    "organization",
    "submitted_by",
    "discussion_id",
    "upvotes",
    "github_repo_url",
    "github_stars",
    "project_page_url",
    "num_comments",
    "is_author_participating",
    "repo_id",
    "rank",
)
.prod/monty_api/context_types.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Protocol
4
+
5
+
6
class HelperRuntimeContext(Protocol):
    """Typed helper-facing runtime context interface.

    Structural (duck-typed) view of the runtime context object handed to
    helper implementations. Only the members helpers touch directly are
    declared; ``__getattr__`` leaves the protocol open to the runtime's
    additional private methods (e.g. ``_policy_int``, ``_host_raw_call``).
    """

    # Registered helper callables keyed by helper name.
    helper_registry: dict[str, Any]
    # Mutable counter box; helpers in this package read call_count["n"]
    # as the running external-call count.
    call_count: dict[str, int]
    # Accumulated per-call trace records for the current run.
    trace: list[dict[str, Any]]
    # Summaries of limits applied/hit during the run.
    limit_summaries: list[dict[str, Any]]
    # Box holding the most recent helper error payload (or None).
    latest_helper_error_box: dict[str, dict[str, Any] | None]
    # Flags recording which internal helpers were invoked.
    internal_helper_used: dict[str, bool]

    async def call_helper(
        self, helper_name: str, /, *args: Any, **kwargs: Any
    ) -> Any: ...

    def __getattr__(self, name: str) -> Any: ...
.prod/monty_api/helper_contracts.py ADDED
@@ -0,0 +1,531 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ import re
5
+ from collections.abc import Callable, Mapping
6
+ from functools import lru_cache
7
+ from typing import Any, TypedDict, get_args
8
+
9
+ try:
10
+ import huggingface_hub.hf_api as hf_api
11
+ except ModuleNotFoundError: # pragma: no cover - dependency-light test/import path
12
+ hf_api = None
13
+
14
+ from .aliases import REPO_SORT_KEYS
15
+ from .constants import (
16
+ ACTIVITY_CANONICAL_FIELDS,
17
+ ACTOR_CANONICAL_FIELDS,
18
+ COLLECTION_CANONICAL_FIELDS,
19
+ DAILY_PAPER_CANONICAL_FIELDS,
20
+ DISCUSSION_CANONICAL_FIELDS,
21
+ DISCUSSION_DETAIL_CANONICAL_FIELDS,
22
+ PROFILE_CANONICAL_FIELDS,
23
+ REPO_CANONICAL_FIELDS,
24
+ USER_CANONICAL_FIELDS,
25
+ USER_LIKES_CANONICAL_FIELDS,
26
+ )
27
+ from .registry import (
28
+ HELPER_DEFAULT_METADATA,
29
+ PAGINATION_POLICY,
30
+ REPO_SEARCH_ALLOWED_EXPAND,
31
+ RUNTIME_CAPABILITY_FIELDS,
32
+ )
33
+
34
+
35
# Shared result envelope: every helper returns exactly these top-level keys
# (values here are human-readable type descriptions, not runtime types).
HELPER_RESULT_ENVELOPE = {
    "ok": "bool",
    "item": "dict | None",
    "items": "list[dict]",
    "meta": "dict",
    "error": "str | None",
}

# Comparison operators accepted by `where` / `post_filter` contracts.
FILTER_OPERATORS = ["eq", "in", "contains", "icontains", "gte", "lte"]
# Canonical repo-type enum shared across helper parameters.
REPO_TYPE_VALUES = ["model", "dataset", "space"]
# Trending rows expose every repo field plus the feed position.
TRENDING_CANONICAL_FIELDS = [*REPO_CANONICAL_FIELDS, "trending_rank"]
# Parameter names shared by the wrapped repo-search helpers; used as the
# dependency-light fallback when huggingface_hub cannot be introspected.
COMMON_REPO_SEARCH_PARAMS = {
    "search",
    "filter",
    "author",
    "sort",
    "limit",
    "fields",
    "post_filter",
}
55
+
56
+
57
class HelperContract(TypedDict, total=False):
    """Shape of one generated helper-contract entry.

    ``total=False``: every key is optional; ``build_helper_contracts`` only
    populates the keys that apply to a given helper.
    """

    name: str
    signature: str
    category: str
    backed_by: str
    supported_params: list[str]
    sort_values: list[str]
    sort_values_by_repo_type: dict[str, list[str]]
    expand_values: list[str]
    param_values: dict[str, list[str]]
    fields_contract: dict[str, Any]
    where_contract: dict[str, Any]
    post_filter_contract: dict[str, Any]
    limit_contract: dict[str, Any]
    returns: dict[str, Any]
    notes: str
74
+
75
# Canonical field lists per row type, keyed by the `fields_group` /
# `filter_group` names referenced from HELPER_CONTRACT_SPECS.
FIELD_GROUPS: dict[str, list[str]] = {
    "activity": list(ACTIVITY_CANONICAL_FIELDS),
    "actor": list(ACTOR_CANONICAL_FIELDS),
    "collection": list(COLLECTION_CANONICAL_FIELDS),
    "daily_paper": list(DAILY_PAPER_CANONICAL_FIELDS),
    "discussion": list(DISCUSSION_CANONICAL_FIELDS),
    "discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
    "profile": list(PROFILE_CANONICAL_FIELDS),
    "repo": list(REPO_CANONICAL_FIELDS),
    "trending_repo": list(TRENDING_CANONICAL_FIELDS),
    "runtime_capability": list(RUNTIME_CAPABILITY_FIELDS),
    "user": list(USER_CANONICAL_FIELDS),
    "user_like": list(USER_LIKES_CANONICAL_FIELDS),
}
# Legal `section` values for hf_runtime_capabilities: every capability field
# except the meta field that lists the allowed sections themselves.
RUNTIME_CAPABILITY_SECTION_VALUES = [
    field for field in RUNTIME_CAPABILITY_FIELDS if field != "allowed_sections"
]
92
+
93
+
94
# Static per-helper contract metadata consumed by build_helper_contracts.
# Keys per entry (all optional):
#   category           - taxonomy label surfaced in the generated contract
#   row_type           - shape name for each returned row
#   fields_group       - FIELD_GROUPS key for the `fields` selector
#   filter_param       - which parameter ("where" / "post_filter") takes filters
#   filter_group       - FIELD_GROUPS key the filter may reference
#   param_values       - enumerated legal values for specific parameters
#   upstream_repo_type - repo type for helpers wrapping HfApi list_* methods
HELPER_CONTRACT_SPECS: dict[str, dict[str, Any]] = {
    "hf_collection_items": {
        "category": "collection_navigation",
        "row_type": "repo",
        "fields_group": "repo",
        "filter_param": "where",
        "filter_group": "repo",
        "param_values": {"repo_types": REPO_TYPE_VALUES},
    },
    "hf_collections_search": {
        "category": "collection_search",
        "row_type": "collection",
        "fields_group": "collection",
        "filter_param": "where",
        "filter_group": "collection",
    },
    "hf_daily_papers": {
        "category": "curated_feed",
        "row_type": "daily_paper",
        "fields_group": "daily_paper",
        "filter_param": "where",
        "filter_group": "daily_paper",
    },
    "hf_datasets_search": {
        "category": "wrapped_hf_repo_search",
        "row_type": "repo",
        "fields_group": "repo",
        "filter_param": "post_filter",
        "filter_group": "repo",
        "upstream_repo_type": "dataset",
    },
    "hf_models_search": {
        "category": "wrapped_hf_repo_search",
        "row_type": "repo",
        "fields_group": "repo",
        "filter_param": "post_filter",
        "filter_group": "repo",
        "upstream_repo_type": "model",
    },
    "hf_org_members": {
        "category": "graph_scan",
        "row_type": "actor",
        "fields_group": "actor",
        "filter_param": "where",
        "filter_group": "actor",
    },
    "hf_profile_summary": {
        "category": "profile_summary",
        "row_type": "profile",
        "param_values": {"include": ["likes", "activity"]},
    },
    "hf_recent_activity": {
        "category": "activity_feed",
        "row_type": "activity",
        "fields_group": "activity",
        "filter_param": "where",
        "filter_group": "activity",
        "param_values": {"feed_type": ["user", "org"], "repo_types": REPO_TYPE_VALUES},
    },
    "hf_repo_details": {
        "category": "repo_detail",
        "row_type": "repo",
        "fields_group": "repo",
        "param_values": {"repo_type": [*REPO_TYPE_VALUES, "auto"]},
    },
    "hf_repo_discussion_details": {
        "category": "discussion_detail",
        "row_type": "discussion_detail",
        "fields_group": "discussion_detail",
        "param_values": {"repo_type": REPO_TYPE_VALUES},
    },
    "hf_repo_discussions": {
        "category": "discussion_summary",
        "row_type": "discussion",
        "fields_group": "discussion",
        "param_values": {"repo_type": REPO_TYPE_VALUES},
    },
    "hf_repo_likers": {
        "category": "repo_to_users",
        "row_type": "actor",
        "fields_group": "actor",
        "filter_param": "where",
        "filter_group": "actor",
        "param_values": {"repo_type": REPO_TYPE_VALUES},
    },
    "hf_repo_search": {
        "category": "cross_type_repo_search",
        "row_type": "repo",
        "fields_group": "repo",
        "filter_param": "post_filter",
        "filter_group": "repo",
        "param_values": {"repo_type": REPO_TYPE_VALUES, "repo_types": REPO_TYPE_VALUES},
    },
    "hf_runtime_capabilities": {
        "category": "introspection",
        "row_type": "runtime_capability",
        "param_values": {"section": list(RUNTIME_CAPABILITY_SECTION_VALUES)},
    },
    "hf_spaces_search": {
        "category": "wrapped_hf_repo_search",
        "row_type": "repo",
        "fields_group": "repo",
        "filter_param": "post_filter",
        "filter_group": "repo",
        "upstream_repo_type": "space",
    },
    "hf_trending": {
        "category": "curated_repo_feed",
        "row_type": "repo",
        "fields_group": "trending_repo",
        "filter_param": "where",
        "filter_group": "trending_repo",
        "param_values": {"repo_type": [*REPO_TYPE_VALUES, "all"]},
    },
    "hf_user_graph": {
        "category": "graph_scan",
        "row_type": "actor",
        "fields_group": "actor",
        "filter_param": "where",
        "filter_group": "actor",
        "param_values": {
            "relation": ["followers", "following"],
        },
    },
    "hf_user_likes": {
        "category": "user_to_repos",
        "row_type": "user_like",
        "fields_group": "user_like",
        "filter_param": "where",
        "filter_group": "user_like",
        "param_values": {
            "repo_types": REPO_TYPE_VALUES,
            "sort": ["liked_at", "repo_likes", "repo_downloads"],
        },
    },
    "hf_whoami": {
        "category": "identity",
        "row_type": "user",
    },
}
234
+
235
+
236
+ def _dedupe(values: list[str]) -> list[str]:
237
+ seen: set[str] = set()
238
+ out: list[str] = []
239
+ for value in values:
240
+ item = str(value).strip()
241
+ if not item or item in seen:
242
+ continue
243
+ seen.add(item)
244
+ out.append(item)
245
+ return out
246
+
247
+
248
+ def _snake_case_token(value: str) -> str:
249
+ cleaned = str(value).strip().replace("-", "_")
250
+ cleaned = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", cleaned)
251
+ cleaned = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", cleaned)
252
+ cleaned = re.sub(r"__+", "_", cleaned)
253
+ return cleaned.lower()
254
+
255
+
256
def repo_expand_alias_map(repo_type: str) -> dict[str, str]:
    """Map both the raw and the snake_case expand names to the raw upstream value."""
    out: dict[str, str] = {}
    for entry in REPO_SEARCH_ALLOWED_EXPAND.get(repo_type, []):
        raw = str(entry)
        out[raw] = raw
        out[_snake_case_token(raw)] = raw
    return out
262
+
263
+
264
def normalized_repo_expand_values(repo_type: str) -> list[str]:
    """Snake_case the allowed expand values for *repo_type*, de-duplicated in order."""
    raw_values = REPO_SEARCH_ALLOWED_EXPAND.get(repo_type, [])
    return _dedupe([_snake_case_token(value) for value in raw_values])
271
+
272
+
273
@lru_cache(maxsize=1)
def _upstream_repo_search_facts() -> dict[str, dict[str, Any]]:
    """Collect per-repo-type facts about the upstream HfApi list_* methods.

    Introspects ``huggingface_hub`` when importable; otherwise falls back to
    the static parameter/sort tables so the module still works without the
    dependency. Cached: the answer cannot change within a process.
    """
    upstream_names = {
        "dataset": ("list_datasets", "DatasetSort_T"),
        "model": ("list_models", "ModelSort_T"),
        "space": ("list_spaces", "SpaceSort_T"),
    }
    facts: dict[str, dict[str, Any]] = {}
    for repo_type, (method_name, sort_alias_name) in upstream_names.items():
        if hf_api is None:
            # Dependency-light path: static fallbacks instead of introspection.
            params = sorted(COMMON_REPO_SEARCH_PARAMS)
            sorts = sorted(REPO_SORT_KEYS.get(repo_type, set()))
        else:
            bound = inspect.signature(getattr(hf_api.HfApi, method_name))
            params = [p for p in bound.parameters if p not in {"self", "token"}]
            sort_alias = getattr(hf_api, sort_alias_name, None)
            sorts = _dedupe([str(v) for v in get_args(sort_alias)])
        facts[repo_type] = {
            "method_name": f"HfApi.{method_name}",
            "supported_params": params,
            "sort_values": sorts,
            "expand_values": normalized_repo_expand_values(repo_type),
        }
    return facts
300
+
301
+
302
def _returns_contract(helper_name: str, row_type: str | None) -> dict[str, Any]:
    """Build the `returns` section of a helper contract from wrapper metadata."""
    meta = HELPER_DEFAULT_METADATA.get(helper_name, {})
    out: dict[str, Any] = {"envelope": dict(HELPER_RESULT_ENVELOPE)}
    if row_type is not None:
        out["row_type"] = row_type
    # Copy list-valued field metadata only; anything else is skipped.
    for field_key in ("default_fields", "guaranteed_fields", "optional_fields"):
        field_value = meta.get(field_key)
        if isinstance(field_value, list):
            out[field_key] = list(field_value)
    return out
312
+
313
+
314
def _limit_contract(helper_name: str) -> dict[str, Any] | None:
    """Merge limit metadata with pagination policy; None when nothing applies."""
    merged: dict[str, Any] = {}
    meta = HELPER_DEFAULT_METADATA.get(helper_name, {})
    for meta_key in ("default_limit", "max_limit"):
        if meta.get(meta_key) is not None:
            merged[meta_key] = meta[meta_key]
    # Pagination policy fills gaps but never overrides helper metadata.
    for policy_key, policy_value in PAGINATION_POLICY.get(helper_name, {}).items():
        if policy_value is not None and policy_key not in merged:
            merged[policy_key] = policy_value
    return merged if merged else None
325
+
326
+
327
def _fields_contract(field_group: str | None) -> dict[str, Any] | None:
    """Describe which canonical fields a helper's `fields` parameter accepts."""
    if field_group is None:
        return None
    allowed = list(FIELD_GROUPS[field_group])
    return {"canonical_only": True, "allowed_fields": allowed}
334
+
335
+
336
def _filter_contract(
    filter_param: str | None, field_group: str | None
) -> tuple[str, dict[str, Any]] | None:
    """Return the ``<param>_contract`` key and body for a helper's filter param."""
    if filter_param is None or field_group is None:
        return None
    body = {
        "allowed_fields": list(FIELD_GROUPS[field_group]),
        "supported_ops": list(FILTER_OPERATORS),
        "normalized_only": True,
    }
    return f"{filter_param}_contract", body
347
+
348
+
349
def _notes_for_helper(helper_name: str) -> str | None:
    """Return the stripped metadata note for a helper, or None when absent/blank."""
    raw = HELPER_DEFAULT_METADATA.get(helper_name, {}).get("notes")
    if isinstance(raw, str):
        stripped = raw.strip()
        if stripped:
            return stripped
    return None
355
+
356
+
357
def _param_values_for_helper(helper_name: str) -> dict[str, list[str]] | None:
    """Enumerated legal parameter values for a helper, or None when none apply."""
    spec_values = HELPER_CONTRACT_SPECS.get(helper_name, {}).get("param_values", {})
    values = {param: list(allowed) for param, allowed in spec_values.items()}
    if helper_name == "hf_repo_search":
        # Cross-type search accepts the union of every repo type's sort keys.
        all_sorts = [key for keys in REPO_SORT_KEYS.values() for key in keys]
        values["sort"] = sorted(_dedupe(all_sorts))
    return values or None
367
+
368
+
369
def build_helper_contracts(
    helper_functions: Mapping[str, Callable[..., Any]],
) -> dict[str, HelperContract]:
    """Build the per-helper contract table from live callables plus metadata.

    Args:
        helper_functions: Mapping of helper name to its async callable.

    Returns:
        A dict keyed by helper name; each value combines the callable's live
        signature with the static ``HELPER_CONTRACT_SPECS`` metadata and,
        where applicable, upstream ``HfApi`` facts. Optional sections are
        omitted rather than set to None.
    """
    upstream_facts = _upstream_repo_search_facts()
    contracts: dict[str, HelperContract] = {}
    for helper_name, fn in sorted(helper_functions.items()):
        spec = HELPER_CONTRACT_SPECS.get(helper_name, {})
        row_type = spec.get("row_type")
        fields_group = spec.get("fields_group")
        filter_param = spec.get("filter_param")
        filter_group = spec.get("filter_group")
        # Hoisted: the original computed inspect.signature(fn) twice per helper.
        signature = inspect.signature(fn)
        contract: HelperContract = {
            "name": helper_name,
            "signature": f"await {helper_name}{signature}",
            "category": str(spec.get("category") or "helper"),
            "supported_params": list(signature.parameters),
            "returns": _returns_contract(helper_name, row_type),
        }
        fields_contract = _fields_contract(fields_group)
        if fields_contract is not None:
            contract["fields_contract"] = fields_contract
        filter_contract = _filter_contract(filter_param, filter_group)
        if filter_contract is not None:
            # Key name depends on the filter parameter ("where_contract" or
            # "post_filter_contract").
            contract[filter_contract[0]] = filter_contract[1]
        limit_contract = _limit_contract(helper_name)
        if limit_contract is not None:
            contract["limit_contract"] = limit_contract
        param_values = _param_values_for_helper(helper_name)
        if param_values is not None:
            contract["param_values"] = param_values

        upstream_repo_type = spec.get("upstream_repo_type")
        if isinstance(upstream_repo_type, str):
            # Helpers wrapping a single HfApi list_* method advertise that
            # method's sort/expand surface directly.
            upstream = upstream_facts[upstream_repo_type]
            contract["backed_by"] = str(upstream["method_name"])
            contract["sort_values"] = list(upstream["sort_values"])
            contract["expand_values"] = list(upstream["expand_values"])
        elif helper_name == "hf_repo_search":
            # Cross-type search exposes sort keys per repo type instead.
            contract["sort_values_by_repo_type"] = {
                repo_type: sorted(values)
                for repo_type, values in sorted(REPO_SORT_KEYS.items())
            }

        if helper_name == "hf_user_likes":
            contract["sort_values"] = ["liked_at", "repo_likes", "repo_downloads"]

        note = _notes_for_helper(helper_name)
        if note is not None:
            contract["notes"] = note
        contracts[helper_name] = contract
    return contracts
420
+
421
+
422
+ def _format_list(values: list[str] | None) -> str:
423
+ if not values:
424
+ return "[]"
425
+ return ", ".join(f"`{value}`" for value in values)
426
+
427
+
428
+ def _append_returns(lines: list[str], returns: Mapping[str, Any]) -> None:
429
+ lines.append("- returns:")
430
+ envelope = returns.get("envelope")
431
+ if isinstance(envelope, Mapping):
432
+ lines.append(" - envelope: `{ok, item, items, meta, error}`")
433
+ row_type = returns.get("row_type")
434
+ if isinstance(row_type, str):
435
+ lines.append(f" - row_type: `{row_type}`")
436
+ for key in ("default_fields", "guaranteed_fields", "optional_fields"):
437
+ value = returns.get(key)
438
+ if isinstance(value, list):
439
+ lines.append(f" - {key}: {_format_list(value)}")
440
+
441
+
442
+ def _append_named_contract(
443
+ lines: list[str],
444
+ label: str,
445
+ contract: Mapping[str, Any] | None,
446
+ ) -> None:
447
+ if not isinstance(contract, Mapping):
448
+ return
449
+ lines.append(f"- {label}:")
450
+ allowed_fields = contract.get("allowed_fields")
451
+ if isinstance(allowed_fields, list):
452
+ lines.append(f" - allowed_fields: {_format_list(allowed_fields)}")
453
+ supported_ops = contract.get("supported_ops")
454
+ if isinstance(supported_ops, list):
455
+ lines.append(f" - supported_ops: {_format_list(supported_ops)}")
456
+ canonical_only = contract.get("canonical_only")
457
+ if canonical_only is True:
458
+ lines.append(" - canonical_only: `true`")
459
+ normalized_only = contract.get("normalized_only")
460
+ if normalized_only is True:
461
+ lines.append(" - normalized_only: `true`")
462
+
463
+
464
+ def _append_limit_contract(lines: list[str], contract: Mapping[str, Any] | None) -> None:
465
+ if not isinstance(contract, Mapping) or not contract:
466
+ return
467
+ lines.append("- limit_contract:")
468
+ for key, value in contract.items():
469
+ lines.append(f" - {key}: `{value}`")
470
+
471
+
472
+ def _append_param_values(lines: list[str], param_values: Mapping[str, Any] | None) -> None:
473
+ if not isinstance(param_values, Mapping) or not param_values:
474
+ return
475
+ lines.append("- param_values:")
476
+ for key, value in param_values.items():
477
+ if isinstance(value, list):
478
+ lines.append(f" - {key}: {_format_list(value)}")
479
+
480
+
481
def build_helper_contracts_markdown(
    helper_contracts: Mapping[str, Mapping[str, Any]],
) -> str:
    """Render the helper-contract table as a markdown reference document.

    Args:
        helper_contracts: Output of ``build_helper_contracts`` (or any
            mapping of the same shape).

    Returns:
        A markdown string with one ``###`` section per helper (sorted by
        name), ending with exactly one trailing newline. Sections only
        include the contract parts that are present and well-typed.
    """
    lines = [
        "## Helper contracts (generated from runtime + wrapper metadata)",
        "",
        "These contracts describe the normalized wrapper surface exposed to generated code.",
        "Field names and helper-visible enum values are canonical snake_case wrapper names.",
        "",
        "All helpers return the same envelope: `{ok, item, items, meta, error}`.",
        "",
    ]
    for helper_name, contract in sorted(helper_contracts.items()):
        lines.append(f"### {helper_name}")
        lines.append("")
        # Each optional section is type-checked before rendering so malformed
        # contract entries are skipped rather than crashing the renderer.
        category = contract.get("category")
        if isinstance(category, str):
            lines.append(f"- category: `{category}`")
        backed_by = contract.get("backed_by")
        if isinstance(backed_by, str):
            lines.append(f"- backed_by: `{backed_by}`")
        returns = contract.get("returns")
        if isinstance(returns, Mapping):
            _append_returns(lines, returns)
        supported_params = contract.get("supported_params")
        if isinstance(supported_params, list):
            lines.append(f"- supported_params: {_format_list(supported_params)}")
        sort_values = contract.get("sort_values")
        if isinstance(sort_values, list):
            lines.append(f"- sort_values: {_format_list(sort_values)}")
        sort_values_by_repo_type = contract.get("sort_values_by_repo_type")
        if isinstance(sort_values_by_repo_type, Mapping):
            lines.append("- sort_values_by_repo_type:")
            for repo_type, values in sort_values_by_repo_type.items():
                if isinstance(values, list):
                    lines.append(f"  - {repo_type}: {_format_list(values)}")
        expand_values = contract.get("expand_values")
        if isinstance(expand_values, list):
            lines.append(f"- expand_values: {_format_list(expand_values)}")
        _append_param_values(lines, contract.get("param_values"))
        _append_named_contract(lines, "fields_contract", contract.get("fields_contract"))
        _append_named_contract(lines, "where_contract", contract.get("where_contract"))
        _append_named_contract(
            lines, "post_filter_contract", contract.get("post_filter_contract")
        )
        _append_limit_contract(lines, contract.get("limit_contract"))
        notes = contract.get("notes")
        if isinstance(notes, str):
            lines.append(f"- notes: {notes}")
        lines.append("")
    # Normalize the tail: drop trailing blank lines, keep one final newline.
    return "\n".join(lines).rstrip() + "\n"
.prod/monty_api/helpers/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Aggregated re-exports of the per-domain helper registration functions."""

from .activity import register_activity_helpers
from .collections import register_collection_helpers
from .introspection import register_introspection_helpers
from .profiles import register_profile_helpers
from .repos import register_repo_helpers

__all__ = [
    "register_activity_helpers",
    "register_collection_helpers",
    "register_introspection_helpers",
    "register_profile_helpers",
    "register_repo_helpers",
]
.prod/monty_api/helpers/activity.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ # ruff: noqa: C901, PLR0912, PLR0913, PLR0915, PLR0917
4
+ from functools import partial
5
+ from typing import Any, Callable
6
+
7
+ from ..constants import (
8
+ ACTIVITY_CANONICAL_FIELDS,
9
+ EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
10
+ RECENT_ACTIVITY_PAGE_SIZE,
11
+ RECENT_ACTIVITY_SCAN_MAX_PAGES,
12
+ )
13
+ from ..context_types import HelperRuntimeContext
14
+
15
+
16
+ async def hf_recent_activity(
17
+ ctx: HelperRuntimeContext,
18
+ feed_type: str | None = None,
19
+ entity: str | None = None,
20
+ activity_types: list[str] | None = None,
21
+ repo_types: list[str] | None = None,
22
+ limit: int | None = None,
23
+ max_pages: int | None = None,
24
+ start_cursor: str | None = None,
25
+ count_only: bool = False,
26
+ where: dict[str, Any] | None = None,
27
+ fields: list[str] | None = None,
28
+ ) -> dict[str, Any]:
29
+ start_calls = ctx.call_count["n"]
30
+ default_limit = ctx._policy_int("hf_recent_activity", "default_limit", 100)
31
+ page_cap = ctx._policy_int(
32
+ "hf_recent_activity", "page_limit", RECENT_ACTIVITY_PAGE_SIZE
33
+ )
34
+ pages_cap = ctx._policy_int(
35
+ "hf_recent_activity", "max_pages", RECENT_ACTIVITY_SCAN_MAX_PAGES
36
+ )
37
+ requested_max_pages = max_pages
38
+ ft = str(feed_type or "").strip().lower()
39
+ ent = str(entity or "").strip()
40
+ if ft not in {"user", "org"}:
41
+ if ft and (not ent):
42
+ ent = ft
43
+ ft = "user"
44
+ elif not ft and ent:
45
+ ft = "user"
46
+ if ft not in {"user", "org"}:
47
+ return ctx._helper_error(
48
+ start_calls=start_calls,
49
+ source="/api/recent-activity",
50
+ error="feed_type must be 'user' or 'org'",
51
+ )
52
+ if not ent:
53
+ return ctx._helper_error(
54
+ start_calls=start_calls,
55
+ source="/api/recent-activity",
56
+ error="entity is required",
57
+ )
58
+ limit_plan = ctx._resolve_exhaustive_limits(
59
+ limit=limit,
60
+ count_only=count_only,
61
+ default_limit=default_limit,
62
+ max_limit=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
63
+ )
64
+ applied_limit = int(limit_plan["applied_limit"])
65
+ page_lim = page_cap
66
+ pages_lim = ctx._clamp_int(
67
+ requested_max_pages, default=pages_cap, minimum=1, maximum=pages_cap
68
+ )
69
+ type_filter = {
70
+ str(t).strip().lower() for t in activity_types or [] if str(t).strip()
71
+ }
72
+ repo_filter = {
73
+ ctx._canonical_repo_type(t, default="")
74
+ for t in repo_types or []
75
+ if str(t).strip()
76
+ }
77
+ next_cursor = (
78
+ str(start_cursor).strip()
79
+ if isinstance(start_cursor, str) and start_cursor.strip()
80
+ else None
81
+ )
82
+ items: list[dict[str, Any]] = []
83
+ scanned = 0
84
+ matched = 0
85
+ pages = 0
86
+ exhausted_feed = False
87
+ stopped_for_budget = False
88
+ try:
89
+ normalized_where = ctx._normalize_where(
90
+ where, allowed_fields=ACTIVITY_CANONICAL_FIELDS
91
+ )
92
+ except ValueError as exc:
93
+ return ctx._helper_error(
94
+ start_calls=start_calls,
95
+ source="/api/recent-activity",
96
+ error=exc,
97
+ )
98
+ while pages < pages_lim and (applied_limit == 0 or len(items) < applied_limit):
99
+ if ctx._budget_remaining() <= 0:
100
+ stopped_for_budget = True
101
+ break
102
+ params: dict[str, Any] = {"feedType": ft, "entity": ent, "limit": page_lim}
103
+ if next_cursor:
104
+ params["cursor"] = next_cursor
105
+ resp = ctx._host_raw_call("/api/recent-activity", params=params)
106
+ if not resp.get("ok"):
107
+ if pages == 0:
108
+ return ctx._helper_error(
109
+ start_calls=start_calls,
110
+ source="/api/recent-activity",
111
+ error=resp.get("error") or "recent-activity fetch failed",
112
+ )
113
+ break
114
+ payload = resp.get("data") if isinstance(resp.get("data"), dict) else {}
115
+ rows = (
116
+ payload.get("recentActivity")
117
+ if isinstance(payload.get("recentActivity"), list)
118
+ else []
119
+ )
120
+ cursor_raw = payload.get("cursor")
121
+ next_cursor = cursor_raw if isinstance(cursor_raw, str) and cursor_raw else None
122
+ pages += 1
123
+ if not rows:
124
+ exhausted_feed = True
125
+ break
126
+ for row in rows:
127
+ if not isinstance(row, dict):
128
+ continue
129
+ scanned += 1
130
+ typ = str(row.get("type") or "").strip().lower()
131
+ repo_id = row.get("repoId")
132
+ repo_type = row.get("repoType")
133
+ repo_data = (
134
+ row.get("repoData") if isinstance(row.get("repoData"), dict) else None
135
+ )
136
+ repo_obj = row.get("repo") if isinstance(row.get("repo"), dict) else None
137
+ if repo_id is None and repo_data is not None:
138
+ repo_id = repo_data.get("id") or repo_data.get("name")
139
+ if repo_id is None and repo_obj is not None:
140
+ repo_id = repo_obj.get("id") or repo_obj.get("name")
141
+ if repo_type is None and repo_data is not None:
142
+ repo_type = repo_data.get("type")
143
+ if repo_type is None and repo_obj is not None:
144
+ repo_type = repo_obj.get("type")
145
+ rt = ctx._canonical_repo_type(repo_type, default="") if repo_type else ""
146
+ if type_filter and typ not in type_filter:
147
+ continue
148
+ if repo_filter and rt not in repo_filter:
149
+ continue
150
+ item = {
151
+ "timestamp": row.get("time"),
152
+ "event_type": row.get("type"),
153
+ "repo_type": rt or repo_type,
154
+ "repo_id": repo_id,
155
+ }
156
+ if not ctx._item_matches_where(item, normalized_where):
157
+ continue
158
+ matched += 1
159
+ if len(items) < applied_limit:
160
+ items.append(item)
161
+ if not next_cursor:
162
+ exhausted_feed = True
163
+ break
164
+ try:
165
+ items = ctx._project_activity_items(items, fields)
166
+ except ValueError as exc:
167
+ return ctx._helper_error(
168
+ start_calls=start_calls,
169
+ source="/api/recent-activity",
170
+ error=exc,
171
+ )
172
+ exact_count = exhausted_feed and (not stopped_for_budget)
173
+ sample_complete = (
174
+ exact_count and applied_limit >= matched and (not count_only or matched == 0)
175
+ )
176
+ page_limit_hit = (
177
+ next_cursor is not None and pages >= pages_lim and (not exhausted_feed)
178
+ )
179
+ more_available: bool | str = ctx._derive_more_available(
180
+ sample_complete=sample_complete,
181
+ exact_count=exact_count,
182
+ returned=len(items),
183
+ total=matched if exact_count else None,
184
+ )
185
+ if next_cursor is not None:
186
+ more_available = True
187
+ elif stopped_for_budget and (not exact_count):
188
+ more_available = "unknown"
189
+ meta = ctx._build_exhaustive_result_meta(
190
+ base_meta={
191
+ "scanned": scanned,
192
+ "total": matched,
193
+ "total_matched": matched,
194
+ "pages": pages,
195
+ "count_source": "scan" if exact_count else "none",
196
+ "lower_bound": not exact_count,
197
+ "page_limit": page_lim,
198
+ "stopped_for_budget": stopped_for_budget,
199
+ "feed_type": ft,
200
+ "entity": ent,
201
+ },
202
+ limit_plan=limit_plan,
203
+ matched_count=matched,
204
+ returned_count=len(items),
205
+ exact_count=exact_count,
206
+ count_only=count_only,
207
+ sample_complete=sample_complete,
208
+ more_available=more_available,
209
+ page_limit_hit=page_limit_hit,
210
+ truncated_extra=stopped_for_budget,
211
+ requested_max_pages=requested_max_pages,
212
+ applied_max_pages=pages_lim,
213
+ )
214
+ return ctx._helper_success(
215
+ start_calls=start_calls,
216
+ source="/api/recent-activity",
217
+ items=items,
218
+ meta=meta,
219
+ cursor=next_cursor,
220
+ )
221
+
222
+
223
def register_activity_helpers(
    ctx: HelperRuntimeContext,
) -> dict[str, Callable[..., Any]]:
    """Return the activity helper callables, bound to *ctx*, keyed by name."""
    bound_recent_activity = partial(hf_recent_activity, ctx)
    return {"hf_recent_activity": bound_recent_activity}
.prod/monty_api/helpers/collections.py ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ # ruff: noqa: C901, PLR0912, PLR0913, PLR0915, PLR0917
4
+ from functools import partial
5
+ from typing import Any, Callable
6
+
7
+ from ..constants import (
8
+ COLLECTION_CANONICAL_FIELDS,
9
+ OUTPUT_ITEMS_TRUNCATION_LIMIT,
10
+ REPO_CANONICAL_FIELDS,
11
+ )
12
+ from ..context_types import HelperRuntimeContext
13
+
14
+
15
async def hf_collections_search(
    ctx: HelperRuntimeContext,
    query: str | None = None,
    owner: str | None = None,
    limit: int = 20,
    count_only: bool = False,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """Search Hugging Face collections via ``/api/collections``.

    Either ``query`` or ``owner`` must be provided (when only ``owner`` is
    given it is reused as the search term). Rows are normalized, optionally
    filtered case-insensitively by owner, then post-filtered with ``where``
    and projected with ``fields``. Returns the helper success/error envelope
    produced by ``ctx._helper_success`` / ``ctx._helper_error``.
    """
    start_calls = ctx.call_count["n"]
    default_limit = ctx._policy_int("hf_collections_search", "default_limit", 20)
    max_limit = ctx._policy_int(
        "hf_collections_search", "max_limit", OUTPUT_ITEMS_TRUNCATION_LIMIT
    )
    # count_only callers get no rows back, only the match counts in meta.
    if count_only:
        limit = 0
    applied_limit = ctx._clamp_int(
        limit,
        default=default_limit,
        minimum=0,
        maximum=max_limit,
    )
    owner_clean = str(owner or "").strip() or None
    owner_casefold = owner_clean.casefold() if owner_clean is not None else None
    # When counting everything or filtering by owner locally, over-fetch up to
    # max_limit so the local owner filter still has enough rows to work with.
    fetch_limit = max_limit if applied_limit == 0 or owner_clean else applied_limit
    if owner_clean:
        fetch_limit = min(fetch_limit, 100)
    term = str(query or "").strip()
    if not term and owner_clean:
        term = owner_clean
    if not term:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/collections",
            error="query or owner is required",
        )
    params: dict[str, Any] = {"limit": fetch_limit}
    if term:
        params["q"] = term
    if owner_clean:
        params["owner"] = owner_clean
    resp = ctx._host_raw_call("/api/collections", params=params)
    if not resp.get("ok"):
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/collections",
            error=resp.get("error") or "collections fetch failed",
        )
    payload = resp.get("data") if isinstance(resp.get("data"), list) else []

    def _row_owner_matches_owner(row: Any) -> bool:
        # True when the row's owner (or slug prefix) equals the requested
        # owner, compared casefolded. With no owner filter, every row matches.
        if owner_casefold is None or not isinstance(row, dict):
            return owner_casefold is None
        row_owner = ctx._author_from_any(row.get("owner")) or ctx._author_from_any(
            row.get("ownerData")
        )
        if (
            not row_owner
            and isinstance(row.get("slug"), str)
            and "/" in str(row.get("slug"))
        ):
            row_owner = str(row.get("slug")).split("/", 1)[0]
        if not isinstance(row_owner, str) or not row_owner:
            return False
        return row_owner.casefold() == owner_casefold

    # If the upstream owner= filter returned nothing usable (e.g. owner case
    # mismatch), retry once without it and match the owner locally instead.
    owner_fallback_used = False
    if owner_casefold is not None and not any(
        _row_owner_matches_owner(row) for row in payload
    ):
        fallback_params: dict[str, Any] = {"limit": fetch_limit}
        if term:
            fallback_params["q"] = term
        fallback_resp = ctx._host_raw_call("/api/collections", params=fallback_params)
        if fallback_resp.get("ok"):
            fallback_payload = (
                fallback_resp.get("data")
                if isinstance(fallback_resp.get("data"), list)
                else []
            )
            if any(_row_owner_matches_owner(row) for row in fallback_payload):
                payload = fallback_payload
                owner_fallback_used = True

    items: list[dict[str, Any]] = []
    for row in payload[:fetch_limit]:
        if not isinstance(row, dict):
            continue
        row_owner = ctx._author_from_any(row.get("owner")) or ctx._author_from_any(
            row.get("ownerData")
        )
        if (
            not row_owner
            and isinstance(row.get("slug"), str)
            and "/" in str(row.get("slug"))
        ):
            row_owner = str(row.get("slug")).split("/", 1)[0]
        if owner_casefold is not None and (
            not isinstance(row_owner, str) or row_owner.casefold() != owner_casefold
        ):
            continue
        owner_payload = row.get("owner") if isinstance(row.get("owner"), dict) else {}
        collection_items = (
            row.get("items") if isinstance(row.get("items"), list) else []
        )
        slug = row.get("slug")
        items.append(
            {
                "collection_id": slug,
                "slug": slug,
                "title": row.get("title"),
                "owner": row_owner,
                "owner_type": owner_payload.get("type")
                if isinstance(owner_payload.get("type"), str)
                else None,
                "description": row.get("description"),
                "gating": row.get("gating"),
                "last_updated": row.get("lastUpdated"),
                "item_count": len(collection_items),
            }
        )
    try:
        items = ctx._apply_where(
            items, where, allowed_fields=COLLECTION_CANONICAL_FIELDS
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/collections",
            error=exc,
        )
    total_matched = len(items)
    items = items[:applied_limit]
    try:
        items = ctx._project_collection_items(items, fields)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/collections",
            error=exc,
        )
    # Truncated either because the caller's limit cut the matched rows, or
    # because a count-everything fetch filled the page (more may exist).
    truncated = (
        applied_limit > 0 and total_matched > applied_limit
        or (applied_limit == 0 and len(payload) >= fetch_limit)
    )
    return ctx._helper_success(
        start_calls=start_calls,
        source="/api/collections",
        items=items,
        scanned=len(payload),
        matched=total_matched,
        returned=len(items),
        total=len(payload),
        total_matched=total_matched,
        total_population=len(payload),
        truncated=truncated,
        complete=not truncated,
        query=term,
        owner=owner_clean,
        owner_case_insensitive_fallback=owner_fallback_used,
    )
176
+
177
+
178
async def hf_collection_items(
    ctx: HelperRuntimeContext,
    collection_id: str,
    repo_types: list[str] | None = None,
    limit: int = 100,
    count_only: bool = False,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """List the repo items inside one collection via ``/api/collections/<id>``.

    ``repo_types`` restricts results to canonical types (model/dataset/space);
    any other value is rejected with a helper error. Items are filtered with
    ``where`` (repo canonical fields) and projected with ``fields``. Returns
    the standard helper envelope with collection metadata in ``meta``.
    """
    start_calls = ctx.call_count["n"]
    default_limit = ctx._policy_int("hf_collection_items", "default_limit", 100)
    max_limit = ctx._policy_int(
        "hf_collection_items", "max_limit", OUTPUT_ITEMS_TRUNCATION_LIMIT
    )
    cid = str(collection_id or "").strip()
    if not cid:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/collections/<collection_id>",
            error="collection_id is required",
        )
    # count_only callers get counts in meta but no items back.
    if count_only:
        limit = 0
    applied_limit = ctx._clamp_int(
        limit,
        default=default_limit,
        minimum=0,
        maximum=max_limit,
    )
    allowed_repo_types: set[str] | None = None
    try:
        raw_repo_types = (
            ctx._coerce_str_list(repo_types) if repo_types is not None else []
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/collections/{cid}",
            error=exc,
            collection_id=cid,
        )
    if raw_repo_types:
        allowed_repo_types = set()
        for raw in raw_repo_types:
            canonical = ctx._canonical_repo_type(raw, default="")
            # Only the three canonical repo kinds are valid filters.
            if canonical not in {"model", "dataset", "space"}:
                return ctx._helper_error(
                    start_calls=start_calls,
                    source=f"/api/collections/{cid}",
                    error=f"Unsupported repo_type '{raw}'",
                    collection_id=cid,
                )
            allowed_repo_types.add(canonical)
    endpoint = f"/api/collections/{cid}"
    resp = ctx._host_raw_call(endpoint)
    if not resp.get("ok"):
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=resp.get("error") or "collection fetch failed",
            collection_id=cid,
        )
    payload = resp.get("data") if isinstance(resp.get("data"), dict) else {}
    raw_items = payload.get("items") if isinstance(payload.get("items"), list) else []
    owner = ctx._author_from_any(payload.get("owner"))
    owner_payload = (
        payload.get("owner") if isinstance(payload.get("owner"), dict) else {}
    )
    # Fall back to the slug prefix ("owner/name") when the payload lacks one.
    if owner is None and "/" in cid:
        owner = cid.split("/", 1)[0]
    try:
        normalized_where = ctx._normalize_where(
            where, allowed_fields=REPO_CANONICAL_FIELDS
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
            collection_id=cid,
        )
    normalized: list[dict[str, Any]] = []
    for row in raw_items:
        if not isinstance(row, dict):
            continue
        item = ctx._normalize_collection_repo_item(row)
        if item is None:
            continue
        repo_type = item.get("repo_type")
        if allowed_repo_types is not None and repo_type not in allowed_repo_types:
            continue
        if not ctx._item_matches_where(item, normalized_where):
            continue
        normalized.append(item)
    total_matched = len(normalized)
    items = [] if count_only else normalized[:applied_limit]
    try:
        items = ctx._project_repo_items(items, fields)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
            collection_id=cid,
        )
    truncated = applied_limit > 0 and total_matched > applied_limit
    return ctx._helper_success(
        start_calls=start_calls,
        source=endpoint,
        items=items,
        scanned=len(raw_items),
        matched=total_matched,
        returned=len(items),
        total=len(raw_items),
        total_matched=total_matched,
        total_population=len(raw_items),
        truncated=truncated,
        complete=not truncated,
        collection_id=cid,
        title=payload.get("title"),
        owner=owner,
        owner_type=owner_payload.get("type")
        if isinstance(owner_payload.get("type"), str)
        else None,
        repo_types=sorted(allowed_repo_types)
        if allowed_repo_types is not None
        else None,
    )
306
+
307
+
308
def register_collection_helpers(
    ctx: HelperRuntimeContext,
) -> dict[str, Callable[..., Any]]:
    """Return the collection helper callables, bound to *ctx*, keyed by name."""
    registry: dict[str, Callable[..., Any]] = {}
    registry["hf_collections_search"] = partial(hf_collections_search, ctx)
    registry["hf_collection_items"] = partial(hf_collection_items, ctx)
    return registry
.prod/monty_api/helpers/common.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+
4
+ from ..context_types import HelperRuntimeContext
5
+
6
+
7
+ async def resolve_username_or_current(
8
+ ctx: HelperRuntimeContext,
9
+ username: str | None,
10
+ ) -> tuple[str | None, str | None]:
11
+ resolved = str(username or "").strip()
12
+ if resolved:
13
+ return resolved, None
14
+
15
+ whoami = await ctx.call_helper("hf_whoami")
16
+ if whoami.get("ok") is not True:
17
+ return (
18
+ None,
19
+ str(whoami.get("error") or "Could not resolve current authenticated user"),
20
+ )
21
+ item = ctx._helper_item(whoami)
22
+ current = item.get("username") if isinstance(item, dict) else None
23
+ if not isinstance(current, str) or not current.strip():
24
+ return (
25
+ None,
26
+ "username was not provided and current authenticated user could not be resolved",
27
+ )
28
+ return current.strip(), None
.prod/monty_api/helpers/introspection.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ # ruff: noqa: C901, PLR0912, PLR0913, PLR0915, PLR0917
4
+ import inspect
5
+ from functools import partial
6
+ from typing import Any, Callable
7
+
8
+ from ..helper_contracts import build_helper_contracts
9
+ from ..constants import (
10
+ ACTIVITY_CANONICAL_FIELDS,
11
+ ACTOR_CANONICAL_FIELDS,
12
+ COLLECTION_CANONICAL_FIELDS,
13
+ DAILY_PAPER_CANONICAL_FIELDS,
14
+ DISCUSSION_CANONICAL_FIELDS,
15
+ DISCUSSION_DETAIL_CANONICAL_FIELDS,
16
+ DEFAULT_MAX_CALLS,
17
+ DEFAULT_TIMEOUT_SEC,
18
+ GRAPH_SCAN_LIMIT_CAP,
19
+ LIKES_SCAN_LIMIT_CAP,
20
+ MAX_CALLS_LIMIT,
21
+ OUTPUT_ITEMS_TRUNCATION_LIMIT,
22
+ PROFILE_CANONICAL_FIELDS,
23
+ RECENT_ACTIVITY_SCAN_MAX_PAGES,
24
+ REPO_CANONICAL_FIELDS,
25
+ TRENDING_ENDPOINT_MAX_LIMIT,
26
+ USER_CANONICAL_FIELDS,
27
+ USER_LIKES_CANONICAL_FIELDS,
28
+ )
29
+ from ..context_types import HelperRuntimeContext
30
+ from ..registry import (
31
+ HELPER_COVERED_ENDPOINT_PATTERNS,
32
+ HELPER_DEFAULT_METADATA,
33
+ PAGINATION_POLICY,
34
+ )
35
+
36
+
37
+ def _render_annotation(annotation: Any) -> str:
38
+ if annotation is inspect.Signature.empty:
39
+ return "Any"
40
+ return str(annotation)
41
+
42
+
43
+ def _render_default(default: Any) -> str | None:
44
+ if default is inspect.Signature.empty:
45
+ return None
46
+ return repr(default)
47
+
48
+
49
+ def _signature_payload(fn: Callable[..., Any]) -> dict[str, Any]:
50
+ signature = inspect.signature(fn)
51
+ parameters: list[dict[str, Any]] = []
52
+ for parameter in signature.parameters.values():
53
+ item: dict[str, Any] = {
54
+ "name": parameter.name,
55
+ "kind": str(parameter.kind).replace("Parameter.", "").lower(),
56
+ "annotation": _render_annotation(parameter.annotation),
57
+ "required": parameter.default is inspect.Signature.empty,
58
+ }
59
+ default = _render_default(parameter.default)
60
+ if default is not None:
61
+ item["default"] = default
62
+ parameters.append(item)
63
+ return {
64
+ "parameters": parameters,
65
+ "returns": _render_annotation(signature.return_annotation),
66
+ }
67
+
68
+
69
async def hf_runtime_capabilities(
    ctx: HelperRuntimeContext,
    section: str | None = None,
) -> dict[str, Any]:
    """Return a self-describing manifest of the helper runtime.

    The manifest covers helper signatures/contracts, canonical field lists,
    per-helper defaults, runtime limits, and repo-search guidance. When
    ``section`` names a top-level manifest key, only that section is returned;
    an unknown section yields a helper error listing the allowed sections.
    """
    start_calls = ctx.call_count["n"]
    ctx.internal_helper_used["used"] = True

    # Include this helper itself so its own signature appears in the manifest.
    helper_functions = {
        **ctx.helper_registry,
        "hf_runtime_capabilities": partial(hf_runtime_capabilities, ctx),
    }
    helper_payload = {
        name: _signature_payload(fn) for name, fn in sorted(helper_functions.items())
    }
    helper_contracts = build_helper_contracts(helper_functions)
    repo_type_helper_names = {
        "dataset": "hf_datasets_search",
        "model": "hf_models_search",
        "space": "hf_spaces_search",
    }

    def _helper_contract(name: str) -> dict[str, Any]:
        # Defensive copy so manifest consumers cannot mutate the contracts.
        contract = helper_contracts.get(name)
        return dict(contract) if isinstance(contract, dict) else {}

    def _type_specific_params(name: str) -> list[str]:
        # Params supported by this helper beyond the common search surface.
        params = _helper_contract(name).get("supported_params")
        if not isinstance(params, list):
            return []
        common = {
            "search",
            "filter",
            "author",
            "sort",
            "limit",
            "expand",
            "full",
            "fields",
            "post_filter",
        }
        return [param for param in params if param not in common]

    manifest: dict[str, Any] = {
        "overview": {
            "helper_count": len(helper_functions),
            "supports_current_user": True,
            "helper_result_envelope": {
                "ok": "bool",
                "item": "dict | None",
                "items": "list[dict]",
                "meta": "dict",
                "error": "str | None",
            },
            "raw_result_envelope": {
                "result": "Any",
                "meta": {
                    "ok": "bool",
                    "api_calls": "int",
                    "elapsed_ms": "int",
                    "limits_reached": "bool",
                    "limit_summary": "list[dict]",
                },
            },
        },
        "helpers": helper_payload,
        "helper_contracts": helper_contracts,
        "fields": {
            "profile": list(PROFILE_CANONICAL_FIELDS),
            "repo": list(REPO_CANONICAL_FIELDS),
            "user": list(USER_CANONICAL_FIELDS),
            "actor": list(ACTOR_CANONICAL_FIELDS),
            "user_likes": list(USER_LIKES_CANONICAL_FIELDS),
            "activity": list(ACTIVITY_CANONICAL_FIELDS),
            "collection": list(COLLECTION_CANONICAL_FIELDS),
            "daily_paper": list(DAILY_PAPER_CANONICAL_FIELDS),
            "discussion": list(DISCUSSION_CANONICAL_FIELDS),
            "discussion_detail": list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
        },
        "helper_defaults": {
            helper_name: dict(sorted(metadata.items()))
            for helper_name, metadata in sorted(HELPER_DEFAULT_METADATA.items())
        },
        "limits": {
            "default_timeout_sec": DEFAULT_TIMEOUT_SEC,
            "default_max_calls": DEFAULT_MAX_CALLS,
            "max_calls_limit": MAX_CALLS_LIMIT,
            "output_items_truncation_limit": OUTPUT_ITEMS_TRUNCATION_LIMIT,
            "graph_scan_limit_cap": GRAPH_SCAN_LIMIT_CAP,
            "likes_scan_limit_cap": LIKES_SCAN_LIMIT_CAP,
            "recent_activity_scan_max_pages": RECENT_ACTIVITY_SCAN_MAX_PAGES,
            "trending_endpoint_max_limit": TRENDING_ENDPOINT_MAX_LIMIT,
            "pagination_policy": {
                helper_name: dict(sorted(policy.items()))
                for helper_name, policy in sorted(PAGINATION_POLICY.items())
            },
            "helper_covered_endpoint_patterns": [
                {"pattern": pattern, "helper": helper_name}
                for pattern, helper_name in HELPER_COVERED_ENDPOINT_PATTERNS
            ],
        },
        "repo_search": {
            "helper_selection": {
                "preferred_rule": (
                    "Prefer hf_models_search for model queries, hf_datasets_search for "
                    "dataset queries, and hf_spaces_search for space queries. Use "
                    "hf_repo_search only for intentionally cross-type search."
                ),
                "model": "hf_models_search",
                "dataset": "hf_datasets_search",
                "space": "hf_spaces_search",
                "cross_type": "hf_repo_search",
            },
            "can_do": [
                "search models",
                "search datasets",
                "search spaces",
                "search across multiple repo types",
                "project selected fields",
                "apply local post-fetch row filtering",
            ],
            "parameter_contract": {
                "search": {
                    "meaning": "Upstream Hugging Face search text.",
                },
                "filter": {
                    "meaning": (
                        "Upstream Hugging Face filter/tag argument passed directly into "
                        "the Hub client."
                    ),
                },
                "post_filter": {
                    "meaning": (
                        "Local predicate applied after the rows are fetched and normalized."
                    ),
                    "recommended_shapes": [
                        {"runtime_stage": "RUNNING"},
                        {"runtime_stage": {"in": ["BUILD_ERROR", "RUNTIME_ERROR"]}},
                        {"downloads": {"gte": 1000}},
                        {"likes": {"lte": 5000}},
                    ],
                    "prefer_for": [
                        "normalized returned fields such as runtime_stage",
                        "downloads / likes thresholds after a broad search",
                    ],
                    "avoid_when": [
                        "author is already a first-class helper argument",
                        "pipeline_tag is already a first-class model-search argument",
                        "dataset_name, language, task_ids, apps, models, or datasets already have first-class helper args",
                    ],
                },
                "fields": {
                    "meaning": "Select which normalized row fields are returned to the caller.",
                    "canonical_only": True,
                },
            },
            "repo_type_specific_helpers": {
                repo_type: {
                    "helper": helper_name,
                    "supported_params": _helper_contract(helper_name).get(
                        "supported_params"
                    ),
                    "type_specific_params": _type_specific_params(helper_name),
                    "sort_values": _helper_contract(helper_name).get("sort_values"),
                    "expand_values": _helper_contract(helper_name).get("expand_values"),
                    "fields_contract": _helper_contract(helper_name).get(
                        "fields_contract"
                    ),
                    "post_filter_contract": _helper_contract(helper_name).get(
                        "post_filter_contract"
                    ),
                }
                for repo_type, helper_name in sorted(repo_type_helper_names.items())
            },
            "generic_helper": {
                "helper": "hf_repo_search",
                "use_for": "Intentionally cross-type search only.",
                "supports": _helper_contract("hf_repo_search").get("supported_params"),
                "sort_values_by_repo_type": _helper_contract("hf_repo_search").get(
                    "sort_values_by_repo_type"
                ),
                "fields_contract": _helper_contract("hf_repo_search").get(
                    "fields_contract"
                ),
                "post_filter_contract": _helper_contract("hf_repo_search").get(
                    "post_filter_contract"
                ),
                "does_not_support": [
                    "repo-type-specific knobs such as pipeline_tag or dataset_name",
                    "nested advanced routing",
                ],
            },
            "space_runtime_contract": {
                "returned_field": "runtime_stage",
                "full_runtime_field": "runtime",
                "preferred_filter_channel": "post_filter",
                "note": (
                    "Treat runtime_stage like any other returned field: use exact values "
                    "or an 'in' list in post_filter."
                ),
                "common_values": ["BUILD_ERROR", "RUNTIME_ERROR", "RUNNING", "SLEEPING"],
            },
        },
    }
    allowed_sections = sorted(manifest)
    requested = str(section or "").strip().lower()
    if requested:
        if requested not in manifest:
            return ctx._helper_error(
                start_calls=start_calls,
                source="internal://runtime-capabilities",
                error=f"Unsupported section {section!r}. Allowed sections: {allowed_sections}",
                section=section,
                allowed_sections=allowed_sections,
            )
        payload = {
            "section": requested,
            "content": manifest[requested],
            "allowed_sections": allowed_sections,
        }
    else:
        payload = {"allowed_sections": allowed_sections, **manifest}
    return ctx._helper_success(
        start_calls=start_calls,
        source="internal://runtime-capabilities",
        items=[payload],
        section=requested or None,
    )
296
+
297
+
298
def register_introspection_helpers(
    ctx: HelperRuntimeContext,
) -> dict[str, Callable[..., Any]]:
    """Return the introspection helper callables, bound to *ctx*, keyed by name."""
    capabilities = partial(hf_runtime_capabilities, ctx)
    return {"hf_runtime_capabilities": capabilities}
.prod/monty_api/helpers/profiles.py ADDED
@@ -0,0 +1,861 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ # ruff: noqa: C901, PLR0912, PLR0913, PLR0915, PLR0917
4
+ from itertools import islice
5
+ import re
6
+ from typing import Any, Callable
7
+ from ..context_types import HelperRuntimeContext
8
+ from ..constants import (
9
+ ACTOR_CANONICAL_FIELDS,
10
+ EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
11
+ GRAPH_SCAN_LIMIT_CAP,
12
+ OUTPUT_ITEMS_TRUNCATION_LIMIT,
13
+ USER_SUMMARY_ACTIVITY_MAX_PAGES,
14
+ USER_SUMMARY_LIKES_SCAN_LIMIT,
15
+ )
16
+
17
+
18
+ from .common import resolve_username_or_current
19
+
20
+ from functools import partial
21
+
22
+
23
+ def _clean_social_handle(value: Any) -> str | None:
24
+ if not isinstance(value, str):
25
+ return None
26
+ cleaned = value.strip()
27
+ if not cleaned:
28
+ return None
29
+ if re.match("^https?://", cleaned, flags=re.IGNORECASE):
30
+ return cleaned
31
+ return cleaned.lstrip("@")
32
+
33
+
34
+ def _social_url(kind: str, value: Any) -> str | None:
35
+ cleaned = _clean_social_handle(value)
36
+ if cleaned is None:
37
+ return None
38
+ if re.match("^https?://", cleaned, flags=re.IGNORECASE):
39
+ return cleaned
40
+ if kind == "twitter":
41
+ return f"https://twitter.com/{cleaned}"
42
+ if kind == "github":
43
+ return f"https://github.com/{cleaned}"
44
+ if kind == "linkedin":
45
+ if cleaned.startswith(("in/", "company/")):
46
+ return f"https://www.linkedin.com/{cleaned}"
47
+ return f"https://www.linkedin.com/in/{cleaned}"
48
+ if kind == "bluesky":
49
+ return f"https://bsky.app/profile/{cleaned}"
50
+ return cleaned
51
+
52
+
53
async def hf_whoami(ctx: HelperRuntimeContext) -> dict[str, Any]:
    """Resolve the current authenticated user via ``/api/whoami-v2``.

    Requires a token from ``ctx._load_token()``; without one a helper error is
    returned instead of calling the Hub. On success the single item carries
    ``username``, ``fullname``, and ``is_pro``; if no username can be read
    from the payload, the item list is empty.
    """
    start_calls = ctx.call_count["n"]
    endpoint = "/api/whoami-v2"
    token = ctx._load_token()
    if token is None:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error="Current authenticated user is unavailable for this request. No request-scoped or fallback HF token was found.",
        )
    try:
        payload = ctx._host_hf_call(
            endpoint,
            lambda: ctx._get_hf_api_client().whoami(token=token, cache=True),
        )
    except Exception as e:
        # Boundary: any Hub-client failure becomes a helper error envelope.
        return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
    # Different Hub payload shapes expose the username under different keys.
    username = payload.get("name") or payload.get("user") or payload.get("username")
    item = {
        "username": username,
        "fullname": payload.get("fullname"),
        "is_pro": payload.get("isPro"),
    }
    items = [item] if isinstance(username, str) and username else []
    return ctx._helper_success(
        start_calls=start_calls,
        source=endpoint,
        items=items,
        scanned=1,
        matched=len(items),
        returned=len(items),
        truncated=False,
    )
86
+
87
+
88
async def _hf_user_overview(ctx: HelperRuntimeContext, username: str) -> dict[str, Any]:
    """Fetch one user's public overview (profile, social links, counters).

    Returns a helper envelope with exactly one item on success; an error
    envelope when ``username`` is blank or the overview request fails.
    Social handles are cleaned into bare handles plus canonical URLs.
    """
    start_calls = ctx.call_count["n"]
    u = str(username or "").strip()
    if not u:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/users/<u>/overview",
            error="username is required",
        )
    endpoint = f"/api/users/{u}/overview"
    try:
        obj = ctx._host_hf_call(
            endpoint, lambda: ctx._get_hf_api_client().get_user_overview(u)
        )
    except Exception as e:
        return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
    # Handles may live under snake_case or camelCase attributes depending on
    # the hub client payload shape; try both spellings.
    twitter = getattr(obj, "twitter", None) or getattr(obj, "twitterUsername", None)
    github = getattr(obj, "github", None) or getattr(obj, "githubUsername", None)
    linkedin = getattr(obj, "linkedin", None) or getattr(obj, "linkedinUsername", None)
    bluesky = getattr(obj, "bluesky", None) or getattr(obj, "blueskyUsername", None)
    # Fall back to the dedicated socials endpoint only when at least one handle
    # is still missing AND the call budget allows one more request.
    if ctx._budget_remaining() > 0 and any(
        (v in {None, ""} for v in [twitter, github, linkedin, bluesky])
    ):
        socials_ep = f"/api/users/{u}/socials"
        socials_resp = ctx._host_raw_call(socials_ep)
        if socials_resp.get("ok"):
            socials_payload = (
                socials_resp.get("data")
                if isinstance(socials_resp.get("data"), dict)
                else {}
            )
            handles = (
                socials_payload.get("socialHandles")
                if isinstance(socials_payload.get("socialHandles"), dict)
                else {}
            )
            # Overview values win; socials only fill in the gaps.
            twitter = twitter or handles.get("twitter")
            github = github or handles.get("github")
            linkedin = linkedin or handles.get("linkedin")
            bluesky = bluesky or handles.get("bluesky")
    # Org entries may be plain strings or objects exposing a ``name`` attribute.
    orgs_raw = getattr(obj, "orgs", None)
    org_names: list[str] | None = None
    if isinstance(orgs_raw, (list, tuple, set)):
        names = []
        for org in orgs_raw:
            if isinstance(org, str) and org.strip():
                names.append(org.strip())
                continue
            name = getattr(org, "name", None)
            if isinstance(name, str) and name.strip():
                names.append(name.strip())
        org_names = names or None
    twitter_handle = _clean_social_handle(twitter)
    github_handle = _clean_social_handle(github)
    linkedin_handle = _clean_social_handle(linkedin)
    bluesky_handle = _clean_social_handle(bluesky)
    item = {
        "username": obj.username or u,
        "fullname": obj.fullname,
        "bio": getattr(obj, "details", None),
        "avatar_url": obj.avatar_url,
        "website_url": getattr(obj, "websiteUrl", None),
        # Full profile URLs derived from the cleaned handles.
        "twitter": _social_url("twitter", twitter_handle),
        "github": _social_url("github", github_handle),
        "linkedin": _social_url("linkedin", linkedin_handle),
        "bluesky": _social_url("bluesky", bluesky_handle),
        "twitter_handle": twitter_handle,
        "github_handle": github_handle,
        "linkedin_handle": linkedin_handle,
        "bluesky_handle": bluesky_handle,
        "followers": ctx._as_int(obj.num_followers),
        "following": ctx._as_int(obj.num_following),
        "likes": ctx._as_int(obj.num_likes),
        "models": ctx._as_int(getattr(obj, "num_models", None)),
        "datasets": ctx._as_int(getattr(obj, "num_datasets", None)),
        "spaces": ctx._as_int(getattr(obj, "num_spaces", None)),
        "discussions": ctx._as_int(getattr(obj, "num_discussions", None)),
        "papers": ctx._as_int(getattr(obj, "num_papers", None)),
        "upvotes": ctx._as_int(getattr(obj, "num_upvotes", None)),
        "orgs": org_names,
        "is_pro": obj.is_pro,
    }
    return ctx._helper_success(
        start_calls=start_calls,
        source=endpoint,
        items=[item],
        scanned=1,
        matched=1,
        returned=1,
        truncated=False,
    )
179
+
180
+
181
async def _hf_org_overview(
    ctx: HelperRuntimeContext, organization: str
) -> dict[str, Any]:
    """Return a one-item helper envelope describing an organization overview."""
    start_calls = ctx.call_count["n"]
    org_name = str(organization or "").strip()
    if not org_name:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/organizations/<o>/overview",
            error="organization is required",
        )
    endpoint = f"/api/organizations/{org_name}/overview"
    try:
        overview = ctx._host_hf_call(
            endpoint,
            lambda: ctx._get_hf_api_client().get_organization_overview(org_name),
        )
    except Exception as exc:
        return ctx._helper_error(start_calls=start_calls, source=endpoint, error=exc)
    summary = {
        "organization": overview.name or org_name,
        "display_name": overview.fullname,
        "avatar_url": overview.avatar_url,
        "description": overview.details,
        "website_url": getattr(overview, "websiteUrl", None),
        "followers": ctx._as_int(overview.num_followers),
        "members": ctx._as_int(overview.num_users),
        "models": ctx._as_int(getattr(overview, "num_models", None)),
        "datasets": ctx._as_int(getattr(overview, "num_datasets", None)),
        "spaces": ctx._as_int(getattr(overview, "num_spaces", None)),
    }
    return ctx._helper_success(
        start_calls=start_calls,
        source=endpoint,
        items=[summary],
        scanned=1,
        matched=1,
        returned=1,
        truncated=False,
    )
221
+
222
+
223
async def hf_org_members(
    ctx: HelperRuntimeContext,
    organization: str,
    limit: int | None = None,
    scan_limit: int | None = None,
    count_only: bool = False,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """List an organization's members with filtering and field projection.

    When no ``where`` filter is given, the cheaper overview member counter is
    preferred for totals (and can short-circuit ``count_only`` requests);
    otherwise the member listing is scanned up to the resolved scan limit.
    """
    start_calls = ctx.call_count["n"]
    org = str(organization or "").strip()
    if not org:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/organizations/<o>/members",
            error="organization is required",
        )
    # Policy-driven caps, reconciled by the shared exhaustive-limit planner.
    default_limit = ctx._policy_int("hf_org_members", "default_limit", 100)
    scan_cap = ctx._policy_int("hf_org_members", "scan_max", GRAPH_SCAN_LIMIT_CAP)
    limit_plan = ctx._resolve_exhaustive_limits(
        limit=limit,
        count_only=count_only,
        default_limit=default_limit,
        max_limit=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
        scan_limit=scan_limit,
        scan_cap=scan_cap,
    )
    applied_limit = int(limit_plan["applied_limit"])
    scan_lim = int(limit_plan["applied_scan_limit"])
    has_where = isinstance(where, dict) and bool(where)
    # Best-effort: read the member count from the org overview so unfiltered
    # count_only requests can avoid scanning the member list entirely.
    overview_total: int | None = None
    overview_source = f"/api/organizations/{org}/overview"
    if ctx._budget_remaining() > 0:
        try:
            org_obj = ctx._host_hf_call(
                overview_source,
                lambda: ctx._get_hf_api_client().get_organization_overview(org),
            )
            overview_total = ctx._as_int(getattr(org_obj, "num_users", None))
        except Exception:
            # Overview is an optimization only; fall back to scanning.
            overview_total = None
    if count_only and (not has_where) and (overview_total is not None):
        return ctx._overview_count_only_success(
            start_calls=start_calls,
            source=overview_source,
            total=overview_total,
            limit_plan=limit_plan,
            base_meta={
                "scanned": 1,
                "count_source": "overview",
                "organization": org,
            },
        )
    endpoint = f"/api/organizations/{org}/members"
    try:
        rows = ctx._host_hf_call(
            endpoint,
            lambda: list(
                islice(
                    ctx._get_hf_api_client().list_organization_members(org),
                    scan_lim,
                )
            ),
        )
    except Exception as e:
        return ctx._helper_error(
            start_calls=start_calls, source=endpoint, error=e, organization=org
        )
    # Normalize member rows; entries without a usable username are dropped.
    normalized: list[dict[str, Any]] = []
    for row in rows:
        handle = getattr(row, "username", None)
        if not isinstance(handle, str) or not handle:
            continue
        item = {
            "username": handle,
            "fullname": getattr(row, "fullname", None),
            "is_pro": getattr(row, "is_pro", None),
            "role": getattr(row, "role", None),
        }
        normalized.append(item)
    try:
        normalized = ctx._apply_where(
            normalized, where, allowed_fields=ACTOR_CANONICAL_FIELDS
        )
    except ValueError as exc:
        # Invalid where-clause (e.g. unknown field) is a caller error.
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
            organization=org,
        )
    observed_total = len(rows)
    # A scan that stopped short of scan_lim saw the entire listing.
    scan_exhaustive = observed_total < scan_lim
    overview_list_mismatch = (
        overview_total is not None
        and scan_exhaustive
        and (observed_total != overview_total)
    )
    # Decide what "total" means: filtered totals come from the scan; otherwise
    # prefer the overview counter when available.
    if has_where:
        exact_count = scan_exhaustive
        total = len(normalized)
        total_matched = len(normalized)
    elif overview_total is not None:
        exact_count = True
        total = overview_total
        total_matched = overview_total
    else:
        exact_count = scan_exhaustive
        total = observed_total
        total_matched = observed_total
    total_available = overview_total if overview_total is not None else observed_total
    items = normalized[:applied_limit]
    scan_limit_hit = not exact_count and observed_total >= scan_lim
    count_source = (
        "overview" if overview_total is not None and (not has_where) else "scan"
    )
    sample_complete = (
        exact_count
        and len(normalized) <= applied_limit
        and (not count_only or len(normalized) == 0)
    )
    more_available = ctx._derive_more_available(
        sample_complete=sample_complete,
        exact_count=exact_count,
        returned=len(items),
        total=total,
    )
    # With a filter we cannot know whether unscanned rows would have matched.
    if not exact_count and scan_limit_hit:
        more_available = "unknown" if has_where else True
    try:
        items = ctx._project_actor_items(items, fields)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
            organization=org,
        )
    meta = ctx._build_exhaustive_result_meta(
        base_meta={
            "scanned": observed_total,
            "total": total,
            "total_available": total_available,
            "total_matched": total_matched,
            "count_source": count_source,
            "lower_bound": bool(has_where and (not exact_count)),
            "overview_total": overview_total,
            "listed_total": observed_total,
            "overview_list_mismatch": overview_list_mismatch,
            "organization": org,
        },
        limit_plan=limit_plan,
        matched_count=len(normalized),
        returned_count=len(items),
        exact_count=exact_count,
        count_only=count_only,
        sample_complete=sample_complete,
        more_available=more_available,
        scan_limit_hit=scan_limit_hit,
    )
    return ctx._helper_success(
        start_calls=start_calls, source=endpoint, items=items, meta=meta
    )
386
+
387
+
388
async def _user_graph_helper(
    ctx: HelperRuntimeContext,
    kind: str,
    username: str,
    pro_only: bool | None,
    limit: int | None,
    scan_limit: int | None,
    count_only: bool,
    where: dict[str, Any] | None,
    fields: list[str] | None,
    *,
    helper_name: str,
) -> dict[str, Any]:
    """Shared implementation for followers/following graph queries.

    ``kind`` is either "followers" or "following". If the handle turns out to
    be an organization (user-overview lookup fails, org-overview succeeds),
    the query is redirected to the org followers listing — organizations do
    not expose a "following" relation. ``helper_name`` selects the policy
    bucket for limits.
    """
    start_calls = ctx.call_count["n"]
    default_limit = ctx._policy_int(helper_name, "default_limit", 100)
    scan_cap = ctx._policy_int(helper_name, "scan_max", GRAPH_SCAN_LIMIT_CAP)
    max_limit = ctx._policy_int(
        helper_name, "max_limit", EXHAUSTIVE_HELPER_RETURN_HARD_CAP
    )
    u = str(username or "").strip()
    if not u:
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/users/<u>/{kind}",
            error="username is required",
        )
    limit_plan = ctx._resolve_exhaustive_limits(
        limit=limit,
        count_only=count_only,
        default_limit=default_limit,
        max_limit=max_limit,
        scan_limit=scan_limit,
        scan_cap=scan_cap,
    )
    applied_limit = int(limit_plan["applied_limit"])
    scan_lim = int(limit_plan["applied_scan_limit"])
    has_where = isinstance(where, dict) and bool(where)
    # Any post-filter (pro_only or where) invalidates overview-based counts.
    filtered = pro_only is not None or has_where
    entity_type = "user"
    # Best-effort overview lookup: supplies an exact total and detects whether
    # the handle is actually an organization.
    overview_total: int | None = None
    overview_source = f"/api/users/{u}/overview"
    if ctx._budget_remaining() > 0:
        try:
            user_obj = ctx._host_hf_call(
                overview_source,
                lambda: ctx._get_hf_api_client().get_user_overview(u),
            )
            overview_total = ctx._as_int(
                user_obj.num_followers
                if kind == "followers"
                else user_obj.num_following
            )
        except Exception:
            # User lookup failed — the handle may name an organization.
            org_overview_source = f"/api/organizations/{u}/overview"
            try:
                org_obj = ctx._host_hf_call(
                    org_overview_source,
                    lambda: ctx._get_hf_api_client().get_organization_overview(u),
                )
            except Exception:
                overview_total = None
            else:
                entity_type = "organization"
                overview_source = org_overview_source
                if kind != "followers":
                    return ctx._helper_error(
                        start_calls=start_calls,
                        source=f"/api/organizations/{u}/{kind}",
                        error="organization graph only supports relation='followers'; organizations do not expose a following list",
                        relation=kind,
                        organization=u,
                        entity=u,
                        entity_type=entity_type,
                    )
                overview_total = ctx._as_int(getattr(org_obj, "num_followers", None))
    # Unfiltered count_only can be answered from the overview counter alone.
    if count_only and (not filtered) and (overview_total is not None):
        return ctx._overview_count_only_success(
            start_calls=start_calls,
            source=overview_source,
            total=overview_total,
            limit_plan=limit_plan,
            base_meta={
                "scanned": 1,
                "count_source": "overview",
                "relation": kind,
                "pro_only": pro_only,
                "where_applied": has_where,
                "entity": u,
                "entity_type": entity_type,
                "username": u,
                "organization": u if entity_type == "organization" else None,
            },
        )
    endpoint = f"/api/users/{u}/{kind}"
    try:
        # Pick the listing endpoint based on entity type and relation.
        if entity_type == "organization":
            endpoint = f"/api/organizations/{u}/followers"
            rows = ctx._host_hf_call(
                endpoint,
                lambda: list(
                    islice(
                        ctx._get_hf_api_client().list_organization_followers(u),
                        scan_lim,
                    )
                ),
            )
        elif kind == "followers":
            rows = ctx._host_hf_call(
                endpoint,
                lambda: list(
                    islice(ctx._get_hf_api_client().list_user_followers(u), scan_lim)
                ),
            )
        else:
            rows = ctx._host_hf_call(
                endpoint,
                lambda: list(
                    islice(ctx._get_hf_api_client().list_user_following(u), scan_lim)
                ),
            )
    except Exception as e:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=e,
            relation=kind,
            username=u,
            entity=u,
            entity_type=entity_type,
            organization=u if entity_type == "organization" else None,
        )
    # Normalize rows and apply the in-process pro_only filter.
    normalized: list[dict[str, Any]] = []
    for row in rows:
        handle = getattr(row, "username", None)
        if not isinstance(handle, str) or not handle:
            continue
        item = {
            "username": handle,
            "fullname": getattr(row, "fullname", None),
            "is_pro": getattr(row, "is_pro", None),
        }
        if pro_only is True and item.get("is_pro") is not True:
            continue
        if pro_only is False and item.get("is_pro") is True:
            continue
        normalized.append(item)
    try:
        normalized = ctx._apply_where(
            normalized, where, allowed_fields=ACTOR_CANONICAL_FIELDS
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
            relation=kind,
            username=u,
            entity=u,
            entity_type=entity_type,
            organization=u if entity_type == "organization" else None,
        )
    observed_total = len(rows)
    # A scan that stopped short of scan_lim saw the entire listing.
    scan_exhaustive = observed_total < scan_lim
    overview_list_mismatch = (
        overview_total is not None
        and scan_exhaustive
        and (observed_total != overview_total)
    )
    # Totals: filtered totals must come from the scan; otherwise prefer the
    # overview counter when it was obtained.
    if filtered:
        exact_count = scan_exhaustive
        total = len(normalized)
        total_matched = len(normalized)
    elif overview_total is not None:
        exact_count = True
        total = overview_total
        total_matched = overview_total
    else:
        exact_count = scan_exhaustive
        total = observed_total
        total_matched = observed_total
    total_available = overview_total if overview_total is not None else observed_total
    items = normalized[:applied_limit]
    scan_limit_hit = not exact_count and observed_total >= scan_lim
    count_source = (
        "overview" if overview_total is not None and (not filtered) else "scan"
    )
    sample_complete = (
        exact_count
        and len(normalized) <= applied_limit
        and (not count_only or len(normalized) == 0)
    )
    more_available = ctx._derive_more_available(
        sample_complete=sample_complete,
        exact_count=exact_count,
        returned=len(items),
        total=total,
    )
    # With a filter we cannot know whether unscanned rows would have matched.
    if not exact_count and scan_limit_hit:
        more_available = "unknown" if filtered else True
    try:
        items = ctx._project_actor_items(items, fields)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
            relation=kind,
            username=u,
            entity=u,
            entity_type=entity_type,
            organization=u if entity_type == "organization" else None,
        )
    meta = ctx._build_exhaustive_result_meta(
        base_meta={
            "scanned": observed_total,
            "total": total,
            "total_available": total_available,
            "total_matched": total_matched,
            "count_source": count_source,
            "lower_bound": bool(filtered and (not exact_count)),
            "overview_total": overview_total,
            "listed_total": observed_total,
            "overview_list_mismatch": overview_list_mismatch,
            "relation": kind,
            "pro_only": pro_only,
            "where_applied": has_where,
            "entity": u,
            "entity_type": entity_type,
            "username": u,
            "organization": u if entity_type == "organization" else None,
        },
        limit_plan=limit_plan,
        matched_count=len(normalized),
        returned_count=len(items),
        exact_count=exact_count,
        count_only=count_only,
        sample_complete=sample_complete,
        more_available=more_available,
        scan_limit_hit=scan_limit_hit,
    )
    return ctx._helper_success(
        start_calls=start_calls, source=endpoint, items=items, meta=meta
    )
631
+
632
+
633
async def hf_profile_summary(
    ctx: HelperRuntimeContext,
    handle: str | None = None,
    include: list[str] | None = None,
    likes_limit: int = 10,
    activity_limit: int = 10,
) -> dict[str, Any]:
    """Summarize a user or organization profile as a single-item envelope.

    Tries the user overview first; if that fails, falls back to the
    organization overview. Optional ``include`` sections ("likes",
    "activity") are honored for users only and are ignored for orgs.
    """
    start_calls = ctx.call_count["n"]
    # Missing handle resolves to the current authenticated user, if any.
    resolved_handle, resolve_error = await resolve_username_or_current(ctx, handle)
    if resolve_error:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/users/<u>/overview",
            error=resolve_error,
        )
    if not isinstance(resolved_handle, str):
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/users/<u>/overview",
            error="handle was not provided and current authenticated user could not be resolved",
        )
    try:
        requested_sections = (
            {part.lower() for part in ctx._coerce_str_list(include) if part.strip()}
            if include is not None
            else set()
        )
    except ValueError as e:
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/users/{resolved_handle}/overview",
            error=e,
        )
    invalid_sections = sorted(requested_sections - {"likes", "activity"})
    if invalid_sections:
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/users/{resolved_handle}/overview",
            error=f"Unsupported include values: {invalid_sections}",
        )
    # Clamp per-section sample sizes to the global output truncation cap.
    likes_lim = ctx._clamp_int(
        likes_limit, default=10, minimum=0, maximum=OUTPUT_ITEMS_TRUNCATION_LIMIT
    )
    activity_lim = ctx._clamp_int(
        activity_limit, default=10, minimum=0, maximum=OUTPUT_ITEMS_TRUNCATION_LIMIT
    )
    section_errors: dict[str, str] = {}
    user_overview = await _hf_user_overview(ctx, resolved_handle)
    if user_overview.get("ok") is True:
        overview_item = ctx._helper_item(user_overview) or {"username": resolved_handle}
        item: dict[str, Any] = {
            "handle": str(overview_item.get("username") or resolved_handle),
            "entity_type": "user",
            "display_name": overview_item.get("fullname")
            or str(overview_item.get("username") or resolved_handle),
            "bio": overview_item.get("bio"),
            "avatar_url": overview_item.get("avatar_url"),
            "website_url": overview_item.get("website_url"),
            "twitter_url": overview_item.get("twitter"),
            "github_url": overview_item.get("github"),
            "linkedin_url": overview_item.get("linkedin"),
            "bluesky_url": overview_item.get("bluesky"),
            "followers_count": ctx._overview_count(overview_item, "followers"),
            "following_count": ctx._overview_count(overview_item, "following"),
            "likes_count": ctx._overview_count(overview_item, "likes"),
            "models_count": ctx._overview_count(overview_item, "models"),
            "datasets_count": ctx._overview_count(overview_item, "datasets"),
            "spaces_count": ctx._overview_count(overview_item, "spaces"),
            "discussions_count": ctx._overview_count(overview_item, "discussions"),
            "papers_count": ctx._overview_count(overview_item, "papers"),
            "upvotes_count": ctx._overview_count(overview_item, "upvotes"),
            "organizations": overview_item.get("orgs"),
            "is_pro": overview_item.get("is_pro"),
        }
        # Section fetch failures are reported via section_errors instead of
        # failing the whole summary.
        if "likes" in requested_sections:
            likes = await ctx.call_helper(
                "hf_user_likes",
                username=resolved_handle,
                limit=likes_lim,
                scan_limit=USER_SUMMARY_LIKES_SCAN_LIMIT,
                count_only=likes_lim == 0,
                sort="liked_at",
                fields=[
                    "liked_at",
                    "repo_id",
                    "repo_type",
                    "repo_author",
                    "repo_url",
                ],
            )
            item["likes_sample"] = likes.get("items") if likes.get("ok") is True else []
            if likes.get("ok") is not True:
                section_errors["likes"] = str(
                    likes.get("error") or "likes fetch failed"
                )
        if "activity" in requested_sections:
            activity = await ctx.call_helper(
                "hf_recent_activity",
                feed_type="user",
                entity=resolved_handle,
                limit=activity_lim,
                max_pages=USER_SUMMARY_ACTIVITY_MAX_PAGES,
                count_only=activity_lim == 0,
                fields=["timestamp", "event_type", "repo_type", "repo_id"],
            )
            item["activity_sample"] = (
                activity.get("items") if activity.get("ok") is True else []
            )
            if activity.get("ok") is not True:
                section_errors["activity"] = str(
                    activity.get("error") or "activity fetch failed"
                )
        return ctx._helper_success(
            start_calls=start_calls,
            source=f"/api/users/{resolved_handle}/overview",
            items=[item],
            scanned=1,
            matched=1,
            returned=1,
            truncated=False,
            handle=resolved_handle,
            entity_type="user",
            include=sorted(requested_sections),
            likes_limit=likes_lim,
            activity_limit=activity_lim,
            section_errors=section_errors or None,
        )
    # User lookup failed — fall back to treating the handle as an organization.
    org_overview = await _hf_org_overview(ctx, resolved_handle)
    if org_overview.get("ok") is True:
        overview_item = ctx._helper_item(org_overview) or {
            "organization": resolved_handle
        }
        item = {
            "handle": str(overview_item.get("organization") or resolved_handle),
            "entity_type": "organization",
            "display_name": overview_item.get("display_name")
            or str(overview_item.get("organization") or resolved_handle),
            "description": overview_item.get("description"),
            "avatar_url": overview_item.get("avatar_url"),
            "website_url": overview_item.get("website_url"),
            "followers_count": ctx._overview_count(overview_item, "followers"),
            "members_count": ctx._overview_count(overview_item, "members"),
            "models_count": ctx._overview_count(overview_item, "models"),
            "datasets_count": ctx._overview_count(overview_item, "datasets"),
            "spaces_count": ctx._overview_count(overview_item, "spaces"),
        }
        return ctx._helper_success(
            start_calls=start_calls,
            source=f"/api/organizations/{resolved_handle}/overview",
            items=[item],
            scanned=1,
            matched=1,
            returned=1,
            truncated=False,
            handle=resolved_handle,
            entity_type="organization",
            include=[],
            ignored_includes=sorted(requested_sections) or None,
        )
    # Neither lookup succeeded; surface the most informative error available.
    error = (
        user_overview.get("error")
        or org_overview.get("error")
        or "profile fetch failed"
    )
    return ctx._helper_error(
        start_calls=start_calls,
        source=f"/api/profiles/{resolved_handle}",
        error=error,
        handle=resolved_handle,
    )
803
+
804
+
805
async def hf_user_graph(
    ctx: HelperRuntimeContext,
    username: str | None = None,
    relation: str = "followers",
    limit: int | None = None,
    scan_limit: int | None = None,
    count_only: bool = False,
    pro_only: bool | None = None,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """Public entry point for follower/following graph queries.

    Validates the relation, resolves a missing username to the current
    authenticated user, then delegates to the shared graph implementation.
    """
    start_calls = ctx.call_count["n"]
    normalized_relation = str(relation or "").strip().lower() or "followers"
    if normalized_relation not in {"followers", "following"}:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/users/<u>/followers",
            error="relation must be 'followers' or 'following'",
        )
    target_user, resolve_error = await resolve_username_or_current(ctx, username)
    error_source = f"/api/users/<u>/{normalized_relation}"
    if resolve_error:
        return ctx._helper_error(
            start_calls=start_calls,
            source=error_source,
            error=resolve_error,
            relation=normalized_relation,
        )
    if not isinstance(target_user, str):
        return ctx._helper_error(
            start_calls=start_calls,
            source=error_source,
            error="username is required",
            relation=normalized_relation,
        )
    return await _user_graph_helper(
        ctx,
        normalized_relation,
        target_user,
        pro_only,
        limit,
        scan_limit,
        count_only,
        where,
        fields,
        helper_name="hf_user_graph",
    )
851
+
852
+
853
def register_profile_helpers(
    ctx: HelperRuntimeContext,
) -> dict[str, Callable[..., Any]]:
    """Bind this module's public helpers to *ctx* and expose them by name."""
    exported = {
        "hf_whoami": hf_whoami,
        "hf_org_members": hf_org_members,
        "hf_profile_summary": hf_profile_summary,
        "hf_user_graph": hf_user_graph,
    }
    return {name: partial(fn, ctx) for name, fn in exported.items()}
.prod/monty_api/helpers/repos.py ADDED
@@ -0,0 +1,1359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ # ruff: noqa: C901, PLR0912, PLR0913, PLR0915, PLR0917
4
+ from itertools import islice
5
+ from typing import TYPE_CHECKING, Any, Callable
6
+ from ..context_types import HelperRuntimeContext
7
+ from ..helper_contracts import repo_expand_alias_map
8
+ from ..constants import (
9
+ ACTOR_CANONICAL_FIELDS,
10
+ DAILY_PAPER_CANONICAL_FIELDS,
11
+ EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
12
+ LIKES_ENRICHMENT_MAX_REPOS,
13
+ LIKES_RANKING_WINDOW_DEFAULT,
14
+ LIKES_SCAN_LIMIT_CAP,
15
+ OUTPUT_ITEMS_TRUNCATION_LIMIT,
16
+ REPO_CANONICAL_FIELDS,
17
+ SELECTIVE_ENDPOINT_RETURN_HARD_CAP,
18
+ TRENDING_ENDPOINT_MAX_LIMIT,
19
+ USER_LIKES_CANONICAL_FIELDS,
20
+ )
21
+ from ..registry import (
22
+ REPO_SEARCH_DEFAULT_EXPAND,
23
+ REPO_SEARCH_EXTRA_ARGS,
24
+ TRENDING_DEFAULT_FIELDS,
25
+ )
26
+
27
+
28
+ from .common import resolve_username_or_current
29
+
30
+ from functools import partial
31
+
32
+ if TYPE_CHECKING:
33
+ from huggingface_hub import HfApi
34
+
35
+
36
def _sanitize_repo_expand_values(
    repo_type: str, raw_expand: Any
) -> tuple[list[str] | None, list[str], str | None]:
    """Normalize requested ``expand`` values against the repo-type alias map.

    Returns ``(kept, dropped, error)``: ``kept`` holds deduplicated canonical
    values (or None when nothing survives), ``dropped`` lists unrecognized
    inputs, and ``error`` is set only when ``raw_expand`` has the wrong type.
    """
    if raw_expand is None:
        return (None, [], None)
    if isinstance(raw_expand, str):
        candidates = [raw_expand]
    elif isinstance(raw_expand, (list, tuple, set)):
        candidates = list(raw_expand)
    else:
        return (None, [], "expand must be a string or a list of strings")

    # De-duplicate while preserving first-seen order; ignore blank entries.
    seen: list[str] = []
    for entry in candidates:
        text = str(entry).strip()
        if text and text not in seen:
            seen.append(text)

    alias_map = repo_expand_alias_map(repo_type)
    unknown = [name for name in seen if name not in alias_map]
    kept: list[str] = []
    for name in seen:
        canonical = alias_map.get(name)
        if canonical is not None and canonical not in kept:
            kept.append(canonical)
    return (kept or None, unknown, None)
63
+
64
+
65
def _resolve_repo_search_types(
    ctx: HelperRuntimeContext,
    *,
    repo_type: str | None,
    repo_types: list[str] | None,
    default_repo_type: str = "model",
) -> tuple[list[str] | None, str | None]:
    """Resolve repo_type/repo_types into a canonical list, or an error message.

    Exactly one of the two arguments may be supplied; an empty/absent single
    ``repo_type`` falls back to ``default_repo_type``.
    """
    if repo_type is not None and repo_types is not None:
        return (None, "Pass either repo_type or repo_types, not both")

    if repo_types is None:
        single = str(repo_type or "").strip()
        if not single:
            return ([default_repo_type], None)
        resolved = ctx._canonical_repo_type(single, default="")
        if resolved in {"model", "dataset", "space"}:
            return ([resolved], None)
        return (None, f"Unsupported repo_type '{repo_type}'")

    candidates = ctx._coerce_str_list(repo_types)
    if not candidates:
        return (None, "repo_types must not be empty")

    resolved_types: list[str] = []
    for raw in candidates:
        resolved = ctx._canonical_repo_type(raw, default="")
        if resolved not in {"model", "dataset", "space"}:
            return (None, f"Unsupported repo_type '{raw}'")
        if resolved not in resolved_types:
            resolved_types.append(resolved)
    return (resolved_types, None)
96
+
97
+
98
+ def _clean_repo_search_text(value: str | None) -> str | None:
99
+ cleaned = str(value or "").strip()
100
+ return cleaned or None
101
+
102
+
103
def _normalize_repo_search_filter(
    ctx: HelperRuntimeContext, value: str | list[str] | None
) -> tuple[list[str] | None, str | None]:
    """Coerce a filter argument into a list of strings.

    Returns ``(values, error)``; ``values`` is None for absent/empty input,
    and ``error`` is set when the input cannot be coerced.
    """
    if value is None:
        return (None, None)
    try:
        coerced = ctx._coerce_str_list(value)
    except ValueError:
        return (None, "filter must be a string or a list of strings")
    if coerced:
        return (coerced, None)
    return (None, None)
113
+
114
+
115
def _build_repo_search_extra_args(
    repo_type: str, **candidate_args: Any
) -> tuple[dict[str, Any], list[str], str | None]:
    """Validate and normalize type-specific extra args for a repo list call.

    Returns ``(args, dropped_expand, error)``: ``args`` are the kwargs to
    forward to the list endpoint, ``dropped_expand`` names expand values
    that were requested but not recognized for ``repo_type``, and ``error``
    is a message when an argument is not allowed at all (in which case the
    other two are empty).
    """
    normalized: dict[str, Any] = {}
    for key, value in candidate_args.items():
        if value is None:
            continue
        # Accept both snake_case and camelCase spellings of cardData, and
        # keep the boolean-ish flags only when truthy.
        if key in {"card_data", "cardData"}:
            if value:
                normalized["cardData"] = True
            continue
        if key in {"fetch_config", "linked"}:
            if value:
                normalized[key] = True
            continue
        normalized[key] = value

    # Reject anything outside the per-type allowlist up front.
    allowed_extra = REPO_SEARCH_EXTRA_ARGS.get(repo_type, set())
    unsupported = sorted(str(key) for key in normalized if str(key) not in allowed_extra)
    if unsupported:
        return (
            {},
            [],
            f"Unsupported search args for repo_type='{repo_type}': {unsupported}. Allowed args: {sorted(allowed_extra)}",
        )

    dropped_expand: list[str] = []
    if "expand" in normalized:
        # Map expand aliases to canonical names; unknown values are reported
        # back to the caller (dropped_expand) rather than failing the call.
        kept_expand, dropped_expand, expand_error = _sanitize_repo_expand_values(
            repo_type, normalized.get("expand")
        )
        if expand_error:
            return ({}, [], expand_error)
        if kept_expand is None:
            normalized.pop("expand", None)
        else:
            normalized["expand"] = kept_expand

    # Without any explicit projection knob, fall back to the default expand
    # set for this repo type so rows carry a useful baseline of fields.
    if not any(
        key in normalized for key in ("expand", "full", "cardData", "fetch_config")
    ):
        normalized["expand"] = list(REPO_SEARCH_DEFAULT_EXPAND[repo_type])

    return (normalized, dropped_expand, None)
159
+
160
+
161
+ def _normalize_user_likes_sort(sort: str | None) -> tuple[str | None, str | None]:
162
+ normalized = str(sort or "liked_at").strip() or "liked_at"
163
+ if normalized not in {"liked_at", "repo_likes", "repo_downloads"}:
164
+ return (None, "sort must be one of liked_at, repo_likes, repo_downloads")
165
+ return (normalized, None)
166
+
167
+
168
async def _run_repo_search(
    ctx: HelperRuntimeContext,
    *,
    helper_name: str,
    requested_repo_types: list[str],
    search: str | None,
    filter: str | list[str] | None,
    author: str | None,
    sort: str | None,
    limit: int,
    fields: list[str] | None,
    post_filter: dict[str, Any] | None,
    extra_args_by_type: dict[str, dict[str, Any]] | None = None,
) -> dict[str, Any]:
    """Shared implementation behind the hf_*_search helpers.

    Lists repos of each type in ``requested_repo_types`` through the host
    HF API client, normalizes and merges the rows, applies the optional
    ``post_filter`` and ``fields`` projection, then wraps everything in the
    standard helper success/error envelope with limit/truncation metadata.
    Any validation failure or upstream error is returned as a helper-error
    envelope rather than raised.
    """
    start_calls = ctx.call_count["n"]
    # Per-helper limit policy (defaults can be overridden by policy config).
    default_limit = ctx._policy_int(helper_name, "default_limit", 20)
    max_limit = ctx._policy_int(
        helper_name, "max_limit", SELECTIVE_ENDPOINT_RETURN_HARD_CAP
    )
    filter_list, filter_error = _normalize_repo_search_filter(ctx, filter)
    if filter_error:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos",
            error=filter_error,
        )

    term = _clean_repo_search_text(search)
    author_clean = _clean_repo_search_text(author)
    requested_limit = limit
    applied_limit = ctx._clamp_int(
        limit,
        default=default_limit,
        minimum=1,
        maximum=max_limit,
    )
    limit_meta = ctx._derive_limit_metadata(
        requested_limit=requested_limit,
        applied_limit=applied_limit,
        default_limit_used=limit == default_limit,
    )
    hard_cap_applied = bool(limit_meta.get("hard_cap_applied"))

    # Validate the sort key per repo type before issuing any network calls.
    sort_keys: dict[str, str | None] = {}
    for repo_type in requested_repo_types:
        sort_key, sort_error = ctx._normalize_repo_sort_key(repo_type, sort)
        if sort_error:
            return ctx._helper_error(
                start_calls=start_calls,
                source=f"/api/{repo_type}s",
                error=sort_error,
            )
        sort_keys[repo_type] = sort_key

    all_items: list[dict[str, Any]] = []
    scanned = 0
    source_endpoints: list[str] = []
    limit_boundary_hit = False
    ignored_expand: dict[str, list[str]] = {}
    api = ctx._get_hf_api_client()

    # One list call per requested repo type; rows are normalized to the
    # shared canonical shape so they can be merged and sorted together.
    for repo_type in requested_repo_types:
        endpoint = f"/api/{repo_type}s"
        source_endpoints.append(endpoint)
        raw_extra_args = dict((extra_args_by_type or {}).get(repo_type, {}))
        extra_args, dropped_expand, extra_error = _build_repo_search_extra_args(
            repo_type,
            **raw_extra_args,
        )
        if extra_error:
            return ctx._helper_error(
                start_calls=start_calls,
                source=endpoint,
                error=extra_error,
            )
        if dropped_expand:
            ignored_expand[repo_type] = dropped_expand
        try:
            # repo_type/extra_args are bound as lambda defaults so the
            # closure captures this iteration's values, not the loop var.
            payload = ctx._host_hf_call(
                endpoint,
                lambda repo_type=repo_type, extra_args=extra_args: ctx._repo_list_call(
                    api,
                    repo_type,
                    search=term,
                    author=author_clean,
                    filter=filter_list,
                    sort=sort_keys[repo_type],
                    limit=applied_limit,
                    **extra_args,
                ),
            )
        except Exception as e:
            return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
        scanned += len(payload)
        # A full page suggests the upstream listing may have more rows.
        if len(payload) >= applied_limit:
            limit_boundary_hit = True
        all_items.extend(
            ctx._normalize_repo_search_row(row, repo_type)
            for row in payload[:applied_limit]
        )

    # Client-side post-filtering over the canonical fields only.
    try:
        all_items = ctx._apply_where(
            all_items, post_filter, allowed_fields=REPO_CANONICAL_FIELDS
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos",
            error=exc,
        )
    # All per-type sort keys derive from the same `sort`; use the first one
    # for the merged sort.
    combined_sort_key = next(iter(sort_keys.values()), None)
    all_items = ctx._sort_repo_rows(all_items, combined_sort_key)
    matched = len(all_items)
    try:
        all_items = ctx._project_repo_items(all_items[:applied_limit], fields)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos",
            error=exc,
        )

    # Derive truncation/continuation metadata for the envelope.
    more_available: bool | str = False
    truncated = False
    truncated_by = "none"
    next_request_hint: str | None = None
    if hard_cap_applied and scanned >= applied_limit:
        truncated = True
        truncated_by = "hard_cap"
        more_available = "unknown"
        next_request_hint = f"Increase limit above {applied_limit} to improve coverage"
    elif limit_boundary_hit:
        more_available = "unknown"
        next_request_hint = (
            f"Increase limit above {applied_limit} to check whether more rows exist"
        )

    return ctx._helper_success(
        start_calls=start_calls,
        source=",".join(source_endpoints),
        items=all_items,
        helper=helper_name,
        search=term,
        repo_types=requested_repo_types,
        filter=filter_list,
        sort=combined_sort_key,
        author=author_clean,
        limit=applied_limit,
        post_filter=post_filter if isinstance(post_filter, dict) and post_filter else None,
        scanned=scanned,
        matched=matched,
        returned=len(all_items),
        truncated=truncated,
        truncated_by=truncated_by,
        more_available=more_available,
        limit_boundary_hit=limit_boundary_hit,
        next_request_hint=next_request_hint,
        ignored_expand=ignored_expand or None,
        **limit_meta,
    )
329
+
330
+
331
async def hf_models_search(
    ctx: HelperRuntimeContext,
    search: str | None = None,
    filter: str | list[str] | None = None,
    author: str | None = None,
    apps: str | list[str] | None = None,
    gated: bool | None = None,
    inference: str | None = None,
    inference_provider: str | list[str] | None = None,
    model_name: str | None = None,
    trained_dataset: str | list[str] | None = None,
    pipeline_tag: str | None = None,
    emissions_thresholds: tuple[float, float] | None = None,
    sort: str | None = None,
    limit: int = 20,
    expand: list[str] | None = None,
    full: bool | None = None,
    card_data: bool = False,
    fetch_config: bool = False,
    fields: list[str] | None = None,
    post_filter: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Search model repos via the shared repo-search runner.

    Model-specific list arguments are bundled into the per-type extras
    mapping; all other parameters pass straight through.
    """
    model_extras: dict[str, Any] = {
        "apps": apps,
        "gated": gated,
        "inference": inference,
        "inference_provider": inference_provider,
        "model_name": model_name,
        "trained_dataset": trained_dataset,
        "pipeline_tag": pipeline_tag,
        "emissions_thresholds": emissions_thresholds,
        "expand": expand,
        "full": full,
        "card_data": card_data,
        "fetch_config": fetch_config,
    }
    return await _run_repo_search(
        ctx,
        helper_name="hf_models_search",
        requested_repo_types=["model"],
        search=search,
        filter=filter,
        author=author,
        sort=sort,
        limit=limit,
        fields=fields,
        post_filter=post_filter,
        extra_args_by_type={"model": model_extras},
    )
381
+
382
+
383
async def hf_datasets_search(
    ctx: HelperRuntimeContext,
    search: str | None = None,
    filter: str | list[str] | None = None,
    author: str | None = None,
    benchmark: str | bool | None = None,
    dataset_name: str | None = None,
    gated: bool | None = None,
    language_creators: str | list[str] | None = None,
    language: str | list[str] | None = None,
    multilinguality: str | list[str] | None = None,
    size_categories: str | list[str] | None = None,
    task_categories: str | list[str] | None = None,
    task_ids: str | list[str] | None = None,
    sort: str | None = None,
    limit: int = 20,
    expand: list[str] | None = None,
    full: bool | None = None,
    fields: list[str] | None = None,
    post_filter: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Search dataset repos via the shared repo-search runner.

    Dataset-specific list arguments are bundled into the per-type extras
    mapping; all other parameters pass straight through.
    """
    dataset_extras: dict[str, Any] = {
        "benchmark": benchmark,
        "dataset_name": dataset_name,
        "gated": gated,
        "language_creators": language_creators,
        "language": language,
        "multilinguality": multilinguality,
        "size_categories": size_categories,
        "task_categories": task_categories,
        "task_ids": task_ids,
        "expand": expand,
        "full": full,
    }
    return await _run_repo_search(
        ctx,
        helper_name="hf_datasets_search",
        requested_repo_types=["dataset"],
        search=search,
        filter=filter,
        author=author,
        sort=sort,
        limit=limit,
        fields=fields,
        post_filter=post_filter,
        extra_args_by_type={"dataset": dataset_extras},
    )
431
+
432
+
433
async def hf_spaces_search(
    ctx: HelperRuntimeContext,
    search: str | None = None,
    filter: str | list[str] | None = None,
    author: str | None = None,
    datasets: str | list[str] | None = None,
    models: str | list[str] | None = None,
    linked: bool = False,
    sort: str | None = None,
    limit: int = 20,
    expand: list[str] | None = None,
    full: bool | None = None,
    fields: list[str] | None = None,
    post_filter: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Search Space repos via the shared repo-search runner.

    Space-specific list arguments are bundled into the per-type extras
    mapping; all other parameters pass straight through.
    """
    space_extras: dict[str, Any] = {
        "datasets": datasets,
        "models": models,
        "linked": linked,
        "expand": expand,
        "full": full,
    }
    return await _run_repo_search(
        ctx,
        helper_name="hf_spaces_search",
        requested_repo_types=["space"],
        search=search,
        filter=filter,
        author=author,
        sort=sort,
        limit=limit,
        fields=fields,
        post_filter=post_filter,
        extra_args_by_type={"space": space_extras},
    )
469
+
470
+
471
async def hf_repo_search(
    ctx: HelperRuntimeContext,
    search: str | None = None,
    repo_type: str | None = None,
    repo_types: list[str] | None = None,
    filter: str | list[str] | None = None,
    author: str | None = None,
    sort: str | None = None,
    limit: int = 20,
    fields: list[str] | None = None,
    post_filter: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Search across one or more repo types (models/datasets/spaces)."""
    start_calls = ctx.call_count["n"]
    resolved_types, type_error = _resolve_repo_search_types(
        ctx,
        repo_type=repo_type,
        repo_types=repo_types,
    )
    if type_error:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos",
            error=type_error,
        )
    if not resolved_types:
        # Defensive: the resolver defaults to ["model"], so this only fires
        # if it ever returns an empty list without an error message.
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos",
            error="repo_type or repo_types is required",
        )
    return await _run_repo_search(
        ctx,
        helper_name="hf_repo_search",
        requested_repo_types=resolved_types,
        search=search,
        filter=filter,
        author=author,
        sort=sort,
        limit=limit,
        fields=fields,
        post_filter=post_filter,
    )
513
+
514
+
515
async def hf_user_likes(
    ctx: HelperRuntimeContext,
    username: str | None = None,
    repo_types: list[str] | None = None,
    limit: int | None = None,
    scan_limit: int | None = None,
    count_only: bool = False,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
    sort: str | None = None,
    ranking_window: int | None = None,
) -> dict[str, Any]:
    """List repos liked by a user (the current user when ``username`` is None).

    Scans up to ``scan_limit`` rows from /api/users/<u>/likes, filters them
    by ``repo_types`` / ``where``, and returns them in like order
    (sort="liked_at") or re-ranked by repo popularity
    (sort="repo_likes" / "repo_downloads").  Popularity reranking works on a
    shortlist of the newest ``ranking_window`` matches, enriching missing
    like/download counts via per-repo detail calls subject to the call
    budget.  The result is the standard helper envelope with
    exhaustive-scan metadata.
    """
    start_calls = ctx.call_count["n"]
    # Policy-configurable caps for page size, scan depth, and reranking.
    default_limit = ctx._policy_int("hf_user_likes", "default_limit", 100)
    scan_cap = ctx._policy_int("hf_user_likes", "scan_max", LIKES_SCAN_LIMIT_CAP)
    ranking_default = ctx._policy_int(
        "hf_user_likes", "ranking_default", LIKES_RANKING_WINDOW_DEFAULT
    )
    enrich_cap = ctx._policy_int(
        "hf_user_likes", "enrich_max", LIKES_ENRICHMENT_MAX_REPOS
    )
    resolved_username, resolve_error = await resolve_username_or_current(ctx, username)
    if resolve_error:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/users/<u>/likes",
            error=resolve_error,
        )
    if not isinstance(resolved_username, str):
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/users/<u>/likes",
            error="username is required",
        )
    sort_key, sort_error = _normalize_user_likes_sort(sort)
    if sort_error:
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/users/{resolved_username}/likes",
            error=sort_error,
        )
    if sort_key is None:
        # Defensive: _normalize_user_likes_sort returns either a key or an
        # error, so this branch mirrors the error message above.
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/users/{resolved_username}/likes",
            error="sort must be one of liked_at, repo_likes, repo_downloads",
        )
    limit_plan = ctx._resolve_exhaustive_limits(
        limit=limit,
        count_only=count_only,
        default_limit=default_limit,
        max_limit=EXHAUSTIVE_HELPER_RETURN_HARD_CAP,
        scan_limit=scan_limit,
        scan_cap=scan_cap,
    )
    applied_limit = int(limit_plan["applied_limit"])
    scan_lim = int(limit_plan["applied_scan_limit"])
    try:
        normalized_where = ctx._normalize_where(
            where, allowed_fields=USER_LIKES_CANONICAL_FIELDS
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/users/{resolved_username}/likes",
            error=exc,
        )
    # Optional repo_type filter, validated against the canonical type set.
    allowed_repo_types: set[str] | None = None
    try:
        raw_repo_types: list[str] = (
            ctx._coerce_str_list(repo_types) if repo_types is not None else []
        )
    except ValueError as e:
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/users/{resolved_username}/likes",
            error=e,
        )
    if raw_repo_types:
        allowed_repo_types = set()
        for raw in raw_repo_types:
            canonical = ctx._canonical_repo_type(raw, default="")
            if canonical not in {"model", "dataset", "space"}:
                return ctx._helper_error(
                    start_calls=start_calls,
                    source=f"/api/users/{resolved_username}/likes",
                    error=f"Unsupported repo_type '{raw}'",
                )
            allowed_repo_types.add(canonical)
    endpoint = f"/api/users/{resolved_username}/likes"
    resp = ctx._host_raw_call(endpoint, params={"limit": scan_lim})
    if not resp.get("ok"):
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=resp.get("error") or "likes fetch failed",
        )
    payload = resp.get("data") if isinstance(resp.get("data"), list) else []
    scanned_rows = payload[:scan_lim]
    # Matched rows keep their scan index so later sorts can break ties by
    # original (liked_at) order.
    matched_rows: list[tuple[int, dict[str, Any]]] = []
    for row in scanned_rows:
        if not isinstance(row, dict):
            continue
        # The API exposes both a slim "repo" ref and a richer "repoData"
        # object; prefer repoData fields where present.
        repo = row.get("repo") if isinstance(row.get("repo"), dict) else {}
        repo_data = row.get("repoData") if isinstance(row.get("repoData"), dict) else {}
        repo_id = repo_data.get("id") or repo_data.get("name") or repo.get("name")
        if not isinstance(repo_id, str) or not repo_id:
            continue
        repo_type = ctx._canonical_repo_type(
            repo_data.get("type") or repo.get("type"), default=""
        )
        if not repo_type:
            repo_type = ctx._canonical_repo_type(repo.get("type"), default="model")
        if allowed_repo_types is not None and repo_type not in allowed_repo_types:
            continue
        repo_author = repo_data.get("author")
        if not isinstance(repo_author, str) and "/" in repo_id:
            # Fall back to the namespace prefix of "owner/name".
            repo_author = repo_id.split("/", 1)[0]
        item = {
            "liked_at": row.get("likedAt") or row.get("createdAt"),
            "repo_id": repo_id,
            "repo_type": repo_type,
            "repo_author": repo_author,
            "repo_likes": ctx._as_int(repo_data.get("likes")),
            "repo_downloads": ctx._as_int(repo_data.get("downloads")),
            "repo_url": ctx._repo_web_url(repo_type, repo_id),
        }
        if not ctx._item_matches_where(item, normalized_where):
            continue
        matched_rows.append((len(matched_rows), item))
    matched = len(matched_rows)
    # A short page means the scan saw everything, so counts are exact.
    scan_exhaustive = len(payload) < scan_lim
    exact_count = scan_exhaustive
    total_matched = matched
    total = total_matched
    effective_ranking_window: int | None = None
    ranking_window_hit = False
    ranking_window_applied = False
    ranking_next_request_hint: str | None = None
    ranking_complete = sort_key == "liked_at" and exact_count
    enriched = 0
    selected_pairs: list[tuple[int, dict[str, Any]]]
    if count_only:
        selected_pairs = []
        ranking_complete = False if matched > 0 else exact_count
    elif sort_key == "liked_at":
        # Natural like order: just take the head of the matched rows.
        selected_pairs = matched_rows[:applied_limit]
    else:
        # Popularity reranking over a bounded shortlist of the newest likes.
        metric = str(sort_key)
        requested_window = (
            ranking_window if ranking_window is not None else ranking_default
        )
        effective_ranking_window = ctx._clamp_int(
            requested_window, default=ranking_default, minimum=1, maximum=enrich_cap
        )
        ranking_window_applied = (
            ranking_window is not None
            and effective_ranking_window != int(ranking_window)
        )
        shortlist_size = min(effective_ranking_window, matched, scan_lim)
        ranking_window_hit = matched > shortlist_size
        shortlist = matched_rows[:shortlist_size]
        # Rows missing the ranking metric are candidates for enrichment via
        # a per-repo detail call, subject to the remaining call budget.
        candidates = [
            pair
            for pair in shortlist
            if pair[1].get(metric) is None
            and isinstance(pair[1].get("repo_id"), str)
            and (pair[1].get("repo_type") in {"model", "dataset", "space"})
        ]
        enrich_budget = min(len(candidates), ctx._budget_remaining(), shortlist_size)
        for _, item in candidates[:enrich_budget]:
            repo_type = str(item.get("repo_type"))
            repo_id = str(item.get("repo_id"))
            detail_endpoint = f"/api/{ctx._canonical_repo_type(repo_type)}s/{repo_id}"
            try:
                detail = ctx._host_hf_call(
                    detail_endpoint,
                    lambda rt=repo_type, rid=repo_id: ctx._repo_detail_call(
                        ctx._get_hf_api_client(), rt, rid
                    ),
                )
            except Exception:
                # Best-effort enrichment: a failed detail call just leaves
                # the row without popularity counts.
                continue
            likes = ctx._as_int(getattr(detail, "likes", None))
            downloads = ctx._as_int(getattr(detail, "downloads", None))
            if likes is not None:
                item["repo_likes"] = likes
            if downloads is not None:
                item["repo_downloads"] = downloads
            enriched += 1

        def _ranking_key(pair: tuple[int, dict[str, Any]]) -> tuple[int, int, int]:
            # Rows with a metric sort first (descending); rows without it
            # sort last; ties preserve scan (liked_at) order via idx.
            idx, row = pair
            metric_value = ctx._as_int(row.get(metric))
            if metric_value is None:
                return (1, 0, idx)
            return (0, -metric_value, idx)

        ranked_shortlist = sorted(shortlist, key=_ranking_key)
        selected_pairs = ranked_shortlist[:applied_limit]
        ranking_complete = (
            exact_count
            and shortlist_size >= matched
            and (len(candidates) <= enrich_budget)
        )
        if not ranking_complete:
            # Explain in the meta why the ranking may be partial.
            if ranking_window_hit:
                if effective_ranking_window < enrich_cap:
                    ranking_next_request_hint = (
                        f"Increase ranking_window above {effective_ranking_window} "
                        "for broader popularity reranking"
                    )
                else:
                    ranking_next_request_hint = (
                        f"Popularity reranking is capped at {effective_ranking_window} "
                        "candidate repos per call"
                    )
            elif len(candidates) > enrich_budget:
                ranking_next_request_hint = (
                    f"Popularity reranking exhausted detail budget after {enrich_budget} "
                    "repo enrichments"
                )
    try:
        items = ctx._project_user_like_items([row for _, row in selected_pairs], fields)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
        )
    popularity_present = sum(
        (1 for _, row in selected_pairs if row.get("repo_likes") is not None)
    )
    sample_complete = (
        exact_count
        and applied_limit >= matched
        and (sort_key == "liked_at" or ranking_complete)
        and (not count_only or matched == 0)
    )
    scan_limit_hit = not scan_exhaustive and len(payload) >= scan_lim
    more_available = ctx._derive_more_available(
        sample_complete=sample_complete,
        exact_count=exact_count,
        returned=len(items),
        total=total,
    )
    if scan_limit_hit:
        # With client-side filters active we can't tell whether unscanned
        # rows would match; without them more rows definitely exist.
        more_available = "unknown" if allowed_repo_types is not None or where else True
    meta = ctx._build_exhaustive_result_meta(
        base_meta={
            "scanned": len(scanned_rows),
            "total": total,
            "total_available": len(payload),
            "total_matched": total_matched,
            "count_source": "scan",
            "lower_bound": not exact_count,
            "enriched": enriched,
            "popularity_present": popularity_present,
            "sort_applied": sort_key,
            "ranking_window": effective_ranking_window,
            "requested_ranking_window": ranking_window,
            "ranking_window_applied": ranking_window_applied,
            "ranking_window_hit": ranking_window_hit,
            "ranking_next_request_hint": ranking_next_request_hint,
            "ranking_complete": ranking_complete,
            "username": resolved_username,
        },
        limit_plan=limit_plan,
        matched_count=matched,
        returned_count=len(items),
        exact_count=exact_count,
        count_only=count_only,
        sample_complete=sample_complete,
        more_available=more_available,
        scan_limit_hit=scan_limit_hit,
        truncated_extra=sort_key != "liked_at" and (not ranking_complete),
    )
    return ctx._helper_success(
        start_calls=start_calls, source=endpoint, items=items, meta=meta
    )
795
+
796
+
797
async def hf_repo_likers(
    ctx: HelperRuntimeContext,
    repo_id: str,
    repo_type: str,
    limit: int | None = None,
    count_only: bool = False,
    pro_only: bool | None = None,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """List the users who liked a repo.

    Fetches the full likers list from /api/<type>s/<repo>/likers (the
    endpoint is not paginated here), normalizes each row to the actor
    shape, applies the optional ``pro_only`` / ``where`` filters and
    ``fields`` projection, and returns the standard helper envelope with
    exact-count metadata.
    """
    start_calls = ctx.call_count["n"]
    rid = str(repo_id or "").strip()
    if not rid:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos/<repo>/likers",
            error="repo_id is required",
        )
    rt = ctx._canonical_repo_type(repo_type, default="")
    if rt not in {"model", "dataset", "space"}:
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/repos/{rid}/likers",
            error=f"Unsupported repo_type '{repo_type}'",
            repo_id=rid,
        )
    default_limit = ctx._policy_int("hf_repo_likers", "default_limit", 1000)
    requested_limit = limit
    default_limit_used = requested_limit is None and (not count_only)
    has_where = isinstance(where, dict) and bool(where)
    endpoint = f"/api/{rt}s/{rid}/likers"
    resp = ctx._host_raw_call(endpoint)
    if not resp.get("ok"):
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=resp.get("error") or "repo likers fetch failed",
            repo_id=rid,
            repo_type=rt,
        )
    payload = resp.get("data") if isinstance(resp.get("data"), list) else []
    try:
        normalized_where = ctx._normalize_where(
            where, allowed_fields=ACTOR_CANONICAL_FIELDS
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
            repo_id=rid,
            repo_type=rt,
        )
    # Normalize raw rows to the actor shape, applying filters inline.
    normalized: list[dict[str, Any]] = []
    for row in payload:
        if not isinstance(row, dict):
            continue
        username = row.get("user") or row.get("username")
        if not isinstance(username, str) or not username:
            continue
        item = {
            "username": username,
            "fullname": row.get("fullname"),
            # Default the actor type to "user" when absent or non-string.
            "type": row.get("type")
            if isinstance(row.get("type"), str) and row.get("type")
            else "user",
            "is_pro": row.get("isPro"),
        }
        # pro_only=True keeps only confirmed PRO users; pro_only=False
        # drops them; pro_only=None keeps everyone.
        if pro_only is True and item.get("is_pro") is not True:
            continue
        if pro_only is False and item.get("is_pro") is True:
            continue
        if not ctx._item_matches_where(item, normalized_where):
            continue
        normalized.append(item)
    # Resolve the applied limit: 0 for count_only, default when omitted or
    # unparseable, otherwise the (non-negative) requested value.
    if count_only:
        applied_limit = 0
    elif requested_limit is None:
        applied_limit = default_limit
    else:
        try:
            applied_limit = max(0, int(requested_limit))
        except Exception:
            applied_limit = default_limit
    limit_plan = {
        "requested_limit": requested_limit,
        "applied_limit": applied_limit,
        "default_limit_used": default_limit_used,
        "hard_cap_applied": False,
    }
    matched = len(normalized)
    items = [] if count_only else normalized[:applied_limit]
    limit_hit = applied_limit > 0 and matched > applied_limit
    truncated_by = ctx._derive_truncated_by(
        hard_cap=False, limit_hit=limit_hit
    )
    sample_complete = matched <= applied_limit and (not count_only or matched == 0)
    truncated = truncated_by != "none"
    more_available = ctx._derive_more_available(
        sample_complete=sample_complete,
        exact_count=True,
        returned=len(items),
        total=matched,
    )
    try:
        items = ctx._project_actor_items(items, fields)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source=endpoint,
            error=exc,
            repo_id=rid,
            repo_type=rt,
        )
    meta = ctx._build_exhaustive_meta(
        base_meta={
            "scanned": len(payload),
            "matched": matched,
            "returned": len(items),
            "total": matched,
            "total_available": len(payload),
            "total_matched": matched,
            "truncated": truncated,
            "count_source": "likers_list",
            "lower_bound": False,
            "repo_id": rid,
            "repo_type": rt,
            "pro_only": pro_only,
            "where_applied": has_where,
            "upstream_pagination": "none",
        },
        limit_plan=limit_plan,
        sample_complete=sample_complete,
        exact_count=True,
        truncated_by=truncated_by,
        more_available=more_available,
    )
    # The likers endpoint has no hard cap; make that explicit in the meta.
    meta["hard_cap_applied"] = False
    return ctx._helper_success(
        start_calls=start_calls, source=endpoint, items=items, meta=meta
    )
938
+
939
+
940
async def hf_repo_discussions(
    ctx: HelperRuntimeContext,
    repo_type: str,
    repo_id: str,
    limit: int = 20,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """List up to ``limit`` discussions for a repo as flat dict rows."""
    start_calls = ctx.call_count["n"]
    canonical_type = ctx._canonical_repo_type(repo_type)
    clean_repo_id = str(repo_id or "").strip()
    if "/" not in clean_repo_id:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/.../discussions",
            error="repo_id must be owner/name",
        )
    capped_limit = ctx._clamp_int(
        limit, default=20, minimum=1, maximum=SELECTIVE_ENDPOINT_RETURN_HARD_CAP
    )
    endpoint = f"/api/{canonical_type}s/{clean_repo_id}/discussions"

    def _fetch() -> list[Any]:
        # Pull at most capped_limit discussions from the paged iterator.
        stream = ctx._get_hf_api_client().get_repo_discussions(
            repo_id=clean_repo_id, repo_type=canonical_type
        )
        return list(islice(stream, capped_limit))

    try:
        discussions = ctx._host_hf_call(endpoint, _fetch)
    except Exception as e:
        return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)

    def _row(disc: Any) -> dict[str, Any]:
        # Flatten one Discussion object into the canonical row shape.
        created = getattr(disc, "created_at", None)
        return {
            "num": ctx._as_int(getattr(disc, "num", None)),
            "repo_id": clean_repo_id,
            "repo_type": canonical_type,
            "title": getattr(disc, "title", None),
            "author": getattr(disc, "author", None),
            "created_at": str(created) if created is not None else None,
            "status": getattr(disc, "status", None),
            "url": getattr(disc, "url", None),
        }

    items = [_row(d) for d in discussions]
    try:
        items = ctx._project_discussion_items(items, fields)
    except ValueError as exc:
        return ctx._helper_error(start_calls=start_calls, source=endpoint, error=exc)
    return ctx._helper_success(
        start_calls=start_calls,
        source=endpoint,
        items=items,
        scanned=len(items),
        matched=len(items),
        returned=len(items),
        truncated=False,
        total_count=None,
    )
1005
+
1006
+
1007
async def hf_repo_discussion_details(
    ctx: HelperRuntimeContext,
    repo_type: str,
    repo_id: str,
    discussion_num: int,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """Fetch one discussion and summarize its comment thread.

    Returns a single-item envelope whose row carries the discussion header
    plus a comment summary (count and the latest comment's author,
    timestamp, text, and rendered HTML).
    """
    start_calls = ctx.call_count["n"]
    rt = ctx._canonical_repo_type(repo_type)
    rid = str(repo_id or "").strip()
    if "/" not in rid:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/.../discussions/<num>",
            error="repo_id must be owner/name",
        )
    num = ctx._as_int(discussion_num)
    if num is None:
        return ctx._helper_error(
            start_calls=start_calls,
            source=f"/api/{rt}s/{rid}/discussions/<num>",
            error="discussion_num must be an integer",
        )
    endpoint = f"/api/{rt}s/{rid}/discussions/{num}"
    try:
        detail = ctx._host_hf_call(
            endpoint,
            lambda: ctx._get_hf_api_client().get_discussion_details(
                repo_id=rid, discussion_num=int(num), repo_type=rt
            ),
        )
    except Exception as e:
        return ctx._helper_error(start_calls=start_calls, source=endpoint, error=e)
    # Keep only the "comment" events from the discussion's event stream.
    comment_events: list[dict[str, Any]] = []
    raw_events = getattr(detail, "events", None)
    if isinstance(raw_events, list):
        for event in raw_events:
            if str(getattr(event, "type", "")).strip().lower() != "comment":
                continue
            comment_events.append(
                {
                    "author": getattr(event, "author", None),
                    "created_at": ctx._dt_to_str(getattr(event, "created_at", None)),
                    "text": getattr(event, "content", None),
                    "rendered": getattr(event, "rendered", None),
                }
            )
    # Latest comment = max by the stringified created_at timestamp
    # (assumes sortable timestamp strings, e.g. ISO-8601 — see _dt_to_str).
    latest_comment: dict[str, Any] | None = None
    if comment_events:
        latest_comment = max(
            comment_events, key=lambda row: str(row.get("created_at") or "")
        )
    item: dict[str, Any] = {
        "num": num,
        "repo_id": rid,
        "repo_type": rt,
        "title": getattr(detail, "title", None),
        "author": getattr(detail, "author", None),
        "created_at": ctx._dt_to_str(getattr(detail, "created_at", None)),
        "status": getattr(detail, "status", None),
        "url": getattr(detail, "url", None),
        "comment_count": len(comment_events),
        "latest_comment_author": latest_comment.get("author")
        if latest_comment
        else None,
        "latest_comment_created_at": latest_comment.get("created_at")
        if latest_comment
        else None,
        "latest_comment_text": latest_comment.get("text") if latest_comment else None,
        "latest_comment_html": latest_comment.get("rendered")
        if latest_comment
        else None,
    }
    try:
        items = ctx._project_discussion_detail_items([item], fields)
    except ValueError as exc:
        return ctx._helper_error(start_calls=start_calls, source=endpoint, error=exc)
    return ctx._helper_success(
        start_calls=start_calls,
        source=endpoint,
        items=items,
        scanned=len(comment_events),
        matched=1,
        returned=len(items),
        truncated=False,
        total_comments=len(comment_events),
    )
1094
+
1095
+
1096
def _resolve_repo_detail_row(
    ctx: HelperRuntimeContext,
    api: "HfApi",
    repo_id: str,
    attempt_types: list[str],
) -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
    """Fetch the detail row for a single repo id, trying repo types in order.

    Returns a ``(row, failure)`` pair where exactly one side is non-None:
    ``row`` is the normalized detail dict on the first successful type,
    ``failure`` describes why every attempted type failed.
    """
    rid = str(repo_id or "").strip()
    if "/" not in rid:
        # Bare names are rejected up front; the hub detail endpoints need owner/name.
        return (None, {"repo_id": rid, "error": "repo_id must be owner/name"})
    resolved_type: str | None = None
    detail: Any = None
    last_endpoint = "/api/repos"
    errors: list[str] = []
    for rt in attempt_types:
        endpoint = f"/api/{rt}s/{rid}"
        last_endpoint = endpoint
        try:
            # rt/rid are bound as lambda defaults so the late-binding closure
            # captures this iteration's values, not the loop's final ones.
            detail = ctx._host_hf_call(
                endpoint, lambda rt=rt, rid=rid: ctx._repo_detail_call(api, rt, rid)
            )
            resolved_type = rt
            break
        except Exception as e:
            errors.append(f"{rt}: {str(e)}")
    if resolved_type is None or detail is None:
        return (
            None,
            {
                "repo_id": rid,
                # Cap at the first three per-type errors to keep payloads small.
                "error": "; ".join(errors[:3]) if errors else "repo lookup failed",
                "attempted_repo_types": list(attempt_types),
                "source": last_endpoint,
            },
        )
    return (ctx._normalize_repo_detail_row(detail, resolved_type, rid), None)
1131
+
1132
+
1133
async def hf_repo_details(
    ctx: HelperRuntimeContext,
    repo_id: str | None = None,
    repo_ids: list[str] | None = None,
    repo_type: str = "auto",
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """Return normalized detail rows for one or more repos.

    Exactly one of ``repo_id`` / ``repo_ids`` may be given. With
    ``repo_type="auto"`` each id is resolved by trying model, dataset, then
    space. Per-id failures are collected into ``failures``; the helper only
    returns an error envelope when no id resolves at all.
    """
    start_calls = ctx.call_count["n"]
    if repo_id is not None and repo_ids is not None:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos",
            error="Pass either repo_id or repo_ids, not both",
        )
    requested_ids = (
        [str(repo_id).strip()]
        if isinstance(repo_id, str) and str(repo_id).strip()
        else []
    )
    if repo_ids is not None:
        # repo_ids takes over entirely when supplied (repo_id must be None here).
        requested_ids = ctx._coerce_str_list(repo_ids)
    if not requested_ids:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos",
            error="repo_id or repo_ids is required",
        )
    raw_type = str(repo_type or "auto").strip().lower()
    if raw_type in {"", "auto"}:
        # Auto mode probes all three repo kinds per id, in this order.
        base_attempt_types = ["model", "dataset", "space"]
    else:
        canonical_type = ctx._canonical_repo_type(raw_type, default="")
        if canonical_type not in {"model", "dataset", "space"}:
            return ctx._helper_error(
                start_calls=start_calls,
                source="/api/repos",
                error=f"Unsupported repo_type '{repo_type}'",
            )
        base_attempt_types = [canonical_type]
    api = ctx._get_hf_api_client()
    items: list[dict[str, Any]] = []
    failures: list[dict[str, Any]] = []
    for rid in requested_ids:
        row, failure = _resolve_repo_detail_row(ctx, api, rid, base_attempt_types)
        if row is None:
            if failure is not None:
                failures.append(failure)
            continue
        items.append(row)
    if not items:
        # Surface the first failure's message as the headline error.
        summary = failures[0]["error"] if failures else "repo lookup failed"
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/repos",
            error=summary,
            failures=failures,
            repo_type=repo_type,
        )
    try:
        items = ctx._project_repo_items(items, fields)
    except ValueError as exc:
        return ctx._helper_error(start_calls=start_calls, source="/api/repos", error=exc)
    return ctx._helper_success(
        start_calls=start_calls,
        source="/api/repos",
        items=items,
        repo_type=repo_type,
        requested_repo_ids=requested_ids,
        failures=failures or None,
        matched=len(items),
        returned=len(items),
    )
1205
+
1206
+
1207
async def hf_trending(
    ctx: HelperRuntimeContext,
    repo_type: str = "model",
    limit: int = 20,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """Fetch the hub's /api/trending feed, normalized to canonical repo rows.

    ``repo_type`` accepts model/dataset/space (singular or plural) or "all".
    ``where`` filters and ``fields`` projects against TRENDING_DEFAULT_FIELDS;
    both reject unknown fields via ValueError from the ctx helpers.
    """
    start_calls = ctx.call_count["n"]
    # Limits are policy-configurable, with code defaults as fallback.
    default_limit = ctx._policy_int("hf_trending", "default_limit", 20)
    max_limit = ctx._policy_int(
        "hf_trending", "max_limit", TRENDING_ENDPOINT_MAX_LIMIT
    )
    raw_type = str(repo_type or "model").strip().lower()
    if raw_type == "all":
        requested_type = "all"
    else:
        requested_type = ctx._canonical_repo_type(raw_type, default="")
        if requested_type not in {"model", "dataset", "space"}:
            return ctx._helper_error(
                start_calls=start_calls,
                source="/api/trending",
                error=f"Unsupported repo_type '{repo_type}'",
            )
    lim = ctx._clamp_int(limit, default=default_limit, minimum=1, maximum=max_limit)
    resp = ctx._host_raw_call(
        "/api/trending", params={"type": requested_type, "limit": lim}
    )
    if not resp.get("ok"):
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/trending",
            error=resp.get("error") or "trending fetch failed",
        )
    # Defensive shape checks: the endpoint wraps rows in {"recentlyTrending": [...]}.
    payload = resp.get("data") if isinstance(resp.get("data"), dict) else {}
    rows = (
        payload.get("recentlyTrending")
        if isinstance(payload.get("recentlyTrending"), list)
        else []
    )
    items: list[dict[str, Any]] = []
    # When "all" is requested, rows missing a type are labeled "model".
    default_row_type = requested_type if requested_type != "all" else "model"
    for idx, row in enumerate(rows[:lim], start=1):
        if not isinstance(row, dict):
            continue
        repo = row.get("repoData") if isinstance(row.get("repoData"), dict) else {}
        items.append(ctx._normalize_trending_row(repo, default_row_type, rank=idx))
    try:
        items = ctx._apply_where(items, where, allowed_fields=TRENDING_DEFAULT_FIELDS)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/trending",
            error=exc,
        )
    # matched counts post-filter, pre-projection rows.
    matched = len(items)
    try:
        items = ctx._project_items(
            items[:lim],
            fields,
            allowed_fields=TRENDING_DEFAULT_FIELDS,
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/trending",
            error=exc,
        )
    return ctx._helper_success(
        start_calls=start_calls,
        source="/api/trending",
        items=items,
        repo_type=requested_type,
        limit=lim,
        scanned=len(rows),
        matched=matched,
        returned=len(items),
        trending_score_available=any(
            (item.get("trending_score") is not None for item in items)
        ),
        ordered_ranking=True,
    )
1288
+
1289
+
1290
async def hf_daily_papers(
    ctx: HelperRuntimeContext,
    limit: int = 20,
    where: dict[str, Any] | None = None,
    fields: list[str] | None = None,
) -> dict[str, Any]:
    """Fetch the hub's /api/daily_papers feed as normalized paper rows.

    Rows are ranked in feed order; ``where`` filters against
    DAILY_PAPER_CANONICAL_FIELDS and ``fields`` projects the output.
    """
    start_calls = ctx.call_count["n"]
    # Limits are policy-configurable, with code defaults as fallback.
    default_limit = ctx._policy_int("hf_daily_papers", "default_limit", 20)
    max_limit = ctx._policy_int(
        "hf_daily_papers", "max_limit", OUTPUT_ITEMS_TRUNCATION_LIMIT
    )
    lim = ctx._clamp_int(limit, default=default_limit, minimum=1, maximum=max_limit)
    resp = ctx._host_raw_call("/api/daily_papers", params={"limit": lim})
    if not resp.get("ok"):
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/daily_papers",
            error=resp.get("error") or "daily papers fetch failed",
        )
    # The endpoint returns a bare JSON list; anything else is treated as empty.
    payload = resp.get("data") if isinstance(resp.get("data"), list) else []
    items: list[dict[str, Any]] = []
    for idx, row in enumerate(payload[:lim], start=1):
        if not isinstance(row, dict):
            continue
        items.append(ctx._normalize_daily_paper_row(row, rank=idx))
    try:
        items = ctx._apply_where(
            items, where, allowed_fields=DAILY_PAPER_CANONICAL_FIELDS
        )
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/daily_papers",
            error=exc,
        )
    # matched counts post-filter, pre-projection rows.
    matched = len(items)
    try:
        items = ctx._project_daily_paper_items(items[:lim], fields)
    except ValueError as exc:
        return ctx._helper_error(
            start_calls=start_calls,
            source="/api/daily_papers",
            error=exc,
        )
    return ctx._helper_success(
        start_calls=start_calls,
        source="/api/daily_papers",
        items=items,
        limit=lim,
        scanned=len(payload),
        matched=matched,
        returned=len(items),
        ordered_ranking=True,
    )
1344
+
1345
+
1346
def register_repo_helpers(ctx: HelperRuntimeContext) -> dict[str, Callable[..., Any]]:
    """Bind every repo/search helper coroutine to *ctx* and return them by name."""
    helper_fns = (
        hf_models_search,
        hf_datasets_search,
        hf_spaces_search,
        hf_repo_search,
        hf_user_likes,
        hf_repo_likers,
        hf_repo_discussions,
        hf_repo_discussion_details,
        hf_repo_details,
        hf_trending,
        hf_daily_papers,
    )
    return {fn.__name__: partial(fn, ctx) for fn in helper_fns}
.prod/monty_api/http_runtime.py ADDED
@@ -0,0 +1,597 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ from typing import TYPE_CHECKING, Any
6
+ from urllib.error import HTTPError, URLError
7
+ from urllib.parse import urlencode
8
+ from urllib.request import Request, urlopen
9
+
10
+ from .aliases import REPO_SORT_KEYS
11
+ from .constants import (
12
+ DEFAULT_TIMEOUT_SEC,
13
+ )
14
+ from .registry import REPO_API_ADAPTERS, REPO_SEARCH_DEFAULT_EXPAND
15
+ from .validation import _endpoint_allowed, _normalize_endpoint, _sanitize_params
16
+
17
+ if TYPE_CHECKING:
18
+ from huggingface_hub import HfApi
19
+
20
+
21
+ def _load_request_token() -> str | None:
22
+ try:
23
+ from fast_agent.mcp.auth.context import request_bearer_token # type: ignore
24
+
25
+ token = request_bearer_token.get()
26
+ if token:
27
+ return token
28
+ except Exception:
29
+ pass
30
+ return None
31
+
32
+
33
def _load_token() -> str | None:
    """Prefer the per-request bearer token; fall back to the HF_TOKEN env var."""
    request_token = _load_request_token()
    if request_token:
        return request_token
    env_token = os.getenv("HF_TOKEN")
    return env_token if env_token else None
38
+
39
+
40
+ def _json_best_effort(raw: bytes) -> Any:
41
+ try:
42
+ return json.loads(raw)
43
+ except Exception:
44
+ return raw.decode("utf-8", errors="replace")
45
+
46
+
47
+ def _clamp_int(value: Any, *, default: int, minimum: int, maximum: int) -> int:
48
+ try:
49
+ out = int(value)
50
+ except Exception:
51
+ out = default
52
+ return max(minimum, min(out, maximum))
53
+
54
+
55
+ def _as_int(value: Any) -> int | None:
56
+ try:
57
+ return int(value)
58
+ except Exception:
59
+ return None
60
+
61
+
62
+ def _canonical_repo_type(value: Any, *, default: str = "model") -> str:
63
+ raw = str(value or "").strip().lower()
64
+ aliases = {
65
+ "model": "model",
66
+ "models": "model",
67
+ "dataset": "dataset",
68
+ "datasets": "dataset",
69
+ "space": "space",
70
+ "spaces": "space",
71
+ }
72
+ return aliases.get(raw, default)
73
+
74
+
75
def _normalize_repo_sort_key(
    repo_type: str, sort_value: Any
) -> tuple[str | None, str | None]:
    """Validate a sort key against the universal set and the per-repo-type
    allow list.

    Returns (key, None) on success, (None, None) for an empty value, and
    (None, message) when the key is rejected.
    """
    key = str(sort_value or "").strip()
    if not key:
        return None, None

    universal = {
        "created_at",
        "downloads",
        "last_modified",
        "likes",
        "trending_score",
    }
    if key not in universal:
        return None, f"Invalid sort key '{key}'"

    rt = _canonical_repo_type(repo_type)
    allowed = REPO_SORT_KEYS.get(rt, set())
    if key in allowed:
        return key, None
    return (
        None,
        f"Invalid sort key '{key}' for repo_type='{rt}'. Allowed: {', '.join(sorted(allowed))}",
    )
100
+
101
+
102
def _repo_api_adapter(repo_type: str) -> Any:
    """Return the list/detail adapter for *repo_type*; ValueError if unknown."""
    canonical = _canonical_repo_type(repo_type, default="")
    try:
        return REPO_API_ADAPTERS[canonical]
    except KeyError:
        raise ValueError(f"Unsupported repo_type '{repo_type}'") from None
108
+
109
+
110
def _repo_list_call(api: HfApi, repo_type: str, **kwargs: Any) -> list[Any]:
    """Invoke the hub list endpoint for *repo_type* and materialize results."""
    lister = getattr(api, _repo_api_adapter(repo_type).list_method_name)
    return list(lister(**kwargs))
114
+
115
+
116
def _repo_detail_call(api: HfApi, repo_type: str, repo_id: str) -> Any:
    """Fetch a single repo's detail object; spaces additionally request the
    default expand fields so runtime info comes back."""
    adapter = _repo_api_adapter(repo_type)
    fetch = getattr(api, adapter.detail_method_name)
    if _canonical_repo_type(repo_type) != "space":
        return fetch(repo_id)
    return fetch(repo_id, expand=list(REPO_SEARCH_DEFAULT_EXPAND["space"]))
122
+
123
+
124
+ def _coerce_str_list(value: Any) -> list[str]:
125
+ if value is None:
126
+ return []
127
+ if isinstance(value, str):
128
+ raw = [value]
129
+ elif isinstance(value, (list, tuple, set)):
130
+ raw = list(value)
131
+ else:
132
+ raise ValueError("Expected a string or list of strings")
133
+ return [str(v).strip() for v in raw if str(v).strip()]
134
+
135
+
136
+ def _optional_str_list(value: Any) -> list[str] | None:
137
+ if value is None:
138
+ return None
139
+ if isinstance(value, str):
140
+ out = [value.strip()] if value.strip() else []
141
+ return out or None
142
+ if isinstance(value, (list, tuple, set)):
143
+ out = [str(v).strip() for v in value if str(v).strip()]
144
+ return out or None
145
+ return None
146
+
147
+
148
def _first_present(mapping: dict[str, Any], *keys: str) -> Any:
    """Return the first value among *keys* that is not None (0/"" count)."""
    for key in keys:
        val = mapping.get(key)
        if val is not None:
            return val
    return None


def _space_runtime_to_dict(value: Any) -> dict[str, Any] | None:
    """Normalize a Space runtime payload (raw dict or hub object) to a
    compact dict with keys: stage, hardware, requested_hardware, sleep_time.

    Keys whose value is None are dropped; an all-None payload collapses
    to None.

    Bug fix: fallbacks were previously chained with ``or`` (e.g.
    ``raw.get("sleep_time") or raw.get("sleepTime")``), which silently
    discarded legitimate falsy values such as ``sleep_time == 0``.
    Selection now uses explicit is-not-None checks.
    """
    if value is None:
        return None

    if isinstance(value, dict):
        hardware = value.get("hardware")
        if isinstance(hardware, dict):
            # Nested form: {"hardware": {"current": ..., "requested": ...}}
            current_hardware = hardware.get("current")
            requested_hardware = hardware.get("requested")
        else:
            current_hardware = hardware
            requested_hardware = _first_present(
                value, "requested_hardware", "requestedHardware"
            )
        sleep_time = _as_int(
            _first_present(value, "gcTimeout", "sleep_time", "sleepTime")
        )
        out = {
            "stage": value.get("stage"),
            "hardware": current_hardware,
            "requested_hardware": requested_hardware,
            "sleep_time": sleep_time,
        }
        return {key: val for key, val in out.items() if val is not None} or None

    # Object form (huggingface_hub SpaceRuntime-like): read attributes.
    out = {
        "stage": getattr(value, "stage", None),
        "hardware": getattr(value, "hardware", None),
        "requested_hardware": getattr(value, "requested_hardware", None),
        "sleep_time": _as_int(getattr(value, "sleep_time", None)),
    }
    return {key: val for key, val in out.items() if val is not None} or None
183
+
184
+
185
def _extract_num_params(num_params: Any = None, safetensors: Any = None) -> int | None:
    """Prefer an explicit parameter count; otherwise use safetensors' total."""
    direct_count = _as_int(num_params)
    if direct_count is not None:
        return direct_count

    total = getattr(safetensors, "total", None)
    if total is None and isinstance(safetensors, dict):
        total = safetensors.get("total")
    return _as_int(total)
194
+
195
+
196
def _extract_num_params_from_object(row: Any) -> int | None:
    """Pull a parameter count from a hub info object, trying each known
    attribute spelling before falling back to safetensors metadata."""
    raw_count = None
    for attr in ("num_params", "numParameters", "num_parameters"):
        raw_count = getattr(row, attr, None)
        if raw_count is not None:
            break
    return _extract_num_params(raw_count, getattr(row, "safetensors", None))
203
+
204
+
205
def _extract_num_params_from_dict(row: dict[str, Any]) -> int | None:
    """Dict-shaped twin of _extract_num_params_from_object: try each known
    key spelling, then fall back to safetensors metadata."""
    raw_count = None
    for key in ("num_params", "numParameters", "num_parameters"):
        raw_count = row.get(key)
        if raw_count is not None:
            break
    return _extract_num_params(raw_count, row.get("safetensors"))
212
+
213
+
214
+ def _extract_author_names(value: Any) -> list[str] | None:
215
+ if not isinstance(value, (list, tuple)):
216
+ return None
217
+ names: list[str] = []
218
+ for item in value:
219
+ if isinstance(item, str) and item.strip():
220
+ names.append(item.strip())
221
+ continue
222
+ if isinstance(item, dict):
223
+ name = item.get("name")
224
+ if isinstance(name, str) and name.strip():
225
+ names.append(name.strip())
226
+ continue
227
+ name = getattr(item, "name", None)
228
+ if isinstance(name, str) and name.strip():
229
+ names.append(name.strip())
230
+ return names or None
231
+
232
+
233
+ def _extract_profile_name(value: Any) -> str | None:
234
+ if isinstance(value, str) and value.strip():
235
+ return value.strip()
236
+ if isinstance(value, dict):
237
+ for key in ("user", "name", "fullname", "handle"):
238
+ candidate = value.get(key)
239
+ if isinstance(candidate, str) and candidate.strip():
240
+ return candidate.strip()
241
+ return None
242
+ for attr in ("user", "name", "fullname", "handle"):
243
+ candidate = getattr(value, attr, None)
244
+ if isinstance(candidate, str) and candidate.strip():
245
+ return candidate.strip()
246
+ return None
247
+
248
+
249
+ def _author_from_any(value: Any) -> str | None:
250
+ if isinstance(value, str) and value:
251
+ return value
252
+ if isinstance(value, dict):
253
+ for key in ("name", "username", "user", "login"):
254
+ candidate = value.get(key)
255
+ if isinstance(candidate, str) and candidate:
256
+ return candidate
257
+ return None
258
+
259
+
260
+ def _dt_to_str(value: Any) -> str | None:
261
+ if value is None:
262
+ return None
263
+ iso = getattr(value, "isoformat", None)
264
+ if callable(iso):
265
+ try:
266
+ return str(iso())
267
+ except Exception:
268
+ pass
269
+ return str(value)
270
+
271
+
272
def _repo_web_url(repo_type: str, repo_id: str | None) -> str | None:
    """Build the public hub URL for a repo, honoring HF_ENDPOINT overrides.

    Models live at the site root; datasets and spaces get a path prefix.
    """
    if not isinstance(repo_id, str) or not repo_id:
        return None
    base = os.getenv("HF_ENDPOINT", "https://huggingface.co").rstrip("/")
    prefix = {"dataset": "/datasets/", "space": "/spaces/"}.get(
        _canonical_repo_type(repo_type, default=""), "/"
    )
    return f"{base}{prefix}{repo_id}"
282
+
283
+
284
def _build_repo_row(
    *,
    repo_id: Any,
    repo_type: str,
    author: Any = None,
    likes: Any = None,
    downloads: Any = None,
    created_at: Any = None,
    last_modified: Any = None,
    pipeline_tag: Any = None,
    num_params: Any = None,
    private: Any = None,
    trending_score: Any = None,
    tags: Any = None,
    sha: Any = None,
    gated: Any = None,
    library_name: Any = None,
    description: Any = None,
    paperswithcode_id: Any = None,
    sdk: Any = None,
    models: Any = None,
    datasets: Any = None,
    subdomain: Any = None,
    runtime: Any = None,
    runtime_stage: Any = None,
) -> dict[str, Any]:
    """Assemble the canonical repo row dict shared by every normalizer.

    All inputs are best-effort coerced (ints via _as_int, timestamps via
    _dt_to_str, string lists via _optional_str_list); missing values stay
    None rather than being dropped, so the key set is stable.
    """
    rt = _canonical_repo_type(repo_type)
    author_value = author
    if (
        not isinstance(author_value, str)
        and isinstance(repo_id, str)
        and "/" in repo_id
    ):
        # Fall back to the owner segment of "owner/name" when no author given.
        author_value = repo_id.split("/", 1)[0]

    runtime_payload = _space_runtime_to_dict(runtime)
    # Explicit runtime_stage wins; otherwise take the stage from the
    # normalized runtime payload when one exists.
    resolved_runtime_stage = (
        runtime_stage
        if runtime_stage is not None
        else runtime_payload.get("stage")
        if isinstance(runtime_payload, dict)
        else None
    )

    return {
        # id/slug/repo_id intentionally mirror each other for downstream
        # field projection compatibility.
        "id": repo_id,
        "slug": repo_id,
        "repo_id": repo_id,
        "repo_type": rt,
        "author": author_value,
        "likes": _as_int(likes),
        "downloads": _as_int(downloads),
        "created_at": _dt_to_str(created_at),
        "last_modified": _dt_to_str(last_modified),
        "pipeline_tag": pipeline_tag,
        "num_params": _as_int(num_params),
        "private": private,
        "trending_score": _as_int(trending_score)
        if trending_score is not None
        else None,
        "repo_url": _repo_web_url(rt, repo_id if isinstance(repo_id, str) else None),
        "tags": _optional_str_list(tags),
        "sha": sha,
        "gated": gated,
        "library_name": library_name,
        "description": description,
        "paperswithcode_id": paperswithcode_id,
        # Space-specific fields below; None for models/datasets.
        "sdk": sdk,
        "models": _optional_str_list(models),
        "datasets": _optional_str_list(datasets),
        "subdomain": subdomain,
        "runtime_stage": resolved_runtime_stage,
        "runtime": runtime_payload,
    }
358
+
359
+
360
def _normalize_repo_search_row(row: Any, repo_type: str) -> dict[str, Any]:
    """Convert a hub list/detail object into the canonical repo row dict."""
    passthrough_attrs = (
        "author",
        "likes",
        "downloads",
        "created_at",
        "last_modified",
        "pipeline_tag",
        "private",
        "trending_score",
        "tags",
        "sha",
        "gated",
        "library_name",
        "description",
        "paperswithcode_id",
        "sdk",
        "models",
        "datasets",
        "subdomain",
        "runtime",
    )
    # Every passthrough field is read with the same getattr default, so a
    # keyword map is equivalent to spelling out each argument.
    kwargs = {attr: getattr(row, attr, None) for attr in passthrough_attrs}
    return _build_repo_row(
        repo_id=getattr(row, "id", None),
        repo_type=repo_type,
        num_params=_extract_num_params_from_object(row),
        **kwargs,
    )
385
+
386
+
387
def _normalize_repo_detail_row(
    detail: Any, repo_type: str, repo_id: str
) -> dict[str, Any]:
    """Normalize a detail object, backfilling id/slug/url from the
    originally requested repo_id when the payload omits them."""
    row = _normalize_repo_search_row(detail, repo_type)
    resolved_repo_id = row.get("repo_id") or repo_id
    for key in ("id", "slug"):
        row[key] = row.get(key) or resolved_repo_id
    row["repo_id"] = resolved_repo_id
    row["repo_url"] = _repo_web_url(repo_type, resolved_repo_id)
    return row
397
+
398
+
399
def _normalize_trending_row(
    repo: dict[str, Any], default_repo_type: str, rank: int | None = None
) -> dict[str, Any]:
    """Normalize one /api/trending repoData payload into the canonical repo
    row, optionally stamping the feed position as "trending_rank".

    The trending endpoint uses camelCase keys (createdAt, trendingScore, ...)
    which are mapped onto the snake_case canonical fields here.
    """
    row = _build_repo_row(
        repo_id=repo.get("id"),
        repo_type=repo.get("type") or repo.get("repoType") or default_repo_type,
        author=repo.get("author"),
        likes=repo.get("likes"),
        downloads=repo.get("downloads"),
        created_at=repo.get("createdAt"),
        last_modified=repo.get("lastModified"),
        pipeline_tag=repo.get("pipeline_tag"),
        num_params=_extract_num_params_from_dict(repo),
        private=repo.get("private"),
        trending_score=repo.get("trendingScore"),
        tags=repo.get("tags"),
        sha=repo.get("sha"),
        gated=repo.get("gated"),
        library_name=repo.get("library_name"),
        description=repo.get("description"),
        paperswithcode_id=repo.get("paperswithcode_id"),
        sdk=repo.get("sdk"),
        models=repo.get("models"),
        datasets=repo.get("datasets"),
        subdomain=repo.get("subdomain"),
        runtime=repo.get("runtime"),
        runtime_stage=repo.get("runtime_stage") or repo.get("runtimeStage"),
    )
    if rank is not None:
        row["trending_rank"] = rank
    return row
430
+
431
+
432
def _normalize_daily_paper_row(
    row: dict[str, Any], rank: int | None = None
) -> dict[str, Any]:
    """Flatten one /api/daily_papers entry (wrapper row + nested "paper"
    object) into a single canonical paper dict, with feed position in "rank".

    Top-level row values win over the nested paper values where both exist.
    """
    paper = row.get("paper") if isinstance(row.get("paper"), dict) else {}
    # Organization may live on the wrapper row or on the paper itself.
    org = (
        row.get("organization")
        if isinstance(row.get("organization"), dict)
        else paper.get("organization")
    )
    organization = None
    if isinstance(org, dict):
        organization = org.get("name") or org.get("fullname")

    item = {
        "paper_id": paper.get("id"),
        "title": row.get("title") or paper.get("title"),
        "summary": row.get("summary")
        or paper.get("summary")
        or paper.get("ai_summary"),
        "published_at": row.get("publishedAt") or paper.get("publishedAt"),
        "submitted_on_daily_at": paper.get("submittedOnDailyAt"),
        "authors": _extract_author_names(paper.get("authors")),
        "organization": organization,
        "submitted_by": _extract_profile_name(
            row.get("submittedBy") or paper.get("submittedOnDailyBy")
        ),
        "discussion_id": paper.get("discussionId"),
        "upvotes": _as_int(paper.get("upvotes")),
        "github_repo_url": paper.get("githubRepo"),
        "github_stars": _as_int(paper.get("githubStars")),
        "project_page_url": paper.get("projectPage"),
        "num_comments": _as_int(row.get("numComments")),
        # Only a real boolean is kept; anything else is treated as unknown.
        "is_author_participating": row.get("isAuthorParticipating")
        if isinstance(row.get("isAuthorParticipating"), bool)
        else None,
        "repo_id": row.get("repo_id") or paper.get("repo_id"),
        "rank": rank,
    }
    return item
471
+
472
+
473
def _normalize_collection_repo_item(row: dict[str, Any]) -> dict[str, Any] | None:
    """Normalize one collection member into the canonical repo row.

    Returns None for members without a usable repo id or with a repo type
    outside model/dataset/space (e.g. papers), so callers can skip them.
    """
    repo_id = row.get("id") or row.get("repoId") or row.get("repo_id")
    if not isinstance(repo_id, str) or not repo_id:
        return None

    repo_type = _canonical_repo_type(
        row.get("repoType") or row.get("repo_type") or row.get("type"), default=""
    )
    if repo_type not in {"model", "dataset", "space"}:
        return None

    # Collection payloads mix camelCase and snake_case keys; try both.
    return _build_repo_row(
        repo_id=repo_id,
        repo_type=repo_type,
        author=row.get("author") or _author_from_any(row.get("authorData")),
        likes=row.get("likes"),
        downloads=row.get("downloads"),
        created_at=row.get("createdAt") or row.get("created_at"),
        last_modified=row.get("lastModified") or row.get("last_modified"),
        pipeline_tag=row.get("pipeline_tag") or row.get("pipelineTag"),
        num_params=_extract_num_params_from_dict(row),
        private=row.get("private"),
        tags=row.get("tags"),
        gated=row.get("gated"),
        library_name=row.get("library_name") or row.get("libraryName"),
        description=row.get("description"),
        paperswithcode_id=row.get("paperswithcode_id") or row.get("paperswithcodeId"),
        sdk=row.get("sdk"),
        models=row.get("models"),
        datasets=row.get("datasets"),
        subdomain=row.get("subdomain"),
        runtime=row.get("runtime"),
        runtime_stage=row.get("runtime_stage") or row.get("runtimeStage"),
    )
507
+
508
+
509
def _sort_repo_rows(
    rows: list[dict[str, Any]], sort_key: str | None
) -> list[dict[str, Any]]:
    """Sort repo rows client-side by a validated sort key, descending.

    Numeric keys (likes/downloads/trending_score) place missing values last;
    timestamp keys compare ISO string forms, so missing values ("") also
    land last. A falsy or unrecognized key returns *rows* unchanged.

    Bug fix: the previous ``_as_int(...) or -1`` collapsed a legitimate 0
    (e.g. zero likes) into the missing-value sentinel -1; a 0 value now
    sorts above genuinely missing values.
    """
    if not sort_key:
        return rows

    if sort_key in {"likes", "downloads", "trending_score"}:

        def numeric_value(row: dict[str, Any]) -> int:
            value = _as_int(row.get(sort_key))
            # Only true missing values fall back to the sentinel.
            return -1 if value is None else value

        return sorted(rows, key=numeric_value, reverse=True)

    if sort_key in {"created_at", "last_modified"}:
        return sorted(rows, key=lambda row: str(row.get(sort_key) or ""), reverse=True)

    return rows
524
+
525
+
526
def call_api_host(
    endpoint: str,
    *,
    method: str = "GET",
    params: dict[str, Any] | None = None,
    json_body: dict[str, Any] | None = None,
    timeout_sec: int = DEFAULT_TIMEOUT_SEC,
    strict_mode: bool = False,
) -> dict[str, Any]:
    """Perform an allow-listed GET/POST against the hub API host.

    Endpoints are normalized and validated before any network I/O; bad
    method, endpoint, or params raise ValueError. Network outcomes never
    raise — they are returned as {"ok", "status", "url", "data", "error"}
    envelopes (status 0 marks a transport-level failure).
    """
    method_u = method.upper().strip()
    if method_u not in {"GET", "POST"}:
        raise ValueError("Only GET and POST are supported")

    ep = _normalize_endpoint(endpoint)
    if not _endpoint_allowed(ep, strict_mode):
        raise ValueError(f"Endpoint not allowed: {ep}")

    params = _sanitize_params(ep, params)
    if ep == "/api/recent-activity":
        # This endpoint has mandatory query params the allow-list alone
        # cannot express, so they are checked here.
        feed_type = str((params or {}).get("feedType", "")).strip().lower()
        if feed_type not in {"user", "org"}:
            raise ValueError("/api/recent-activity requires feedType=user|org")
        if not str((params or {}).get("entity", "")).strip():
            raise ValueError("/api/recent-activity requires entity")

    base = os.getenv("HF_ENDPOINT", "https://huggingface.co").rstrip("/")
    q = urlencode(params or {}, doseq=True)
    url = f"{base}{ep}" + (f"?{q}" if q else "")

    headers = {"Accept": "application/json"}
    token = _load_token()
    if token:
        headers["Authorization"] = f"Bearer {token}"

    data = None
    if method_u == "POST":
        headers["Content-Type"] = "application/json"
        data = json.dumps(json_body or {}).encode("utf-8")

    req = Request(url, method=method_u, headers=headers, data=data)
    try:
        with urlopen(req, timeout=timeout_sec) as res:
            payload = _json_best_effort(res.read())
            return {
                "ok": True,
                "status": int(res.status),
                "url": url,
                "data": payload,
                "error": None,
            }
    except HTTPError as e:
        # HTTP errors still carry a body; keep both the parsed payload and
        # a bounded string form for the error field.
        payload = _json_best_effort(e.read())
        err = (
            payload
            if isinstance(payload, str)
            else json.dumps(payload, ensure_ascii=False)[:1000]
        )
        return {
            "ok": False,
            "status": int(e.code),
            "url": url,
            "data": payload,
            "error": err,
        }
    except URLError as e:
        # DNS/connection/timeout failures: no HTTP status available.
        return {
            "ok": False,
            "status": 0,
            "url": url,
            "data": None,
            "error": f"Network error: {e}",
        }
.prod/monty_api/query_entrypoints.py ADDED
@@ -0,0 +1,388 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import asyncio
5
+ import inspect
6
+ import json
7
+ import os
8
+ import sys
9
+ import time
10
+ from typing import Any, Callable
11
+
12
+ from .constants import (
13
+ DEFAULT_MAX_CALLS,
14
+ DEFAULT_MONTY_MAX_ALLOCATIONS,
15
+ DEFAULT_MONTY_MAX_MEMORY,
16
+ DEFAULT_MONTY_MAX_RECURSION_DEPTH,
17
+ DEFAULT_TIMEOUT_SEC,
18
+ INTERNAL_STRICT_MODE,
19
+ MAX_CALLS_LIMIT,
20
+ )
21
+ from .runtime_context import build_runtime_helper_environment
22
+ from .validation import (
23
+ _coerce_jsonish_python_literals,
24
+ _summarize_limit_hit,
25
+ _truncate_result_payload,
26
+ _validate_generated_code,
27
+ _wrap_raw_result,
28
+ )
29
+
30
+
31
class MontyExecutionError(RuntimeError):
    """Raised when sandboxed Monty execution fails.

    Carries the number of external API calls made so far and the recorded
    call trace, so callers can surface partial progress alongside the error.
    """

    def __init__(self, message: str, api_calls: int, trace: list[dict[str, Any]]):
        """Store *api_calls* and *trace* alongside the error *message*."""
        self.api_calls = api_calls
        self.trace = trace
        super().__init__(message)
36
+
37
+
38
+ def _query_debug_enabled() -> bool:
39
+ value = os.environ.get("MONTY_DEBUG_QUERY", "")
40
+ return value.strip().lower() in {"1", "true", "yes", "on"}
41
+
42
+
43
def _log_generated_query(
    *, query: str, code: str, max_calls: int | None, timeout_sec: int | None
) -> None:
    """Dump the query, limits, and generated code to stderr when debugging.

    No-op unless MONTY_DEBUG_QUERY enables debug output (see
    _query_debug_enabled).
    """
    if not _query_debug_enabled():
        return
    err = sys.stderr
    print("[monty-debug] query:", file=err)
    print(query, file=err)
    print("[monty-debug] max_calls:", max_calls, file=err)
    print("[monty-debug] timeout_sec:", timeout_sec, file=err)
    print("[monty-debug] code:", file=err)
    print(code, file=err)
    err.flush()
55
+
56
+
57
def _introspect_helper_signatures() -> dict[str, set[str]]:
    """Map each runtime helper name to the set of its parameter names.

    Builds a throwaway helper environment with internal defaults purely to
    inspect the helper callables.
    """
    env = build_runtime_helper_environment(
        max_calls=DEFAULT_MAX_CALLS,
        strict_mode=INTERNAL_STRICT_MODE,
        timeout_sec=DEFAULT_TIMEOUT_SEC,
    )
    signatures: dict[str, set[str]] = {}
    for helper_name, helper_fn in env.helper_functions.items():
        # signature(...).parameters is keyed by parameter name.
        signatures[helper_name] = set(inspect.signature(helper_fn).parameters)
    return signatures
70
+
71
+
72
async def _run_with_monty(
    *,
    code: str,
    query: str,
    max_calls: int,
    strict_mode: bool,
    timeout_sec: int,
) -> dict[str, Any]:
    """Execute generated code inside the pydantic_monty sandbox.

    Returns a dict with keys ``output`` (truncated solve result),
    ``api_calls``, ``trace``, and ``limit_summaries``. Raises
    MontyExecutionError when the sandbox fails or when the code completed
    without doing any live API work (unless a recognised internal/error
    fallback applies — see the branch comments below).
    """
    # pydantic_monty is imported lazily so the module stays importable when
    # the sandbox dependency is missing; fail with an actionable message.
    try:
        import pydantic_monty
    except Exception as e:
        raise RuntimeError(
            "pydantic_monty is not installed. Install with `uv pip install pydantic-monty`."
        ) from e

    env = build_runtime_helper_environment(
        max_calls=max_calls,
        strict_mode=strict_mode,
        timeout_sec=timeout_sec,
    )

    m = pydantic_monty.Monty(
        code,
        inputs=["query", "max_calls"],
        script_name="monty_agent.py",
        type_check=False,
    )

    def _collecting_wrapper(
        helper_name: str, fn: Callable[..., Any]
    ) -> Callable[..., Any]:
        # Wraps each helper so per-call limit hits are collected into
        # env.limit_summaries (capped at 20 entries) without changing results.
        async def wrapped(*args: Any, **kwargs: Any) -> Any:
            result = await fn(*args, **kwargs)
            summary = _summarize_limit_hit(helper_name, result)
            if summary is not None and len(env.limit_summaries) < 20:
                env.limit_summaries.append(summary)
            return result

        return wrapped

    # Hard resource ceilings for the sandboxed interpreter.
    limits: pydantic_monty.ResourceLimits = {
        "max_duration_secs": float(timeout_sec),
        "max_memory": DEFAULT_MONTY_MAX_MEMORY,
        "max_allocations": DEFAULT_MONTY_MAX_ALLOCATIONS,
        "max_recursion_depth": DEFAULT_MONTY_MAX_RECURSION_DEPTH,
    }

    try:
        result = await pydantic_monty.run_monty_async(
            m,
            inputs={"query": query, "max_calls": max_calls},
            external_functions={
                name: _collecting_wrapper(name, fn)
                for name, fn in env.helper_functions.items()
            },
            limits=limits,
        )
    except Exception as e:
        # Preserve the partial call count and trace on sandbox failure.
        raise MontyExecutionError(str(e), env.call_count["n"], env.trace) from e

    if env.call_count["n"] == 0:
        # Zero external calls: only accept the result via explicit fallbacks.
        # Fallback 1: an internal (no-network) helper was used.
        if env.internal_helper_used["used"]:
            return {
                "output": _truncate_result_payload(result),
                "api_calls": env.call_count["n"],
                "trace": env.trace,
                "limit_summaries": env.limit_summaries,
            }
        # Fallback 2: the result self-identifies as internal:// sourced.
        if isinstance(result, dict) and result.get("ok") is True:
            meta = result.get("meta") if isinstance(result.get("meta"), dict) else {}
            source = meta.get("source")
            if isinstance(source, str) and source.startswith("internal://"):
                return {
                    "output": _truncate_result_payload(result),
                    "api_calls": env.call_count["n"],
                    "trace": env.trace,
                    "limit_summaries": env.limit_summaries,
                }
        # Fallback 3: surface the latest recorded helper error verbatim.
        latest_helper_error = env.latest_helper_error_box.get("value")
        if latest_helper_error is not None:
            return {
                "output": _truncate_result_payload(latest_helper_error),
                "api_calls": env.call_count["n"],
                "trace": env.trace,
                "limit_summaries": env.limit_summaries,
            }
        # Fallback 4: the code returned a well-formed error envelope itself.
        if (
            isinstance(result, dict)
            and result.get("ok") is False
            and isinstance(result.get("error"), str)
        ):
            return {
                "output": _truncate_result_payload(result),
                "api_calls": env.call_count["n"],
                "trace": env.trace,
                "limit_summaries": env.limit_summaries,
            }
        # No fallback applied: refuse a fabricated, non-live result.
        raise MontyExecutionError(
            "Code completed without calling any external API function",
            env.call_count["n"],
            env.trace,
        )

    # Calls were made but none succeeded: accept only an explicit error
    # envelope; otherwise refuse the (possibly fabricated) result.
    if not any(step.get("ok") is True for step in env.trace):
        if (
            isinstance(result, dict)
            and result.get("ok") is False
            and isinstance(result.get("error"), str)
        ):
            return {
                "output": _truncate_result_payload(result),
                "api_calls": env.call_count["n"],
                "trace": env.trace,
                "limit_summaries": env.limit_summaries,
            }
        raise MontyExecutionError(
            "Code completed without a successful API call; refusing non-live fallback result",
            env.call_count["n"],
            env.trace,
        )

    # Happy path: at least one successful live call backs the result.
    return {
        "output": _truncate_result_payload(result),
        "api_calls": env.call_count["n"],
        "trace": env.trace,
        "limit_summaries": env.limit_summaries,
    }
199
+
200
+
201
def _prepare_query_inputs(
    *,
    query: str,
    code: str,
    max_calls: int | None,
    timeout_sec: int | None,
) -> tuple[str, str, int, int]:
    """Validate and normalize the raw query/code inputs.

    Returns ``(query, normalized_code, clamped_max_calls, timeout_sec)``.
    Raises ValueError when query or code is missing/blank, or when the
    normalized code fails generated-code validation.
    """
    if not (query and query.strip()):
        raise ValueError("query is required")
    if not (code and code.strip()):
        raise ValueError("code is required")

    calls = DEFAULT_MAX_CALLS if max_calls is None else max_calls
    timeout = DEFAULT_TIMEOUT_SEC if timeout_sec is None else timeout_sec
    # Clamp the call budget into [1, MAX_CALLS_LIMIT].
    clamped_calls = min(max(int(calls), 1), MAX_CALLS_LIMIT)
    # Rewrite JSON-ish literals (true/false/null) into Python before validating.
    cleaned_code = _coerce_jsonish_python_literals(code.strip())
    _validate_generated_code(cleaned_code)
    return query, cleaned_code, clamped_calls, int(timeout)
220
+
221
+
222
async def _execute_query(
    *,
    query: str,
    code: str,
    max_calls: int | None,
    timeout_sec: int | None,
) -> dict[str, Any]:
    """Normalize inputs, optionally debug-log them, then run under Monty."""
    q, normalized_code, calls, timeout = _prepare_query_inputs(
        query=query,
        code=code,
        max_calls=max_calls,
        timeout_sec=timeout_sec,
    )
    _log_generated_query(
        query=q,
        code=normalized_code,
        max_calls=calls,
        timeout_sec=timeout,
    )
    # strict_mode is always the internal policy constant for query execution.
    return await _run_with_monty(
        code=normalized_code,
        query=q,
        max_calls=calls,
        strict_mode=INTERNAL_STRICT_MODE,
        timeout_sec=timeout,
    )
250
+
251
+
252
async def hf_hub_query(
    query: str,
    code: str,
    max_calls: int | None = DEFAULT_MAX_CALLS,
    timeout_sec: int | None = DEFAULT_TIMEOUT_SEC,
) -> dict[str, Any]:
    """Use natural-language queries to explore the Hugging Face Hub.

    Best for read-only Hub discovery, lookup, ranking, and relationship questions
    across users, organizations, repositories, activity, followers, likes,
    discussions, and collections.
    """
    # Always return an {ok, data, error, api_calls} envelope; never raise.
    try:
        run = await _execute_query(
            query=query,
            code=code,
            max_calls=max_calls,
            timeout_sec=timeout_sec,
        )
        return {
            "ok": True,
            "data": run["output"],
            "error": None,
            "api_calls": run["api_calls"],
        }
    except MontyExecutionError as e:
        # Execution errors carry the partial call count.
        return {"ok": False, "data": None, "error": str(e), "api_calls": e.api_calls}
    except Exception as e:
        # Validation or unexpected failures: no calls were made.
        return {"ok": False, "data": None, "error": str(e), "api_calls": 0}
291
+
292
+
293
async def hf_hub_query_raw(
    query: str,
    code: str,
    max_calls: int | None = DEFAULT_MAX_CALLS,
    timeout_sec: int | None = DEFAULT_TIMEOUT_SEC,
) -> Any:
    """Use natural-language queries to explore the Hugging Face Hub in raw mode.

    Best for read-only Hub discovery, lookup, ranking, and relationship
    questions when the caller wants a runtime-owned raw envelope:
    ``result`` contains the direct ``solve(...)`` output and ``meta`` contains
    execution details such as timing, call counts, and limit summaries.
    """
    started = time.perf_counter()

    def _elapsed_ms() -> int:
        # Milliseconds since entry, recorded on every exit path.
        return int((time.perf_counter() - started) * 1000)

    try:
        run = await _execute_query(
            query=query,
            code=code,
            max_calls=max_calls,
            timeout_sec=timeout_sec,
        )
        return _wrap_raw_result(
            run["output"],
            ok=True,
            api_calls=run["api_calls"],
            elapsed_ms=_elapsed_ms(),
            limit_summaries=run.get("limit_summaries"),
        )
    except MontyExecutionError as e:
        return _wrap_raw_result(
            None,
            ok=False,
            api_calls=e.api_calls,
            elapsed_ms=_elapsed_ms(),
            error=str(e),
        )
    except Exception as e:
        return _wrap_raw_result(
            None,
            ok=False,
            api_calls=0,
            elapsed_ms=_elapsed_ms(),
            error=str(e),
        )
340
+
341
+
342
def _arg_parser() -> argparse.ArgumentParser:
    """Build the CLI argument parser for ad-hoc Monty query runs."""
    parser = argparse.ArgumentParser(description="Monty-backed API chaining tool (v3)")
    parser.add_argument("--query", required=True, help="Natural language query")
    parser.add_argument("--code", default=None, help="Inline Monty code to execute")
    parser.add_argument(
        "--code-file",
        default=None,
        help="Path to .py file with Monty code to execute",
    )
    parser.add_argument(
        "--max-calls",
        type=int,
        default=DEFAULT_MAX_CALLS,
        help="Max external API/helper calls",
    )
    parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT_SEC)
    return parser
357
+
358
+
359
def main() -> int:
    """CLI entry point: run the query and print a JSON envelope to stdout.

    Returns 0 on success, 1 on validation or execution failure.
    """
    args = _arg_parser().parse_args()

    code = args.code
    # --code-file takes precedence over inline --code when both are given.
    if args.code_file:
        with open(args.code_file, "r", encoding="utf-8") as fh:
            code = fh.read()

    if not code:
        payload = {"ok": False, "error": "Either --code or --code-file is required"}
        print(json.dumps(payload, ensure_ascii=False))
        return 1

    try:
        out = asyncio.run(
            hf_hub_query(
                query=args.query,
                code=code,
                max_calls=args.max_calls,
                timeout_sec=args.timeout,
            )
        )
        print(json.dumps(out, ensure_ascii=False))
        return 0 if out.get("ok") else 1
    except Exception as e:
        print(json.dumps({"ok": False, "error": str(e)}, ensure_ascii=False))
        return 1
.prod/monty_api/registry.py ADDED
@@ -0,0 +1,681 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, Mapping, NamedTuple
5
+
6
+ from .constants import (
7
+ ACTIVITY_CANONICAL_FIELDS,
8
+ ACTOR_CANONICAL_FIELDS,
9
+ COLLECTION_CANONICAL_FIELDS,
10
+ DAILY_PAPER_CANONICAL_FIELDS,
11
+ DISCUSSION_CANONICAL_FIELDS,
12
+ DISCUSSION_DETAIL_CANONICAL_FIELDS,
13
+ GRAPH_SCAN_LIMIT_CAP,
14
+ LIKES_ENRICHMENT_MAX_REPOS,
15
+ LIKES_RANKING_WINDOW_DEFAULT,
16
+ LIKES_SCAN_LIMIT_CAP,
17
+ OUTPUT_ITEMS_TRUNCATION_LIMIT,
18
+ PROFILE_CANONICAL_FIELDS,
19
+ RECENT_ACTIVITY_PAGE_SIZE,
20
+ RECENT_ACTIVITY_SCAN_MAX_PAGES,
21
+ REPO_CANONICAL_FIELDS,
22
+ TRENDING_ENDPOINT_MAX_LIMIT,
23
+ USER_LIKES_CANONICAL_FIELDS,
24
+ )
25
+
26
+
27
class RepoApiAdapter(NamedTuple):
    """Pair of Hub client method names used to service one repo type.

    ``list_method_name`` backs list/search paths (e.g. ``list_models``) and
    ``detail_method_name`` backs exact-detail lookups (e.g. ``model_info``);
    see REPO_API_ADAPTERS below for the concrete mapping.
    """

    # assumes names resolve to attributes on the huggingface_hub API client — TODO confirm
    list_method_name: str
    detail_method_name: str
30
+
31
+
32
@dataclass(frozen=True)
class HelperConfig:
    """Immutable registry entry describing one runtime helper."""

    # Helper name as exposed to generated code (e.g. "hf_trending").
    name: str
    # Regex patterns for the Hub API endpoints this helper covers; empty for
    # helpers that make no endpoint calls (e.g. introspection).
    endpoint_patterns: tuple[str, ...] = ()
    # Default metadata advertised for the helper (field lists, limits, notes).
    default_metadata: Mapping[str, Any] = field(default_factory=dict)
    # Pagination policy knobs (default_limit, scan caps, ...); empty when the
    # helper has no pagination policy.
    pagination: Mapping[str, Any] = field(default_factory=dict)
38
+
39
+
40
# Extra keyword arguments accepted per repo type on the search path, beyond
# the common list parameters. Keys are canonical repo types.
REPO_SEARCH_EXTRA_ARGS: dict[str, set[str]] = {
    "dataset": {
        "benchmark",
        "dataset_name",
        "expand",
        "full",
        "gated",
        "language",
        "language_creators",
        "multilinguality",
        "size_categories",
        "task_categories",
        "task_ids",
    },
    "model": {
        "apps",
        "cardData",
        "emissions_thresholds",
        "expand",
        "fetch_config",
        "full",
        "gated",
        "inference",
        "inference_provider",
        "model_name",
        "pipeline_tag",
        "trained_dataset",
    },
    "space": {"datasets", "expand", "full", "linked", "models"},
}

# Expand values requested by default for each repo type when the caller does
# not supply an explicit expand list.
REPO_SEARCH_DEFAULT_EXPAND: dict[str, list[str]] = {
    "dataset": [
        "author",
        "createdAt",
        "description",
        "downloads",
        "gated",
        "lastModified",
        "likes",
        "paperswithcode_id",
        "private",
        "sha",
        "tags",
        "trendingScore",
    ],
    "model": [
        "author",
        "createdAt",
        "downloads",
        "gated",
        "lastModified",
        "library_name",
        "likes",
        "pipeline_tag",
        "private",
        "safetensors",
        "sha",
        "tags",
        "trendingScore",
    ],
    "space": [
        "author",
        "createdAt",
        "datasets",
        "lastModified",
        "likes",
        "models",
        "private",
        "runtime",
        "sdk",
        "sha",
        "subdomain",
        "tags",
        "trendingScore",
    ],
}

# NOTE:
# The huggingface_hub client type literals currently advertise a few expand values
# that the live Hub API rejects (`childrenModelCount`, `usedStorage`) and omits a
# few that the API now accepts (`xetEnabled`, `gitalyUid`). Keep this allowlist in
# sync with the live API error contract rather than the client typing surface so we
# can sanitize generated requests before they hit the network.
REPO_SEARCH_ALLOWED_EXPAND: dict[str, list[str]] = {
    "dataset": [
        "author",
        "cardData",
        "citation",
        "createdAt",
        "description",
        "disabled",
        "downloads",
        "downloadsAllTime",
        "gated",
        "lastModified",
        "likes",
        "paperswithcode_id",
        "private",
        "resourceGroup",
        "sha",
        "siblings",
        "tags",
        "trendingScore",
        "xetEnabled",
        "gitalyUid",
    ],
    "model": [
        "author",
        "baseModels",
        "cardData",
        "config",
        "createdAt",
        "disabled",
        "downloads",
        "downloadsAllTime",
        "evalResults",
        "gated",
        "gguf",
        "inference",
        "inferenceProviderMapping",
        "lastModified",
        "library_name",
        "likes",
        "mask_token",
        "model-index",
        "pipeline_tag",
        "private",
        "resourceGroup",
        "safetensors",
        "sha",
        "siblings",
        "spaces",
        "tags",
        "transformersInfo",
        "trendingScore",
        "widgetData",
        "xetEnabled",
        "gitalyUid",
    ],
    "space": [
        "author",
        "cardData",
        "createdAt",
        "datasets",
        "disabled",
        "lastModified",
        "likes",
        "models",
        "private",
        "resourceGroup",
        "runtime",
        "sdk",
        "sha",
        "siblings",
        "subdomain",
        "tags",
        "trendingScore",
        "xetEnabled",
        "gitalyUid",
    ],
}
202
+
203
# Field lists derived from the canonical field tuples in constants.py.
# Guaranteed fields are always present on rows; optional fields are requested
# on demand. The comprehension variable is named `name` (not `field`) to avoid
# shadowing the imported `dataclasses.field` inside the comprehensions.
RUNTIME_CAPABILITY_FIELDS = [
    "allowed_sections",
    "overview",
    "helpers",
    "helper_contracts",
    "helper_defaults",
    "fields",
    "limits",
    "repo_search",
]
REPO_SUMMARY_FIELDS = list(REPO_CANONICAL_FIELDS)
REPO_SUMMARY_OPTIONAL_FIELDS = [
    name
    for name in REPO_CANONICAL_FIELDS
    if name not in {"repo_id", "repo_type", "author", "repo_url"}
]
ACTOR_OPTIONAL_FIELDS = [
    name for name in ACTOR_CANONICAL_FIELDS if name != "username"
]
PROFILE_OPTIONAL_FIELDS = [
    name
    for name in PROFILE_CANONICAL_FIELDS
    if name not in {"handle", "entity_type"}
]
TRENDING_DEFAULT_FIELDS = [*REPO_SUMMARY_FIELDS, "trending_rank"]
TRENDING_OPTIONAL_FIELDS = [
    name
    for name in TRENDING_DEFAULT_FIELDS
    if name not in {"repo_id", "repo_type", "author", "repo_url", "trending_rank"}
]
DAILY_PAPER_DEFAULT_FIELDS = list(DAILY_PAPER_CANONICAL_FIELDS)
DAILY_PAPER_OPTIONAL_FIELDS = [
    name
    for name in DAILY_PAPER_CANONICAL_FIELDS
    if name not in {"paper_id", "title", "published_at", "rank"}
]
COLLECTION_DEFAULT_FIELDS = list(COLLECTION_CANONICAL_FIELDS)
COLLECTION_OPTIONAL_FIELDS = [
    name
    for name in COLLECTION_CANONICAL_FIELDS
    if name not in {"collection_id", "title", "owner"}
]
245
+
246
+
247
+ def _metadata(
248
+ *,
249
+ default_fields: list[str],
250
+ guaranteed_fields: list[str],
251
+ notes: str,
252
+ optional_fields: list[str] | None = None,
253
+ default_upstream_calls: int = 1,
254
+ may_fan_out: bool = False,
255
+ default_limit: int | None = None,
256
+ max_limit: int | None = None,
257
+ ) -> dict[str, Any]:
258
+ metadata: dict[str, Any] = {
259
+ "default_fields": list(default_fields),
260
+ "guaranteed_fields": list(guaranteed_fields),
261
+ "optional_fields": list(
262
+ optional_fields
263
+ if optional_fields is not None
264
+ else [
265
+ field for field in default_fields if field not in set(guaranteed_fields)
266
+ ]
267
+ ),
268
+ "default_upstream_calls": default_upstream_calls,
269
+ "may_fan_out": may_fan_out,
270
+ "notes": notes,
271
+ }
272
+ if default_limit is not None:
273
+ metadata["default_limit"] = default_limit
274
+ if max_limit is not None:
275
+ metadata["max_limit"] = max_limit
276
+ return metadata
277
+
278
+
279
def _config(
    name: str,
    *,
    endpoint_patterns: tuple[str, ...] = (),
    default_metadata: Mapping[str, Any],
    pagination: Mapping[str, Any] | None = None,
) -> HelperConfig:
    """Construct a HelperConfig, defensively copying both mappings."""
    # Copy so registry entries never alias caller-owned dicts.
    metadata_copy = dict(default_metadata)
    pagination_copy = dict(pagination) if pagination else {}
    return HelperConfig(
        name=name,
        endpoint_patterns=endpoint_patterns,
        default_metadata=metadata_copy,
        pagination=pagination_copy,
    )
292
+
293
+
294
# Central registry of runtime helpers: one HelperConfig per helper name, with
# the endpoint patterns it covers, its advertised default metadata, and its
# pagination policy. Derived maps (HELPER_DEFAULT_METADATA, PAGINATION_POLICY,
# HELPER_COVERED_ENDPOINT_PATTERNS) are built from this dict below.
HELPER_CONFIGS: dict[str, HelperConfig] = {
    # Pure introspection: makes no upstream calls (default_upstream_calls=0).
    "hf_runtime_capabilities": _config(
        "hf_runtime_capabilities",
        default_metadata=_metadata(
            default_fields=RUNTIME_CAPABILITY_FIELDS,
            guaranteed_fields=RUNTIME_CAPABILITY_FIELDS,
            optional_fields=[],
            default_upstream_calls=0,
            notes="Introspection helper. Use section=... to narrow the response.",
        ),
    ),
    "hf_whoami": _config(
        "hf_whoami",
        endpoint_patterns=(r"^/api/whoami-v2$",),
        default_metadata=_metadata(
            default_fields=["username", "fullname", "is_pro"],
            guaranteed_fields=["username"],
            notes="Returns the current authenticated user when a request token is available.",
        ),
    ),
    "hf_profile_summary": _config(
        "hf_profile_summary",
        endpoint_patterns=(
            r"^/api/users/[^/]+/overview$",
            r"^/api/organizations/[^/]+/overview$",
        ),
        default_metadata=_metadata(
            default_fields=list(PROFILE_CANONICAL_FIELDS),
            guaranteed_fields=["handle", "entity_type"],
            optional_fields=PROFILE_OPTIONAL_FIELDS,
            may_fan_out=True,
            notes=(
                "Profile summary helper. Aggregate counts like followers_count/following_count "
                "are in the base item. include=['likes', 'activity'] adds composed samples and "
                "extra upstream work; no other include values are supported. Overview-owned "
                "repo counts may differ slightly from visible public search/list results."
            ),
        ),
    ),
    "hf_org_members": _config(
        "hf_org_members",
        endpoint_patterns=(r"^/api/organizations/[^/]+/members$",),
        default_metadata=_metadata(
            default_fields=list(ACTOR_CANONICAL_FIELDS),
            guaranteed_fields=["username"],
            optional_fields=ACTOR_OPTIONAL_FIELDS,
            default_limit=1_000,
            max_limit=GRAPH_SCAN_LIMIT_CAP,
            notes="Returns organization member summary rows.",
        ),
        pagination={"default_limit": 1_000, "scan_max": GRAPH_SCAN_LIMIT_CAP},
    ),
    # Type-specific search wrappers: preferred over hf_repo_search when the
    # repo type is known in advance.
    "hf_models_search": _config(
        "hf_models_search",
        endpoint_patterns=(r"^/api/models$",),
        default_metadata=_metadata(
            default_fields=REPO_SUMMARY_FIELDS,
            guaranteed_fields=["repo_id", "repo_type", "author", "repo_url"],
            optional_fields=REPO_SUMMARY_OPTIONAL_FIELDS,
            default_limit=20,
            max_limit=5_000,
            notes=(
                "Thin model-search wrapper around the Hub list_models path. Prefer this "
                "over hf_repo_search for model-only queries. This is a one-shot selective "
                "search; if meta.limit_boundary_hit is true, more rows may exist and counts "
                "are not exact."
            ),
        ),
        pagination={"default_limit": 20, "max_limit": 5_000},
    ),
    "hf_datasets_search": _config(
        "hf_datasets_search",
        endpoint_patterns=(r"^/api/datasets$",),
        default_metadata=_metadata(
            default_fields=REPO_SUMMARY_FIELDS,
            guaranteed_fields=["repo_id", "repo_type", "author", "repo_url"],
            optional_fields=REPO_SUMMARY_OPTIONAL_FIELDS,
            default_limit=20,
            max_limit=5_000,
            notes=(
                "Thin dataset-search wrapper around the Hub list_datasets path. Prefer "
                "this over hf_repo_search for dataset-only queries. This is a one-shot "
                "selective search; if meta.limit_boundary_hit is true, more rows may exist "
                "and counts are not exact."
            ),
        ),
        pagination={"default_limit": 20, "max_limit": 5_000},
    ),
    "hf_spaces_search": _config(
        "hf_spaces_search",
        endpoint_patterns=(r"^/api/spaces$",),
        default_metadata=_metadata(
            default_fields=REPO_SUMMARY_FIELDS,
            guaranteed_fields=["repo_id", "repo_type", "author", "repo_url"],
            optional_fields=REPO_SUMMARY_OPTIONAL_FIELDS,
            default_limit=20,
            max_limit=5_000,
            notes=(
                "Thin space-search wrapper around the Hub list_spaces path. Prefer this "
                "over hf_repo_search for space-only queries. This is a one-shot selective "
                "search; if meta.limit_boundary_hit is true, more rows may exist and counts "
                "are not exact."
            ),
        ),
        pagination={"default_limit": 20, "max_limit": 5_000},
    ),
    # Cross-type search: covers all three list endpoints.
    "hf_repo_search": _config(
        "hf_repo_search",
        endpoint_patterns=(r"^/api/models$", r"^/api/datasets$", r"^/api/spaces$"),
        default_metadata=_metadata(
            default_fields=REPO_SUMMARY_FIELDS,
            guaranteed_fields=["repo_id", "repo_type", "author", "repo_url"],
            optional_fields=REPO_SUMMARY_OPTIONAL_FIELDS,
            default_limit=20,
            max_limit=5_000,
            notes=(
                "Small generic repo-search helper. Prefer hf_models_search, "
                "hf_datasets_search, or hf_spaces_search for single-type queries; use "
                "hf_repo_search for intentionally cross-type search. This is a one-shot "
                "selective search; if meta.limit_boundary_hit is true, more rows may exist "
                "and counts are not exact."
            ),
        ),
        pagination={"default_limit": 20, "max_limit": 5_000},
    ),
    "hf_user_graph": _config(
        "hf_user_graph",
        endpoint_patterns=(
            r"^/api/users/[^/]+/(followers|following)$",
            r"^/api/organizations/[^/]+/followers$",
        ),
        default_metadata=_metadata(
            default_fields=list(ACTOR_CANONICAL_FIELDS),
            guaranteed_fields=["username"],
            optional_fields=ACTOR_OPTIONAL_FIELDS,
            default_limit=1_000,
            max_limit=GRAPH_SCAN_LIMIT_CAP,
            notes="Returns followers/following summary rows.",
        ),
        pagination={
            "default_limit": 1_000,
            "max_limit": GRAPH_SCAN_LIMIT_CAP,
            "scan_max": GRAPH_SCAN_LIMIT_CAP,
        },
    ),
    "hf_repo_likers": _config(
        "hf_repo_likers",
        endpoint_patterns=(
            r"^/api/(models|datasets|spaces)/(?:[^/]+|[^/]+/[^/]+)/likers$",
        ),
        default_metadata=_metadata(
            default_fields=list(ACTOR_CANONICAL_FIELDS),
            guaranteed_fields=["username"],
            optional_fields=ACTOR_OPTIONAL_FIELDS,
            default_limit=1_000,
            notes="Returns users who liked a repo.",
        ),
        pagination={"default_limit": 1_000},
    ),
    "hf_user_likes": _config(
        "hf_user_likes",
        endpoint_patterns=(r"^/api/users/[^/]+/likes$",),
        default_metadata=_metadata(
            default_fields=list(USER_LIKES_CANONICAL_FIELDS),
            guaranteed_fields=["liked_at", "repo_id", "repo_type"],
            optional_fields=["repo_author", "repo_likes", "repo_downloads", "repo_url"],
            default_limit=100,
            max_limit=2_000,
            may_fan_out=True,
            notes=(
                "Default recency mode is cheap. Popularity-ranked sorts use canonical keys "
                "liked_at/repo_likes/repo_downloads and rerank only a bounded recent "
                "shortlist. Check meta.ranking_complete / meta.ranking_window when ranking "
                "by popularity; helper-owned coverage matters here."
            ),
        ),
        pagination={
            "default_limit": 100,
            "enrich_max": LIKES_ENRICHMENT_MAX_REPOS,
            "ranking_default": LIKES_RANKING_WINDOW_DEFAULT,
            "scan_max": LIKES_SCAN_LIMIT_CAP,
        },
    ),
    "hf_recent_activity": _config(
        "hf_recent_activity",
        endpoint_patterns=(r"^/api/recent-activity$",),
        default_metadata=_metadata(
            default_fields=list(ACTIVITY_CANONICAL_FIELDS),
            guaranteed_fields=["event_type", "timestamp"],
            optional_fields=["repo_id", "repo_type"],
            default_limit=100,
            max_limit=2_000,
            may_fan_out=True,
            notes=(
                "Activity helper may fetch multiple pages when requested coverage exceeds "
                "one page. count_only may still be a lower bound unless the feed exhausts "
                "before max_pages."
            ),
        ),
        pagination={
            "default_limit": 100,
            "max_pages": RECENT_ACTIVITY_SCAN_MAX_PAGES,
            "page_limit": RECENT_ACTIVITY_PAGE_SIZE,
        },
    ),
    "hf_repo_discussions": _config(
        "hf_repo_discussions",
        endpoint_patterns=(r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions$",),
        default_metadata=_metadata(
            default_fields=list(DISCUSSION_CANONICAL_FIELDS),
            guaranteed_fields=["num", "title", "author", "status"],
            optional_fields=["repo_id", "repo_type", "created_at", "url"],
            default_limit=20,
            max_limit=200,
            notes="Discussion summary helper.",
        ),
    ),
    "hf_repo_discussion_details": _config(
        "hf_repo_discussion_details",
        endpoint_patterns=(
            r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions/\d+$",
        ),
        default_metadata=_metadata(
            default_fields=list(DISCUSSION_DETAIL_CANONICAL_FIELDS),
            guaranteed_fields=["repo_id", "repo_type", "title", "author", "status"],
            optional_fields=[
                "num",
                "created_at",
                "url",
                "comment_count",
                "latest_comment_author",
                "latest_comment_created_at",
                "latest_comment_text",
                "latest_comment_html",
            ],
            notes="Exact discussion detail helper.",
        ),
    ),
    "hf_repo_details": _config(
        "hf_repo_details",
        endpoint_patterns=(r"^/api/(models|datasets|spaces)/[^/]+/[^/]+$",),
        default_metadata=_metadata(
            default_fields=REPO_SUMMARY_FIELDS,
            guaranteed_fields=["repo_id", "repo_type", "author", "repo_url"],
            optional_fields=REPO_SUMMARY_OPTIONAL_FIELDS,
            may_fan_out=True,
            notes="Exact repo metadata path. Multiple repo_ids may trigger one detail call per requested repo.",
        ),
    ),
    "hf_trending": _config(
        "hf_trending",
        endpoint_patterns=(r"^/api/trending$",),
        default_metadata=_metadata(
            default_fields=TRENDING_DEFAULT_FIELDS,
            guaranteed_fields=[
                "repo_id",
                "repo_type",
                "author",
                "repo_url",
                "trending_rank",
            ],
            optional_fields=TRENDING_OPTIONAL_FIELDS,
            default_limit=20,
            max_limit=TRENDING_ENDPOINT_MAX_LIMIT,
            notes="Returns ordered trending summary rows only. Use hf_repo_details for exact repo metadata.",
        ),
        pagination={"default_limit": 20, "max_limit": TRENDING_ENDPOINT_MAX_LIMIT},
    ),
    "hf_daily_papers": _config(
        "hf_daily_papers",
        endpoint_patterns=(r"^/api/daily_papers$",),
        default_metadata=_metadata(
            default_fields=DAILY_PAPER_DEFAULT_FIELDS,
            guaranteed_fields=["paper_id", "title", "published_at", "rank"],
            optional_fields=DAILY_PAPER_OPTIONAL_FIELDS,
            default_limit=20,
            max_limit=OUTPUT_ITEMS_TRUNCATION_LIMIT,
            notes="Returns daily paper summary rows. repo_id is omitted unless the upstream payload provides it.",
        ),
        pagination={"default_limit": 20, "max_limit": OUTPUT_ITEMS_TRUNCATION_LIMIT},
    ),
    "hf_collections_search": _config(
        "hf_collections_search",
        endpoint_patterns=(r"^/api/collections$",),
        default_metadata=_metadata(
            default_fields=COLLECTION_DEFAULT_FIELDS,
            guaranteed_fields=["collection_id", "title", "owner"],
            optional_fields=COLLECTION_OPTIONAL_FIELDS,
            default_limit=20,
            max_limit=OUTPUT_ITEMS_TRUNCATION_LIMIT,
            notes="Collection summary helper.",
        ),
        pagination={"default_limit": 20, "max_limit": OUTPUT_ITEMS_TRUNCATION_LIMIT},
    ),
    "hf_collection_items": _config(
        "hf_collection_items",
        endpoint_patterns=(
            r"^/api/collections/[^/]+$",
            r"^/api/collections/[^/]+/[^/]+$",
        ),
        default_metadata=_metadata(
            default_fields=REPO_SUMMARY_FIELDS,
            guaranteed_fields=["repo_id", "repo_type", "repo_url"],
            optional_fields=[
                field
                for field in REPO_CANONICAL_FIELDS
                if field not in {"repo_id", "repo_type", "repo_url"}
            ],
            default_limit=100,
            max_limit=OUTPUT_ITEMS_TRUNCATION_LIMIT,
            notes="Returns repos inside one collection as summary rows.",
        ),
        pagination={"default_limit": 100, "max_limit": OUTPUT_ITEMS_TRUNCATION_LIMIT},
    ),
}

# Ordered tuple of all helper names (dict iteration preserves insertion order).
HELPER_EXTERNALS = tuple(HELPER_CONFIGS)

# name -> copy of the helper's default metadata.
HELPER_DEFAULT_METADATA: dict[str, dict[str, Any]] = {
    name: dict(config.default_metadata) for name, config in HELPER_CONFIGS.items()
}

# name -> pagination policy; helpers without one are omitted entirely.
PAGINATION_POLICY: dict[str, dict[str, Any]] = {
    name: dict(config.pagination)
    for name, config in HELPER_CONFIGS.items()
    if config.pagination
}

# (endpoint regex, helper name) pairs for every endpoint a helper covers.
HELPER_COVERED_ENDPOINT_PATTERNS: list[tuple[str, str]] = [
    (pattern, config.name)
    for config in HELPER_CONFIGS.values()
    for pattern in config.endpoint_patterns
]

# Full endpoint allowlist for normal (non-strict) mode.
ALLOWLIST_PATTERNS = [
    r"^/api/whoami-v2$",
    r"^/api/trending$",
    r"^/api/daily_papers$",
    r"^/api/models$",
    r"^/api/datasets$",
    r"^/api/spaces$",
    r"^/api/models-tags-by-type$",
    r"^/api/datasets-tags-by-type$",
    r"^/api/(models|datasets|spaces)/[^/]+/[^/]+$",
    r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions$",
    r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions/\d+$",
    r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions/\d+/status$",
    r"^/api/users/[^/]+/overview$",
    r"^/api/users/[^/]+/socials$",
    r"^/api/users/[^/]+/followers$",
    r"^/api/users/[^/]+/following$",
    r"^/api/users/[^/]+/likes$",
    r"^/api/(models|datasets|spaces)/(?:[^/]+|[^/]+/[^/]+)/likers$",
    r"^/api/organizations/[^/]+/overview$",
    r"^/api/organizations/[^/]+/members$",
    r"^/api/organizations/[^/]+/followers$",
    r"^/api/collections$",
    r"^/api/collections/[^/]+$",
    r"^/api/collections/[^/]+/[^/]+$",
    r"^/api/recent-activity$",
]

# Reduced allowlist applied when strict mode is enabled.
STRICT_ALLOWLIST_PATTERNS = [
    r"^/api/users/[^/]+/overview$",
    r"^/api/users/[^/]+/socials$",
    r"^/api/whoami-v2$",
    r"^/api/trending$",
    r"^/api/daily_papers$",
    r"^/api/(models|datasets|spaces)/(?:[^/]+|[^/]+/[^/]+)/likers$",
    r"^/api/collections$",
    r"^/api/collections/[^/]+$",
    r"^/api/collections/[^/]+/[^/]+$",
    r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions$",
    r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions/\d+$",
    r"^/api/(models|datasets|spaces)/[^/]+/[^/]+/discussions/\d+/status$",
]

# Canonical repo type -> Hub client method names for list/detail calls.
REPO_API_ADAPTERS: dict[str, RepoApiAdapter] = {
    "model": RepoApiAdapter(
        list_method_name="list_models", detail_method_name="model_info"
    ),
    "dataset": RepoApiAdapter(
        list_method_name="list_datasets", detail_method_name="dataset_info"
    ),
    "space": RepoApiAdapter(
        list_method_name="list_spaces", detail_method_name="space_info"
    ),
}
.prod/monty_api/runtime_context.py ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from dataclasses import dataclass, field
5
+ from typing import TYPE_CHECKING, Any, Callable, NamedTuple, cast
6
+
7
+ from .constants import MAX_CALLS_LIMIT
8
+ from .helpers.activity import register_activity_helpers
9
+ from .helpers.collections import register_collection_helpers
10
+ from .helpers.introspection import register_introspection_helpers
11
+ from .helpers.profiles import register_profile_helpers
12
+ from .helpers.repos import register_repo_helpers
13
+ from .http_runtime import (
14
+ _as_int,
15
+ _author_from_any,
16
+ _canonical_repo_type,
17
+ _clamp_int,
18
+ _coerce_str_list,
19
+ _dt_to_str,
20
+ _extract_author_names,
21
+ _extract_num_params,
22
+ _extract_profile_name,
23
+ _load_token,
24
+ _normalize_collection_repo_item,
25
+ _normalize_daily_paper_row,
26
+ _normalize_repo_detail_row,
27
+ _normalize_repo_search_row,
28
+ _normalize_repo_sort_key,
29
+ _normalize_trending_row,
30
+ _optional_str_list,
31
+ _repo_detail_call,
32
+ _repo_list_call,
33
+ _repo_web_url,
34
+ _sort_repo_rows,
35
+ call_api_host,
36
+ )
37
+ from .registry import PAGINATION_POLICY
38
+ from .runtime_envelopes import (
39
+ _build_exhaustive_meta,
40
+ _build_exhaustive_result_meta,
41
+ _derive_can_request_more,
42
+ _derive_limit_metadata,
43
+ _derive_more_available,
44
+ _derive_next_request_hint,
45
+ _derive_truncated_by,
46
+ _helper_error,
47
+ _helper_meta,
48
+ _helper_success,
49
+ _overview_count_only_success,
50
+ _resolve_exhaustive_limits,
51
+ )
52
+ from .runtime_filtering import (
53
+ _apply_where,
54
+ _helper_item,
55
+ _item_matches_where,
56
+ _normalize_where,
57
+ _overview_count,
58
+ _project_activity_items,
59
+ _project_actor_items,
60
+ _project_collection_items,
61
+ _project_discussion_detail_items,
62
+ _project_discussion_items,
63
+ _project_daily_paper_items,
64
+ _project_items,
65
+ _project_repo_items,
66
+ _project_user_items,
67
+ _project_user_like_items,
68
+ )
69
+ from .validation import _resolve_helper_functions
70
+
71
+ if TYPE_CHECKING:
72
+ from huggingface_hub import HfApi
73
+
74
+
75
class RuntimeHelperEnvironment(NamedTuple):
    """Bundle of a built RuntimeContext plus direct aliases to its mutable state.

    The aliases (call_count, trace, ...) reference the same objects held by
    ``context``, so callers can observe mutation without going through it.
    """

    context: "RuntimeContext"
    call_count: dict[str, int]
    trace: list[dict[str, Any]]
    limit_summaries: list[dict[str, Any]]
    latest_helper_error_box: dict[str, dict[str, Any] | None]
    internal_helper_used: dict[str, bool]
    helper_functions: dict[str, Callable[..., Any]]
83
+
84
+
85
@dataclass(slots=True)
class RuntimeContext:
    """Per-query runtime state: API-call budget, trace log, and helper registry.

    Every outbound call (raw HTTP or via HfApi) is counted against
    ``max_calls`` and appended to ``trace`` with its outcome.
    """

    max_calls: int
    strict_mode: bool
    timeout_sec: int
    # Counter boxed in a dict so attached helper functions can mutate it in place.
    call_count: dict[str, int] = field(default_factory=lambda: {"n": 0})
    trace: list[dict[str, Any]] = field(default_factory=list)
    limit_summaries: list[dict[str, Any]] = field(default_factory=list)
    # Box holding the most recent helper error envelope (None until an error occurs).
    latest_helper_error_box: dict[str, dict[str, Any] | None] = field(
        default_factory=lambda: {"value": None}
    )
    internal_helper_used: dict[str, bool] = field(
        default_factory=lambda: {"used": False}
    )
    helper_registry: dict[str, Callable[..., Any]] = field(default_factory=dict)
    # Lazily created HfApi client; see _get_hf_api_client.
    _hf_api_client: "HfApi | None" = field(default=None, init=False, repr=False)

    def _budget_remaining(self) -> int:
        """Number of API calls still allowed under ``max_calls`` (never negative)."""
        return max(0, self.max_calls - self.call_count["n"])

    def _policy_int(self, helper_name: str, key: str, default: int) -> int:
        """Read an int from PAGINATION_POLICY for *helper_name*, falling back to *default*."""
        cfg = PAGINATION_POLICY.get(helper_name) or {}
        try:
            return int(cfg.get(key, default))
        except Exception:
            # Malformed policy values degrade to the default rather than failing.
            return int(default)

    def _consume_call(self, endpoint: str, method: str = "GET") -> int:
        """Reserve one call from the budget and return its 1-based index.

        Raises:
            RuntimeError: when the budget is already exhausted.
        """
        if self.call_count["n"] >= self.max_calls:
            raise RuntimeError(f"Max API calls exceeded ({self.max_calls})")
        self.call_count["n"] += 1
        return self.call_count["n"]

    def _trace_ok(
        self, idx: int, endpoint: str, method: str = "GET", status: int = 200
    ) -> None:
        """Append a successful call record to the trace."""
        self.trace.append(
            {
                "call_index": idx,
                "depth": idx,
                "method": method,
                "endpoint": endpoint,
                "ok": True,
                "status": status,
            }
        )

    def _trace_err(
        self, idx: int, endpoint: str, err: Any, method: str = "GET", status: int = 0
    ) -> None:
        """Append a failed call record (error stringified) to the trace."""
        self.trace.append(
            {
                "call_index": idx,
                "depth": idx,
                "method": method,
                "endpoint": endpoint,
                "ok": False,
                "status": status,
                "error": str(err),
            }
        )

    def _host_raw_call(
        self,
        endpoint: str,
        *,
        params: dict[str, Any] | None = None,
        method: str = "GET",
        json_body: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Perform a budgeted raw HTTP call via call_api_host, tracing the outcome.

        Returns the response envelope from call_api_host; a non-ok envelope is
        traced as an error but still returned, while exceptions re-raise after
        being traced.
        """
        idx = self._consume_call(endpoint, method)
        try:
            resp = call_api_host(
                endpoint,
                method=method,
                params=params,
                json_body=json_body,
                timeout_sec=self.timeout_sec,
                strict_mode=self.strict_mode,
            )
            if resp.get("ok"):
                self._trace_ok(
                    idx, endpoint, method=method, status=int(resp.get("status") or 200)
                )
            else:
                self._trace_err(
                    idx,
                    endpoint,
                    resp.get("error"),
                    method=method,
                    status=int(resp.get("status") or 0),
                )
            return resp
        except Exception as exc:
            self._trace_err(idx, endpoint, exc, method=method, status=0)
            raise

    def _get_hf_api_client(self) -> "HfApi":
        """Return a cached HfApi client, creating it on first use.

        The import is deferred so huggingface_hub is only required when an
        HfApi-backed helper actually runs.
        """
        if self._hf_api_client is None:
            from huggingface_hub import HfApi

            endpoint = os.getenv("HF_ENDPOINT", "https://huggingface.co").rstrip("/")
            self._hf_api_client = HfApi(endpoint=endpoint, token=_load_token())
        return self._hf_api_client

    def _host_hf_call(self, endpoint: str, fn: Callable[[], Any]) -> Any:
        """Run *fn* (an HfApi operation) under the call budget, tracing the outcome."""
        idx = self._consume_call(endpoint, "GET")
        try:
            out = fn()
            self._trace_ok(idx, endpoint, method="GET", status=200)
            return out
        except Exception as exc:
            self._trace_err(idx, endpoint, exc, method="GET", status=0)
            raise

    async def call_helper(self, helper_name: str, /, *args: Any, **kwargs: Any) -> Any:
        """Invoke a registered helper coroutine by name.

        Raises:
            RuntimeError: when no callable is registered under *helper_name*.
        """
        fn = self.helper_registry.get(helper_name)
        if not callable(fn):
            raise RuntimeError(f"Helper '{helper_name}' is not registered")
        return await cast(Callable[..., Any], fn)(*args, **kwargs)
205
+
206
+
207
# Attach the envelope/filtering/HTTP utility functions (defined in sibling
# modules with an explicit ``self`` first parameter) to RuntimeContext as
# methods.  Plain functions become bound methods; the stateless utilities
# are wrapped in staticmethod() so no ``self`` is injected.
for name, value in {
    "_helper_meta": _helper_meta,
    "_derive_limit_metadata": _derive_limit_metadata,
    "_derive_more_available": _derive_more_available,
    "_derive_truncated_by": _derive_truncated_by,
    "_derive_can_request_more": _derive_can_request_more,
    "_derive_next_request_hint": _derive_next_request_hint,
    "_resolve_exhaustive_limits": _resolve_exhaustive_limits,
    "_build_exhaustive_meta": _build_exhaustive_meta,
    "_overview_count_only_success": _overview_count_only_success,
    "_build_exhaustive_result_meta": _build_exhaustive_result_meta,
    "_helper_success": _helper_success,
    "_helper_error": _helper_error,
    "_project_items": _project_items,
    "_project_repo_items": _project_repo_items,
    "_project_collection_items": _project_collection_items,
    "_project_discussion_items": _project_discussion_items,
    "_project_discussion_detail_items": _project_discussion_detail_items,
    "_project_daily_paper_items": _project_daily_paper_items,
    "_project_user_items": _project_user_items,
    "_project_actor_items": _project_actor_items,
    "_project_user_like_items": _project_user_like_items,
    "_project_activity_items": _project_activity_items,
    "_normalize_where": _normalize_where,
    "_item_matches_where": _item_matches_where,
    "_apply_where": _apply_where,
    "_helper_item": _helper_item,
    "_overview_count": _overview_count,
    "_as_int": staticmethod(_as_int),
    "_author_from_any": staticmethod(_author_from_any),
    "_canonical_repo_type": staticmethod(_canonical_repo_type),
    "_clamp_int": staticmethod(_clamp_int),
    "_coerce_str_list": staticmethod(_coerce_str_list),
    "_dt_to_str": staticmethod(_dt_to_str),
    "_extract_author_names": staticmethod(_extract_author_names),
    "_extract_num_params": staticmethod(_extract_num_params),
    "_extract_profile_name": staticmethod(_extract_profile_name),
    "_load_token": staticmethod(_load_token),
    "_normalize_collection_repo_item": staticmethod(_normalize_collection_repo_item),
    "_normalize_daily_paper_row": staticmethod(_normalize_daily_paper_row),
    "_normalize_repo_detail_row": staticmethod(_normalize_repo_detail_row),
    "_normalize_repo_search_row": staticmethod(_normalize_repo_search_row),
    "_normalize_repo_sort_key": staticmethod(_normalize_repo_sort_key),
    "_normalize_trending_row": staticmethod(_normalize_trending_row),
    "_optional_str_list": staticmethod(_optional_str_list),
    "_repo_detail_call": staticmethod(_repo_detail_call),
    "_repo_list_call": staticmethod(_repo_list_call),
    "_repo_web_url": staticmethod(_repo_web_url),
    "_sort_repo_rows": staticmethod(_sort_repo_rows),
}.items():
    setattr(RuntimeContext, name, value)
258
+
259
+
260
def build_runtime_helper_environment(
    *,
    max_calls: int,
    strict_mode: bool,
    timeout_sec: int,
) -> RuntimeHelperEnvironment:
    """Create a RuntimeContext, register every helper group, and bundle the result.

    ``max_calls`` is clamped to [1, MAX_CALLS_LIMIT] before the context is
    built.  The returned tuple aliases the context's mutable state so callers
    can observe call counts, traces, and errors directly.
    """
    clamped_calls = max(1, min(int(max_calls), MAX_CALLS_LIMIT))
    context = RuntimeContext(
        max_calls=clamped_calls,
        strict_mode=strict_mode,
        timeout_sec=timeout_sec,
    )

    # Each registration function returns a name -> coroutine mapping.
    registrations = (
        register_profile_helpers,
        register_repo_helpers,
        register_activity_helpers,
        register_collection_helpers,
        register_introspection_helpers,
    )
    for register in registrations:
        context.helper_registry.update(register(context))

    return RuntimeHelperEnvironment(
        context=context,
        call_count=context.call_count,
        trace=context.trace,
        limit_summaries=context.limit_summaries,
        latest_helper_error_box=context.latest_helper_error_box,
        internal_helper_used=context.internal_helper_used,
        helper_functions=_resolve_helper_functions(context.helper_registry),
    )
.prod/monty_api/runtime_envelopes.py ADDED
@@ -0,0 +1,357 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from .http_runtime import _as_int, _clamp_int
6
+
7
+
8
+ def _helper_meta(
9
+ self: Any, start_calls: int, *, source: str, **extra: Any
10
+ ) -> dict[str, Any]:
11
+ out = {
12
+ "source": source,
13
+ "normalized": True,
14
+ "budget_used": max(0, self.call_count["n"] - start_calls),
15
+ "budget_remaining": self._budget_remaining(),
16
+ }
17
+ out.update(extra)
18
+ return out
19
+
20
+
21
+ def _derive_limit_metadata(
22
+ self: Any,
23
+ *,
24
+ requested_limit: int | None,
25
+ applied_limit: int,
26
+ default_limit_used: bool,
27
+ requested_scan_limit: int | None = None,
28
+ applied_scan_limit: int | None = None,
29
+ requested_max_pages: int | None = None,
30
+ applied_max_pages: int | None = None,
31
+ ) -> dict[str, Any]:
32
+ meta: dict[str, Any] = {
33
+ "requested_limit": requested_limit,
34
+ "applied_limit": applied_limit,
35
+ "default_limit_used": default_limit_used,
36
+ }
37
+ if requested_scan_limit is not None or applied_scan_limit is not None:
38
+ meta["requested_scan_limit"] = requested_scan_limit
39
+ meta["scan_limit"] = applied_scan_limit
40
+ meta["scan_limit_applied"] = requested_scan_limit != applied_scan_limit
41
+ if requested_max_pages is not None or applied_max_pages is not None:
42
+ meta["requested_max_pages"] = requested_max_pages
43
+ meta["applied_max_pages"] = applied_max_pages
44
+ meta["page_limit_applied"] = requested_max_pages != applied_max_pages
45
+ if requested_limit is not None:
46
+ meta["hard_cap_applied"] = applied_limit < requested_limit
47
+ return meta
48
+
49
+
50
+ def _derive_more_available(
51
+ self: Any,
52
+ *,
53
+ sample_complete: bool,
54
+ exact_count: bool,
55
+ returned: int,
56
+ total: int | None,
57
+ ) -> bool | str:
58
+ if sample_complete:
59
+ return False
60
+ if exact_count and total is not None and returned < total:
61
+ return True
62
+ return "unknown"
63
+
64
+
65
+ def _derive_truncated_by(
66
+ self: Any,
67
+ *,
68
+ hard_cap: bool = False,
69
+ scan_limit_hit: bool = False,
70
+ page_limit_hit: bool = False,
71
+ limit_hit: bool = False,
72
+ ) -> str:
73
+ causes = [hard_cap, scan_limit_hit, page_limit_hit, limit_hit]
74
+ if sum(1 for cause in causes if cause) > 1:
75
+ return "multiple"
76
+ if hard_cap:
77
+ return "hard_cap"
78
+ if scan_limit_hit:
79
+ return "scan_limit"
80
+ if page_limit_hit:
81
+ return "page_limit"
82
+ if limit_hit:
83
+ return "limit"
84
+ return "none"
85
+
86
+
87
+ def _derive_can_request_more(
88
+ self: Any, *, sample_complete: bool, truncated_by: str
89
+ ) -> bool:
90
+ if sample_complete:
91
+ return False
92
+ return truncated_by in {"limit", "scan_limit", "page_limit", "multiple"}
93
+
94
+
95
+ def _derive_next_request_hint(
96
+ self: Any,
97
+ *,
98
+ truncated_by: str,
99
+ more_available: bool | str,
100
+ applied_limit: int,
101
+ applied_scan_limit: int | None = None,
102
+ applied_max_pages: int | None = None,
103
+ ) -> str:
104
+ if truncated_by == "limit":
105
+ return f"Ask for limit>{applied_limit} to see more rows"
106
+ if truncated_by == "scan_limit" and applied_scan_limit is not None:
107
+ return f"Increase scan_limit above {applied_scan_limit} for broader coverage"
108
+ if truncated_by == "page_limit" and applied_max_pages is not None:
109
+ return f"Increase max_pages above {applied_max_pages} to continue paging"
110
+ if truncated_by == "hard_cap":
111
+ return "No more rows can be returned in a single call because a hard cap was applied"
112
+ if truncated_by == "multiple":
113
+ return "Increase the relevant return/page/scan bounds to improve coverage"
114
+ if more_available is False:
115
+ return "No more results available"
116
+ if more_available == "unknown":
117
+ return "More results may exist; narrow filters or raise scan/page bounds for better coverage"
118
+ return "Ask for a larger limit to see more rows"
119
+
120
+
121
def _resolve_exhaustive_limits(
    self: Any,
    *,
    limit: int | None,
    count_only: bool,
    default_limit: int,
    max_limit: int,
    scan_limit: int | None = None,
    scan_cap: int | None = None,
) -> dict[str, Any]:
    """Plan the applied row limit (and optional scan limit) for an exhaustive helper.

    When ``count_only`` is set the applied limit is forced to 0 (no rows are
    returned).  ``hard_cap_applied`` records whether the caller requested more
    rows than ``max_limit`` permits.  The scan-limit keys are only produced
    when a ``scan_cap`` is supplied.
    """
    requested_limit = None if count_only else limit
    # count_only forces an applied limit of 0 regardless of the request.
    effective_requested_limit = 0 if count_only else requested_limit
    out: dict[str, Any] = {
        "requested_limit": requested_limit,
        "applied_limit": _clamp_int(
            effective_requested_limit,
            default=default_limit,
            minimum=0,
            maximum=max_limit,
        ),
        "default_limit_used": requested_limit is None and not count_only,
    }
    out["hard_cap_applied"] = (
        requested_limit is not None and out["applied_limit"] < requested_limit
    )
    if scan_cap is not None:
        out["requested_scan_limit"] = scan_limit
        # Scan limit defaults to (and is capped at) scan_cap, with a floor of 1.
        out["applied_scan_limit"] = _clamp_int(
            scan_limit,
            default=scan_cap,
            minimum=1,
            maximum=scan_cap,
        )
    return out
155
+
156
+
157
def _build_exhaustive_meta(
    self: Any,
    *,
    base_meta: dict[str, Any],
    limit_plan: dict[str, Any],
    sample_complete: bool,
    exact_count: bool,
    truncated_by: str,
    more_available: bool | str,
    requested_max_pages: int | None = None,
    applied_max_pages: int | None = None,
) -> dict[str, Any]:
    """Merge completeness flags, a next-request hint, and limit metadata into *base_meta*.

    *base_meta* is copied, not mutated.  ``limit_plan`` is the dict produced
    by ``_resolve_exhaustive_limits``.
    """
    meta = dict(base_meta)
    applied_limit = int(limit_plan["applied_limit"])
    applied_scan_limit = limit_plan.get("applied_scan_limit")
    meta.update(
        {
            "complete": sample_complete,
            "exact_count": exact_count,
            "sample_complete": sample_complete,
            "more_available": more_available,
            "can_request_more": _derive_can_request_more(
                self,
                sample_complete=sample_complete,
                truncated_by=truncated_by,
            ),
            "truncated_by": truncated_by,
            "next_request_hint": _derive_next_request_hint(
                self,
                truncated_by=truncated_by,
                more_available=more_available,
                applied_limit=applied_limit,
                # Non-int scan limits are treated as absent for hint purposes.
                applied_scan_limit=applied_scan_limit
                if isinstance(applied_scan_limit, int)
                else None,
                applied_max_pages=applied_max_pages,
            ),
        }
    )
    meta.update(
        _derive_limit_metadata(
            self,
            requested_limit=limit_plan["requested_limit"],
            applied_limit=applied_limit,
            default_limit_used=bool(limit_plan["default_limit_used"]),
            requested_scan_limit=limit_plan.get("requested_scan_limit"),
            applied_scan_limit=applied_scan_limit
            if isinstance(applied_scan_limit, int)
            else None,
            requested_max_pages=requested_max_pages,
            applied_max_pages=applied_max_pages,
        )
    )
    return meta
211
+
212
+
213
def _overview_count_only_success(
    self: Any,
    *,
    start_calls: int,
    source: str,
    total: int,
    limit_plan: dict[str, Any],
    base_meta: dict[str, Any],
) -> dict[str, Any]:
    """Build a success envelope for a count-only query: zero items, exact total."""
    meta = _build_exhaustive_meta(
        self,
        base_meta={
            **base_meta,
            "matched": total,
            "returned": 0,
            # Count-only results report the same total under every alias key.
            "total": total,
            "total_available": total,
            "total_matched": total,
            "truncated": False,
        },
        limit_plan=limit_plan,
        sample_complete=True,
        exact_count=True,
        truncated_by="none",
        more_available=False,
    )
    return _helper_success(
        self,
        start_calls=start_calls,
        source=source,
        items=[],
        meta=meta,
    )
246
+
247
+
248
def _build_exhaustive_result_meta(
    self: Any,
    *,
    base_meta: dict[str, Any],
    limit_plan: dict[str, Any],
    matched_count: int,
    returned_count: int,
    exact_count: bool,
    count_only: bool = False,
    sample_complete: bool | None = None,
    more_available: bool | str | None = None,
    scan_limit_hit: bool = False,
    page_limit_hit: bool = False,
    truncated_extra: bool = False,
    requested_max_pages: int | None = None,
    applied_max_pages: int | None = None,
) -> dict[str, Any]:
    """Derive the full result metadata for an exhaustive helper response.

    Fills in sample completeness, truncation cause, and availability when the
    caller did not pass them explicitly, then delegates the final merge to
    ``_build_exhaustive_meta``.
    """
    applied_limit = int(limit_plan["applied_limit"])
    if count_only:
        # With no rows returned, completeness is just whether the count is exact.
        effective_sample_complete = exact_count
    else:
        effective_sample_complete = (
            sample_complete
            if isinstance(sample_complete, bool)
            else exact_count and matched_count <= applied_limit
        )
    # The row limit "hit" only counts when rows were requested and more matched.
    limit_hit = (
        False
        if count_only
        else (applied_limit > 0 and matched_count > applied_limit)
    )
    truncated_by = _derive_truncated_by(
        self,
        hard_cap=bool(limit_plan.get("hard_cap_applied")),
        scan_limit_hit=scan_limit_hit,
        page_limit_hit=page_limit_hit,
        limit_hit=limit_hit,
    )
    truncated = truncated_by != "none" or truncated_extra
    total_value = _as_int(base_meta.get("total"))
    effective_more_available = more_available
    if count_only and exact_count:
        effective_more_available = False
    if effective_more_available is None:
        effective_more_available = _derive_more_available(
            self,
            sample_complete=effective_sample_complete,
            exact_count=exact_count,
            returned=returned_count,
            total=total_value,
        )

    return _build_exhaustive_meta(
        self,
        base_meta={
            **base_meta,
            "matched": matched_count,
            "returned": returned_count,
            "truncated": truncated,
        },
        limit_plan=limit_plan,
        sample_complete=effective_sample_complete,
        exact_count=exact_count,
        truncated_by=truncated_by,
        more_available=effective_more_available,
        requested_max_pages=requested_max_pages,
        applied_max_pages=applied_max_pages,
    )
316
+
317
+
318
def _helper_success(
    self: Any,
    *,
    start_calls: int,
    source: str,
    items: list[dict[str, Any]],
    cursor: str | None = None,
    meta: dict[str, Any] | None = None,
    **extra_meta: Any,
) -> dict[str, Any]:
    """Build the standard success envelope for a helper result.

    Keyword meta entries override nothing in *meta*; they are merged on top.
    A non-None *cursor* is surfaced under ``meta["cursor"]``.
    """
    merged_meta = dict(meta or {})
    merged_meta.update(extra_meta)
    if cursor is not None:
        merged_meta["cursor"] = cursor
    return {
        "ok": True,
        # Convenience single-item view; populated only when exactly one row.
        "item": items[0] if len(items) == 1 else None,
        "items": items,
        "meta": _helper_meta(self, start_calls, source=source, **merged_meta),
        "error": None,
    }
339
+
340
+
341
def _helper_error(
    self: Any,
    *,
    start_calls: int,
    source: str,
    error: Any,
    **meta: Any,
) -> dict[str, Any]:
    """Build the standard error envelope and record it as the latest helper error.

    Side effect: stores the envelope in ``self.latest_helper_error_box`` so
    the runtime can report the most recent failure.
    """
    envelope = {
        "ok": False,
        "item": None,
        "items": [],
        "meta": _helper_meta(self, start_calls, source=source, **meta),
        "error": str(error),
    }
    self.latest_helper_error_box["value"] = envelope
    return envelope
.prod/monty_api/runtime_filtering.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from .constants import (
6
+ ACTIVITY_CANONICAL_FIELDS,
7
+ ACTOR_CANONICAL_FIELDS,
8
+ COLLECTION_CANONICAL_FIELDS,
9
+ DAILY_PAPER_CANONICAL_FIELDS,
10
+ DISCUSSION_CANONICAL_FIELDS,
11
+ DISCUSSION_DETAIL_CANONICAL_FIELDS,
12
+ REPO_CANONICAL_FIELDS,
13
+ USER_CANONICAL_FIELDS,
14
+ USER_LIKES_CANONICAL_FIELDS,
15
+ )
16
+ from .http_runtime import _as_int
17
+
18
+
19
+ def _allowed_field_set(allowed_fields: tuple[str, ...] | list[str] | set[str]) -> set[str]:
20
+ return {str(field).strip() for field in allowed_fields if str(field).strip()}
21
+
22
+
23
+ def _project_items(
24
+ self: Any,
25
+ items: list[dict[str, Any]],
26
+ fields: list[str] | None,
27
+ *,
28
+ allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
29
+ ) -> list[dict[str, Any]]:
30
+ if not isinstance(fields, list) or not fields:
31
+ return items
32
+ wanted = [str(field).strip() for field in fields if str(field).strip()]
33
+ if not wanted:
34
+ return items
35
+ if allowed_fields is not None:
36
+ allowed = _allowed_field_set(allowed_fields)
37
+ invalid = sorted(field for field in wanted if field not in allowed)
38
+ if invalid:
39
+ raise ValueError(
40
+ f"Unsupported fields {invalid}. Allowed fields: {sorted(allowed)}"
41
+ )
42
+ projected: list[dict[str, Any]] = []
43
+ for row in items:
44
+ out: dict[str, Any] = {}
45
+ for key in wanted:
46
+ value = row.get(key)
47
+ if value is None:
48
+ continue
49
+ out[key] = value
50
+ projected.append(out)
51
+ return projected
52
+
53
+
54
def _project_repo_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project repo rows, restricting field selection to REPO_CANONICAL_FIELDS."""
    return _project_items(self, items, fields, allowed_fields=REPO_CANONICAL_FIELDS)
58
+
59
+
60
def _project_collection_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project collection rows, restricting field selection to COLLECTION_CANONICAL_FIELDS."""
    return _project_items(
        self, items, fields, allowed_fields=COLLECTION_CANONICAL_FIELDS
    )
66
+
67
+
68
def _project_daily_paper_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project daily-paper rows, restricting field selection to DAILY_PAPER_CANONICAL_FIELDS."""
    return _project_items(
        self, items, fields, allowed_fields=DAILY_PAPER_CANONICAL_FIELDS
    )
74
+
75
+
76
def _project_user_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project user rows, restricting field selection to USER_CANONICAL_FIELDS."""
    return _project_items(self, items, fields, allowed_fields=USER_CANONICAL_FIELDS)
80
+
81
+
82
def _project_actor_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project actor rows, restricting field selection to ACTOR_CANONICAL_FIELDS."""
    return _project_items(self, items, fields, allowed_fields=ACTOR_CANONICAL_FIELDS)
86
+
87
+
88
def _project_user_like_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project user-like rows, restricting field selection to USER_LIKES_CANONICAL_FIELDS."""
    return _project_items(
        self, items, fields, allowed_fields=USER_LIKES_CANONICAL_FIELDS
    )
94
+
95
+
96
def _project_activity_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project activity rows, restricting field selection to ACTIVITY_CANONICAL_FIELDS."""
    return _project_items(
        self, items, fields, allowed_fields=ACTIVITY_CANONICAL_FIELDS
    )
102
+
103
+
104
def _project_discussion_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project discussion rows, restricting field selection to DISCUSSION_CANONICAL_FIELDS."""
    return _project_items(
        self, items, fields, allowed_fields=DISCUSSION_CANONICAL_FIELDS
    )
110
+
111
+
112
def _project_discussion_detail_items(
    self: Any, items: list[dict[str, Any]], fields: list[str] | None
) -> list[dict[str, Any]]:
    """Project discussion-detail rows, restricting fields to DISCUSSION_DETAIL_CANONICAL_FIELDS."""
    return _project_items(
        self, items, fields, allowed_fields=DISCUSSION_DETAIL_CANONICAL_FIELDS
    )
118
+
119
+
120
+ def _normalize_where(
121
+ self: Any,
122
+ where: dict[str, Any] | None,
123
+ *,
124
+ allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
125
+ ) -> dict[str, Any] | None:
126
+ if not isinstance(where, dict) or not where:
127
+ return where
128
+ allowed = _allowed_field_set(allowed_fields) if allowed_fields is not None else None
129
+ normalized: dict[str, Any] = {}
130
+ for key, value in where.items():
131
+ raw_key = str(key).strip()
132
+ if not raw_key:
133
+ continue
134
+ if allowed is not None and raw_key not in allowed:
135
+ raise ValueError(
136
+ f"Unsupported filter fields {[raw_key]}. Allowed fields: {sorted(allowed)}"
137
+ )
138
+ normalized[raw_key] = value
139
+ return normalized
140
+
141
+
142
+ def _item_matches_where(
143
+ self: Any, item: dict[str, Any], where: dict[str, Any] | None
144
+ ) -> bool:
145
+ if not isinstance(where, dict) or not where:
146
+ return True
147
+ for key, cond in where.items():
148
+ value = item.get(str(key))
149
+ if isinstance(cond, dict):
150
+ if "eq" in cond and value != cond.get("eq"):
151
+ return False
152
+ if "in" in cond:
153
+ allowed = cond.get("in")
154
+ if isinstance(allowed, (list, tuple, set)) and value not in allowed:
155
+ return False
156
+ if "contains" in cond:
157
+ needle = cond.get("contains")
158
+ if (
159
+ not isinstance(value, str)
160
+ or not isinstance(needle, str)
161
+ or needle not in value
162
+ ):
163
+ return False
164
+ if "icontains" in cond:
165
+ needle = cond.get("icontains")
166
+ if (
167
+ not isinstance(value, str)
168
+ or not isinstance(needle, str)
169
+ or needle.lower() not in value.lower()
170
+ ):
171
+ return False
172
+ if "gte" in cond:
173
+ left = _as_int(value)
174
+ right = _as_int(cond.get("gte"))
175
+ if left is None or right is None or left < right:
176
+ return False
177
+ if "lte" in cond:
178
+ left = _as_int(value)
179
+ right = _as_int(cond.get("lte"))
180
+ if left is None or right is None or left > right:
181
+ return False
182
+ continue
183
+ if isinstance(cond, (list, tuple, set)):
184
+ if value not in cond:
185
+ return False
186
+ continue
187
+ if value != cond:
188
+ return False
189
+ return True
190
+
191
+
192
def _apply_where(
    self: Any,
    items: list[dict[str, Any]],
    where: dict[str, Any] | None,
    *,
    allowed_fields: tuple[str, ...] | list[str] | set[str] | None = None,
) -> list[dict[str, Any]]:
    """Filter *items* by a normalized ``where`` clause; pass-through when empty.

    Raises ValueError (from _normalize_where) when a filter key is outside
    the allow-list.
    """
    normalized_where = _normalize_where(self, where, allowed_fields=allowed_fields)
    if not isinstance(normalized_where, dict) or not normalized_where:
        return items
    return [row for row in items if _item_matches_where(self, row, normalized_where)]
203
+
204
+
205
+ def _helper_item(self: Any, resp: dict[str, Any]) -> dict[str, Any] | None:
206
+ item = resp.get("item")
207
+ if isinstance(item, dict):
208
+ return item
209
+ items = resp.get("items")
210
+ if isinstance(items, list) and items and isinstance(items[0], dict):
211
+ return items[0]
212
+ return None
213
+
214
+
215
def _overview_count(self: Any, item: dict[str, Any] | None, key: str) -> int | None:
    """Read an integer count field from an overview item; None if absent or non-numeric."""
    if not isinstance(item, dict):
        return None
    return _as_int(item.get(key))
.prod/monty_api/tool_entrypoints.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""File-based function tool entrypoints for the production Monty runtime."""

from __future__ import annotations

import sys
from pathlib import Path
from typing import Any

# Ensure both this package's directory and its parent are importable so the
# file also works when executed directly as a script (not as part of an
# installed package).
_PACKAGE_DIR = Path(__file__).resolve().parent
_ROOT_DIR = _PACKAGE_DIR.parent
for candidate in (_ROOT_DIR, _PACKAGE_DIR):
    candidate_str = str(candidate)
    if candidate_str not in sys.path:
        sys.path.insert(0, candidate_str)

from monty_api import (  # noqa: E402
    HELPER_EXTERNALS,
    hf_hub_query as _hf_hub_query,
    hf_hub_query_raw as _hf_hub_query_raw,
    main,
)


async def hf_hub_query(
    query: str,
    code: str,
    max_calls: int | None = None,
    timeout_sec: int | None = None,
) -> dict[str, Any]:
    """Thin async wrapper delegating to ``monty_api.hf_hub_query``."""
    return await _hf_hub_query(
        query=query,
        code=code,
        max_calls=max_calls,
        timeout_sec=timeout_sec,
    )


async def hf_hub_query_raw(
    query: str,
    code: str,
    max_calls: int | None = None,
    timeout_sec: int | None = None,
) -> Any:
    """Thin async wrapper delegating to ``monty_api.hf_hub_query_raw``."""
    return await _hf_hub_query_raw(
        query=query,
        code=code,
        max_calls=max_calls,
        timeout_sec=timeout_sec,
    )

__all__ = [
    "HELPER_EXTERNALS",
    "hf_hub_query",
    "hf_hub_query_raw",
    "main",
]

if __name__ == "__main__":
    raise SystemExit(main())
.prod/monty_api/validation.py ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ import re
5
+ import tokenize
6
+ from io import StringIO
7
+ from typing import Any, Callable, cast
8
+
9
+ from .constants import (
10
+ GRAPH_SCAN_LIMIT_CAP,
11
+ LIKES_SCAN_LIMIT_CAP,
12
+ OUTPUT_ITEMS_TRUNCATION_LIMIT,
13
+ SELECTIVE_ENDPOINT_RETURN_HARD_CAP,
14
+ TRENDING_ENDPOINT_MAX_LIMIT,
15
+ )
16
+ from .registry import (
17
+ ALLOWLIST_PATTERNS,
18
+ HELPER_EXTERNALS,
19
+ STRICT_ALLOWLIST_PATTERNS,
20
+ )
21
+
22
+
23
def _resolve_helper_functions(
    namespace: dict[str, Any],
) -> dict[str, Callable[..., Any]]:
    """Collect every required hf_* helper from *namespace*, verifying callability."""

    def _lookup(helper_name: str) -> Callable[..., Any]:
        # A missing or non-callable helper is a hard configuration error.
        candidate = namespace.get(helper_name)
        if not callable(candidate):
            raise RuntimeError(f"Helper '{helper_name}' is not defined or not callable")
        return cast(Callable[..., Any], candidate)

    return {helper_name: _lookup(helper_name) for helper_name in HELPER_EXTERNALS}
33
+
34
+
35
+ def _normalize_endpoint(endpoint: str) -> str:
36
+ ep = (endpoint or "").strip()
37
+ if not ep:
38
+ raise ValueError("endpoint is required")
39
+ if "?" in ep:
40
+ raise ValueError("endpoint must not include query string; use params")
41
+ if ep.startswith("http://") or ep.startswith("https://"):
42
+ raise ValueError("endpoint must be path-only")
43
+ if not ep.startswith("/"):
44
+ ep = "/" + ep
45
+ if not ep.startswith("/api/"):
46
+ ep = "/api" + ep
47
+ if ep in {"/api/collections/search", "/api/collections/search/"}:
48
+ ep = "/api/collections"
49
+ if ".." in ep:
50
+ raise ValueError("path traversal not allowed")
51
+ return ep
52
+
53
+
54
def _endpoint_allowed(endpoint: str, strict_mode: bool) -> bool:
    """Return True when the endpoint's path matches the active allowlist."""
    path = endpoint.split("?", 1)[0]
    active_patterns = STRICT_ALLOWLIST_PATTERNS if strict_mode else ALLOWLIST_PATTERNS
    for pattern in active_patterns:
        if re.match(pattern, path):
            return True
    return False
58
+
59
+
60
def _sanitize_params(endpoint: str, params: dict[str, Any] | None) -> dict[str, Any]:
    """Normalize query params for an endpoint and clamp `limit` to safe caps.

    - /api/collections: accepts a legacy `search` param as an alias for `q`.
    - /api/trending: maps plural `type` aliases to singular and clamps `limit`.
    - other endpoints: clamps `limit` to an endpoint-specific hard cap.

    Returns a new dict; the caller's `params` mapping is never mutated.
    """
    clean = dict(params or {})
    path = endpoint.split("?", 1)[0]

    if path == "/api/collections":
        # The collections endpoint expects `q`; accept `search` as an alias
        # but never overwrite an explicit `q`.
        if "q" not in clean and "search" in clean:
            clean["q"] = clean.get("search")
        clean.pop("search", None)

    if path == "/api/trending":
        t = str(clean.get("type") or "").strip().lower()
        aliases = {"models": "model", "datasets": "dataset", "spaces": "space"}
        if t in aliases:
            clean["type"] = aliases[t]
        lim = clean.get("limit")
        if lim is not None:
            try:
                n = int(lim)
            except (TypeError, ValueError):
                # Unparseable limit: fall back to the endpoint maximum.
                n = TRENDING_ENDPOINT_MAX_LIMIT
            clean["limit"] = max(1, min(n, TRENDING_ENDPOINT_MAX_LIMIT))
        return clean

    lim = clean.get("limit")
    if lim is None:
        return clean
    try:
        n = int(lim)
    except (TypeError, ValueError):
        # Leave an unparseable limit untouched for the upstream API to reject.
        return clean

    # Follower/following and likes listings tolerate larger scans than the
    # default selective-endpoint cap.
    endpoint_limit_max = SELECTIVE_ENDPOINT_RETURN_HARD_CAP
    if re.match(r"^/api/users/[^/]+/(followers|following)$", path):
        endpoint_limit_max = GRAPH_SCAN_LIMIT_CAP
    elif re.match(r"^/api/users/[^/]+/likes$", path):
        endpoint_limit_max = LIKES_SCAN_LIMIT_CAP

    clean["limit"] = max(1, min(n, endpoint_limit_max))
    return clean
99
+
100
+
101
def _truncate_result_payload(output: Any) -> Any:
    """Cap an oversized `items` list in a result dict, annotating the truncation."""
    if not isinstance(output, dict):
        return output

    items = output.get("items")
    if not isinstance(items, list):
        return output
    if len(items) <= OUTPUT_ITEMS_TRUNCATION_LIMIT:
        return output

    kept = items[:OUTPUT_ITEMS_TRUNCATION_LIMIT]
    trimmed = dict(output)
    trimmed["items"] = kept
    # `item` mirrors the single-row convenience field only when exactly one
    # row survives truncation.
    trimmed["item"] = kept[0] if len(kept) == 1 else None
    note = f"truncated items to first {OUTPUT_ITEMS_TRUNCATION_LIMIT} rows for token efficiency"
    existing_steps = trimmed.get("steps")
    if isinstance(existing_steps, list):
        trimmed["steps"] = [*existing_steps, note]
    else:
        trimmed["steps"] = [note]
    return trimmed
120
+
121
+
122
+ def _is_helper_envelope(output: Any) -> bool:
123
+ return (
124
+ isinstance(output, dict)
125
+ and isinstance(output.get("ok"), bool)
126
+ and "items" in output
127
+ and "meta" in output
128
+ and "error" in output
129
+ )
130
+
131
+
132
def _summarize_limit_hit(helper_name: str, result: Any) -> dict[str, Any] | None:
    """Return a compact summary of limit/truncation metadata, or None.

    Only helper envelopes whose `meta` indicates a limit was actually hit
    (truncation flag, hard cap, or a scan/page boundary) produce a summary.

    Note: the original fetched `result["meta"]` twice and carried a dead
    non-dict re-check; this version does one lookup and bails out early on a
    non-dict `meta` (behavior unchanged: such envelopes never report a hit).
    """
    if not _is_helper_envelope(result):
        return None
    meta = result.get("meta")
    if not isinstance(meta, dict):
        # A non-dict meta carries no limit information.
        return None

    truncated_by = str(meta.get("truncated_by") or "")
    limit_hit = (
        meta.get("truncated") is True
        or meta.get("hard_cap_applied") is True
        or truncated_by in {"scan_limit", "page_limit", "multiple"}
    )
    if not limit_hit:
        return None

    summary: dict[str, Any] = {
        "helper": helper_name,
        "source": meta.get("source"),
        "returned": meta.get("returned"),
        "total": meta.get("total"),
        "truncated": meta.get("truncated"),
        "truncated_by": meta.get("truncated_by"),
        "more_available": meta.get("more_available"),
        "requested_limit": meta.get("requested_limit"),
        "applied_limit": meta.get("applied_limit"),
        "next_request_hint": meta.get("next_request_hint"),
        "limit_boundary_hit": meta.get("limit_boundary_hit"),
    }
    # Optional fields are only included when present, to keep summaries small.
    if meta.get("scan_limit") is not None:
        summary["scan_limit"] = meta.get("scan_limit")
    if meta.get("applied_max_pages") is not None:
        summary["applied_max_pages"] = meta.get("applied_max_pages")
    for key in (
        "ranking_window",
        "requested_ranking_window",
        "ranking_window_applied",
        "ranking_window_hit",
        "ranking_complete",
        "ranking_next_request_hint",
    ):
        if meta.get(key) is not None:
            summary[key] = meta.get(key)
    return summary
178
+
179
+
180
+ def _wrap_raw_result(
181
+ result: Any,
182
+ *,
183
+ ok: bool,
184
+ api_calls: int,
185
+ elapsed_ms: int,
186
+ limit_summaries: list[dict[str, Any]] | None = None,
187
+ error: str | None = None,
188
+ ) -> dict[str, Any]:
189
+ hits = [dict(summary) for summary in (limit_summaries or [])[:10]]
190
+ meta: dict[str, Any] = {
191
+ "ok": ok,
192
+ "api_calls": api_calls,
193
+ "elapsed_ms": elapsed_ms,
194
+ "limits_reached": bool(hits),
195
+ "limit_summary": hits,
196
+ }
197
+ if error is not None:
198
+ meta["error"] = error
199
+ return {
200
+ "result": result,
201
+ "meta": meta,
202
+ }
203
+
204
+
205
def _validate_generated_code(code: str) -> None:
    """Statically validate generated solver code against the prompt contract.

    Checks, in order: non-empty source; no blocked patterns (imports,
    exec/eval/open/__import__, `while true`); parseable as a module with
    top-level await allowed; a coroutine with the exact signature
    `async def solve(query, max_calls)`; a final `await solve(query, max_calls)`
    statement; no raw `call_api(...)` usage; and at least one call to a
    documented hf_* helper.

    Raises:
        ValueError: describing the first contract violation found.
    """
    if not code.strip():
        raise ValueError("Generated code is empty")

    # Fast regex screen before parsing: reject imports, dynamic execution,
    # file access, and trivially unbounded loops.
    blocked_patterns: list[tuple[str, str]] = [
        (r"(?m)^\s*import\s+\S", "import statement"),
        (r"(?m)^\s*from\s+\S+\s+import\s+\S", "from-import statement"),
        (r"\bexec\s*\(", "exec("),
        (r"\beval\s*\(", "eval("),
        (r"\bopen\s*\(", "open("),
        (r"\b__import__\b", "__import__"),
        (r"(?i)\bwhile\s+true\b", "while true"),
    ]
    for pattern, label in blocked_patterns:
        if re.search(pattern, code):
            raise ValueError(f"Generated code contains blocked pattern: {label}")

    try:
        # AST-only compile: nothing is executed. Top-level await is permitted
        # because the contract requires a trailing `await solve(...)`.
        parsed = compile(  # noqa: S102 - compile is used for AST validation only.
            code,
            "<generated-monty-code>",
            "exec",
            flags=ast.PyCF_ONLY_AST | ast.PyCF_ALLOW_TOP_LEVEL_AWAIT,
            dont_inherit=True,
        )
    except SyntaxError as e:
        message = e.msg or "invalid syntax"
        raise ValueError(f"Generated code is not valid Python: {message}") from e

    if not isinstance(parsed, ast.Module):
        raise ValueError("Generated code must be a Python module")

    # Multiple `solve` definitions are tolerated as long as at least one
    # matches the required signature exactly (checked below).
    solve_defs = [
        node
        for node in parsed.body
        if isinstance(node, ast.AsyncFunctionDef) and node.name == "solve"
    ]
    if not solve_defs:
        raise ValueError(
            "Generated code must define `async def solve(query, max_calls): ...`."
        )

    def _valid_solve_signature(node: ast.AsyncFunctionDef) -> bool:
        # Exactly two positional-or-keyword args named query/max_calls, with
        # no positional-only args, *args/**kwargs, keyword-only args, or defaults.
        args = node.args
        return (
            not args.posonlyargs
            and len(args.args) == 2
            and [arg.arg for arg in args.args] == ["query", "max_calls"]
            and args.vararg is None
            and not args.kwonlyargs
            and args.kwarg is None
            and not args.defaults
            and not args.kw_defaults
        )

    if not any(_valid_solve_signature(node) for node in solve_defs):
        raise ValueError(
            "`solve` must have signature `async def solve(query, max_calls): ...`."
        )

    # NOTE(review): unreachable — `solve_defs` being non-empty already implies
    # `parsed.body` is non-empty; kept as a defensive guard.
    if not parsed.body:
        raise ValueError("Generated code is empty")

    # The module must end with exactly `await solve(query, max_calls)` —
    # positional args, by those names, no keywords — so the runtime can
    # capture the coroutine's result deterministically.
    final_stmt = parsed.body[-1]
    valid_final_await = (
        isinstance(final_stmt, ast.Expr)
        and isinstance(final_stmt.value, ast.Await)
        and isinstance(final_stmt.value.value, ast.Call)
        and isinstance(final_stmt.value.value.func, ast.Name)
        and final_stmt.value.value.func.id == "solve"
        and len(final_stmt.value.value.args) == 2
        and not final_stmt.value.value.keywords
        and all(isinstance(arg, ast.Name) for arg in final_stmt.value.value.args)
        and [cast(ast.Name, arg).id for arg in final_stmt.value.value.args]
        == ["query", "max_calls"]
    )
    if not valid_final_await:
        raise ValueError(
            "Generated code must end with `await solve(query, max_calls)`."
        )

    # Raw transport access is forbidden anywhere in the module; only the
    # documented helpers may reach the API.
    for node in ast.walk(parsed):
        if not isinstance(node, ast.Call):
            continue
        if isinstance(node.func, ast.Name) and node.func.id == "call_api":
            raise ValueError(
                "Generated code must use documented hf_* helpers only; raw `call_api(...)` is not part of the prompt contract."
            )

    # Require at least one call to a known helper so the solver actually
    # queries the Hub rather than fabricating a result.
    helper_name_set = set(HELPER_EXTERNALS)
    has_external_call = any(
        isinstance(node, ast.Call)
        and isinstance(node.func, ast.Name)
        and node.func.id in helper_name_set
        for node in ast.walk(parsed)
    )
    if not has_external_call:
        raise ValueError(
            "Generated code must call at least one documented hf_* helper."
        )
305
+
306
+
307
+ def _coerce_jsonish_python_literals(code: str) -> str:
308
+ """Normalize common JSON literals into valid Python names in generated code."""
309
+ replacements = {
310
+ "true": "True",
311
+ "false": "False",
312
+ "null": "None",
313
+ }
314
+
315
+ out_tokens: list[tuple[int, str]] = []
316
+ for tok in tokenize.generate_tokens(StringIO(code).readline):
317
+ tok_type = tok.type
318
+ tok_str = tok.string
319
+ if tok_type == tokenize.NAME and tok_str in replacements:
320
+ tok_str = replacements[tok_str]
321
+ out_tokens.append((tok_type, tok_str))
322
+ return tokenize.untokenize(out_tokens)
Dockerfile CHANGED
@@ -11,11 +11,13 @@ COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
11
 
12
  WORKDIR /app
13
 
 
 
14
  RUN uv pip install --system --no-cache \
15
- fast-agent-mcp==0.6.1 \
16
- prefab-ui \
17
  huggingface_hub \
18
- pydantic-monty
19
 
20
  COPY --link ./ /app
21
  RUN chown -R 1000:1000 /app
 
11
 
12
  WORKDIR /app
13
 
14
+ COPY wheels /tmp/wheels
15
+
16
  RUN uv pip install --system --no-cache \
17
+ "fast-agent-mcp==0.6.1" \
18
+ /tmp/wheels/prefab_ui-0.13.2.dev5+a585463-py3-none-any.whl \
19
  huggingface_hub \
20
+ "pydantic-monty==0.0.8"
21
 
22
  COPY --link ./ /app
23
  RUN chown -R 1000:1000 /app
scripts/card_includes.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from pathlib import Path
5
+
6
+ _FILE_PLACEHOLDER_RE = re.compile(r"\{\{file:([^}]+)\}\}")
7
+
8
+
9
+
10
def expand_file_placeholders(
    text: str,
    *,
    workspace_root: Path,
    seen: set[Path] | None = None,
) -> str:
    """Expand every {{file:path}} placeholder in *text*, recursing into includes.

    Relative references resolve against *workspace_root*; *seen* tracks the
    include chain so cyclic includes raise instead of recursing forever.
    Raises ValueError on a cycle; file-read errors propagate unchanged.
    NOTE(review): absolute and `..` references are not confined to
    workspace_root — presumably card sources are trusted; confirm.
    """
    root = workspace_root.resolve()
    in_progress: set[Path] = set(seen) if seen is not None else set()

    def _substitute(match: re.Match[str]) -> str:
        reference = match.group(1).strip()
        target = Path(reference)
        if not target.is_absolute():
            target = root / target
        target = target.resolve()
        if target in in_progress:
            raise ValueError(f"cyclic {{file:...}} include detected at {target}")
        body = target.read_text(encoding="utf-8")
        return expand_file_placeholders(
            body,
            workspace_root=root,
            seen=in_progress | {target},
        )

    return _FILE_PLACEHOLDER_RE.sub(_substitute, text)
35
+
36
+
37
+
38
def materialize_expanded_card(
    card_path: Path,
    *,
    workspace_root: Path,
    out_dir: Path,
) -> Path:
    """Expand a card file's {{file:...}} includes and write the result.

    The expanded copy is written into *out_dir* as a dot-prefixed
    `.<stem>.expanded<suffix>` file; the path of that file is returned.
    """
    source = card_path.resolve()
    rendered = expand_file_placeholders(
        source.read_text(encoding="utf-8"),
        workspace_root=workspace_root,
        seen={source},
    )
    out_dir.mkdir(parents=True, exist_ok=True)
    target = out_dir / f".{source.stem}.expanded{source.suffix}"
    target.write_text(rendered, encoding="utf-8")
    return target
scripts/hub_search_prefab_server.py CHANGED
@@ -1,5 +1,7 @@
1
  from __future__ import annotations
2
 
 
 
3
  import json
4
  import os
5
  import sys
@@ -10,6 +12,7 @@ from starlette.middleware import Middleware
10
  from starlette.middleware.cors import CORSMiddleware
11
  from starlette.responses import PlainTextResponse
12
 
 
13
  def _discover_workspace_root() -> Path:
14
  env_root = os.getenv("CODE_TOOLS_ROOT")
15
  if env_root:
@@ -29,13 +32,8 @@ SCRIPTS_DIR = Path(__file__).resolve().parent
29
  CARDS_DIR = PREFAB_ROOT / "agent-cards"
30
  CONFIG_PATH = PREFAB_ROOT / "fastagent.config.yaml"
31
  RAW_CARD_FILE = CARDS_DIR / "hub_search_raw.md"
32
- PREFAB_NATIVE_CARD_FILE = CARDS_DIR / "hub_search_prefab_native.md"
33
- PREFAB_LLM_RAW_CARD_FILE = CARDS_DIR / "hub_search_prefab_llm_raw.md"
34
- PREFAB_LLM_CODEGEN_CARD_FILE = CARDS_DIR / "hub_search_prefab_llm_codegen.md"
35
- PREFAB_LLM_CHAIN_CARD_FILE = CARDS_DIR / "hub_search_prefab_llm_chain.md"
36
  RAW_AGENT = "hub_search_raw"
37
- PREFAB_NATIVE_AGENT = "hub_search_prefab_native"
38
- PREFAB_LLM_CHAIN_AGENT = "hub_search_prefab_llm_chain"
39
 
40
  HOST = os.getenv("HOST", "0.0.0.0")
41
  PORT = int(os.getenv("PORT", "9999"))
@@ -66,12 +64,8 @@ from fastmcp.server.dependencies import get_access_token
66
  from fastmcp.tools import ToolResult
67
  from mcp.types import TextContent
68
  from pydantic import AnyHttpUrl
69
- from prefab_hub_ui import (
70
- build_runtime_wire,
71
- error_wire,
72
- parse_passthrough_wire,
73
- parse_runtime_payload,
74
- )
75
 
76
 
77
  class _RootResourceRemoteAuthProvider(RemoteAuthProvider):
@@ -82,6 +76,7 @@ class _RootResourceRemoteAuthProvider(RemoteAuthProvider):
82
  return self.base_url
83
 
84
 
 
85
  def _get_oauth_config() -> tuple[str | None, list[str], str]:
86
  oauth_provider = os.environ.get("FAST_AGENT_SERVE_OAUTH", "").lower()
87
  if oauth_provider in ("hf", "huggingface"):
@@ -98,16 +93,18 @@ def _get_oauth_config() -> tuple[str | None, list[str], str]:
98
  return oauth_provider, oauth_scopes, resource_url
99
 
100
 
 
 
 
 
 
 
101
  fast = FastAgent(
102
  "hub-search-prefab",
103
  config_path=str(CONFIG_PATH),
104
  parse_cli_args=False,
105
  )
106
- fast.load_agents(RAW_CARD_FILE)
107
- fast.load_agents(PREFAB_NATIVE_CARD_FILE)
108
- fast.load_agents(PREFAB_LLM_RAW_CARD_FILE)
109
- fast.load_agents(PREFAB_LLM_CODEGEN_CARD_FILE)
110
- fast.load_agents(PREFAB_LLM_CHAIN_CARD_FILE)
111
 
112
  _oauth_provider, _oauth_scopes, _oauth_resource_url = _get_oauth_config()
113
  _auth_provider = None
@@ -142,13 +139,6 @@ async def _run_raw(query: str) -> str:
142
  return await _run_agent(RAW_AGENT, query)
143
 
144
 
145
- async def _run_prefab_native(query: str) -> str:
146
- return await _run_agent(PREFAB_NATIVE_AGENT, query)
147
-
148
-
149
- async def _run_prefab_llm_chain(query: str) -> str:
150
- return await _run_agent(PREFAB_LLM_CHAIN_AGENT, query)
151
-
152
 
153
  def _get_request_bearer_token() -> str | None:
154
  access_token = get_access_token()
@@ -166,6 +156,7 @@ async def _run_agent(agent_name: str, query: str) -> str:
166
  request_bearer_token.reset(saved_token)
167
 
168
 
 
169
  def _wire_tool_result(wire: dict[str, object]) -> ToolResult:
170
  return ToolResult(
171
  content=[TextContent(type="text", text="[Rendered Prefab UI]")],
@@ -173,23 +164,16 @@ def _wire_tool_result(wire: dict[str, object]) -> ToolResult:
173
  )
174
 
175
 
 
176
  def _render_query_wire(query: str, raw_text: str) -> dict[str, object]:
177
  payload = parse_runtime_payload(raw_text)
178
  return build_runtime_wire(query, payload)
179
 
180
 
181
- def _render_prefab_wire(prefab_text: str) -> dict[str, object]:
182
- return parse_passthrough_wire(prefab_text)
183
-
184
-
185
  async def _build_query_wire(query: str) -> dict[str, object]:
186
- prefab_response = await _run_prefab_native(query)
187
- try:
188
- return _render_prefab_wire(prefab_response)
189
- except Exception:
190
- traceback.print_exc()
191
- raw = await _run_raw(query)
192
- return _render_query_wire(query, raw)
193
 
194
 
195
  def _missing_query_json() -> str:
@@ -206,7 +190,7 @@ def _missing_query_json() -> str:
206
 
207
  @mcp.tool(app=True)
208
  async def hub_search_prefab(query: str) -> ToolResult:
209
- """Run the Prefab UI service: model-authored Prefab first, raw deterministic fallback second."""
210
  try:
211
  wire = await _build_query_wire(query)
212
  except Exception as exc: # noqa: BLE001
@@ -215,21 +199,9 @@ async def hub_search_prefab(query: str) -> ToolResult:
215
  return _wire_tool_result(wire)
216
 
217
 
218
- @mcp.tool
219
- async def hub_search_prefab_native_debug(query: str | None = None) -> str:
220
- """Return the one-pass native Prefab agent payload, before fallback rendering."""
221
- if not query:
222
- return _missing_query_json()
223
- try:
224
- return await _run_prefab_native(query)
225
- except Exception as exc: # noqa: BLE001
226
- traceback.print_exc()
227
- return json.dumps({"result": None, "meta": {"ok": False, "error": str(exc)}})
228
-
229
-
230
  @mcp.tool
231
  async def hub_search_prefab_wire(query: str | None = None) -> str:
232
- """Return final Prefab wire JSON after active-path parse and fallback logic."""
233
  if not query:
234
  return json.dumps(error_wire("Missing required argument: query"), ensure_ascii=False)
235
  try:
@@ -252,17 +224,6 @@ async def hub_search_raw_debug(query: str | None = None) -> str:
252
  return json.dumps({"result": None, "meta": {"ok": False, "error": str(exc)}})
253
 
254
 
255
- @mcp.tool
256
- async def hub_search_prefab_llm_debug(query: str | None = None) -> str:
257
- """Return the two-pass LLM chain payload for comparison/debugging."""
258
- if not query:
259
- return _missing_query_json()
260
- try:
261
- return await _run_prefab_llm_chain(query)
262
- except Exception as exc: # noqa: BLE001
263
- traceback.print_exc()
264
- return json.dumps({"result": None, "meta": {"ok": False, "error": str(exc)}})
265
-
266
 
267
  def main() -> None:
268
  mcp.run(
 
1
  from __future__ import annotations
2
 
3
+ # ruff: noqa: E402
4
+
5
  import json
6
  import os
7
  import sys
 
12
  from starlette.middleware.cors import CORSMiddleware
13
  from starlette.responses import PlainTextResponse
14
 
15
+
16
  def _discover_workspace_root() -> Path:
17
  env_root = os.getenv("CODE_TOOLS_ROOT")
18
  if env_root:
 
32
  CARDS_DIR = PREFAB_ROOT / "agent-cards"
33
  CONFIG_PATH = PREFAB_ROOT / "fastagent.config.yaml"
34
  RAW_CARD_FILE = CARDS_DIR / "hub_search_raw.md"
35
+ EXPANDED_CARDS_DIR = CARDS_DIR
 
 
 
36
  RAW_AGENT = "hub_search_raw"
 
 
37
 
38
  HOST = os.getenv("HOST", "0.0.0.0")
39
  PORT = int(os.getenv("PORT", "9999"))
 
64
  from fastmcp.tools import ToolResult
65
  from mcp.types import TextContent
66
  from pydantic import AnyHttpUrl
67
+ from card_includes import materialize_expanded_card
68
+ from prefab_hub_ui import build_runtime_wire, error_wire, parse_runtime_payload
 
 
 
 
69
 
70
 
71
  class _RootResourceRemoteAuthProvider(RemoteAuthProvider):
 
76
  return self.base_url
77
 
78
 
79
+
80
  def _get_oauth_config() -> tuple[str | None, list[str], str]:
81
  oauth_provider = os.environ.get("FAST_AGENT_SERVE_OAUTH", "").lower()
82
  if oauth_provider in ("hf", "huggingface"):
 
93
  return oauth_provider, oauth_scopes, resource_url
94
 
95
 
96
+ EXPANDED_RAW_CARD_FILE = materialize_expanded_card(
97
+ RAW_CARD_FILE,
98
+ workspace_root=WORKSPACE_ROOT,
99
+ out_dir=EXPANDED_CARDS_DIR,
100
+ )
101
+
102
  fast = FastAgent(
103
  "hub-search-prefab",
104
  config_path=str(CONFIG_PATH),
105
  parse_cli_args=False,
106
  )
107
+ fast.load_agents(EXPANDED_RAW_CARD_FILE)
 
 
 
 
108
 
109
  _oauth_provider, _oauth_scopes, _oauth_resource_url = _get_oauth_config()
110
  _auth_provider = None
 
139
  return await _run_agent(RAW_AGENT, query)
140
 
141
 
 
 
 
 
 
 
 
142
 
143
  def _get_request_bearer_token() -> str | None:
144
  access_token = get_access_token()
 
156
  request_bearer_token.reset(saved_token)
157
 
158
 
159
+
160
  def _wire_tool_result(wire: dict[str, object]) -> ToolResult:
161
  return ToolResult(
162
  content=[TextContent(type="text", text="[Rendered Prefab UI]")],
 
164
  )
165
 
166
 
167
+
168
  def _render_query_wire(query: str, raw_text: str) -> dict[str, object]:
169
  payload = parse_runtime_payload(raw_text)
170
  return build_runtime_wire(query, payload)
171
 
172
 
 
 
 
 
173
  async def _build_query_wire(query: str) -> dict[str, object]:
174
+ raw = await _run_raw(query)
175
+ return _render_query_wire(query, raw)
176
+
 
 
 
 
177
 
178
 
179
  def _missing_query_json() -> str:
 
190
 
191
  @mcp.tool(app=True)
192
  async def hub_search_prefab(query: str) -> ToolResult:
193
+ """Run the Prefab UI service with deterministic rendering over raw Hub output."""
194
  try:
195
  wire = await _build_query_wire(query)
196
  except Exception as exc: # noqa: BLE001
 
199
  return _wire_tool_result(wire)
200
 
201
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  @mcp.tool
203
  async def hub_search_prefab_wire(query: str | None = None) -> str:
204
+ """Return final deterministic Prefab wire JSON for a Hub query."""
205
  if not query:
206
  return json.dumps(error_wire("Missing required argument: query"), ensure_ascii=False)
207
  try:
 
224
  return json.dumps({"result": None, "meta": {"ok": False, "error": str(exc)}})
225
 
226
 
 
 
 
 
 
 
 
 
 
 
 
227
 
228
  def main() -> None:
229
  mcp.run(
scripts/prefab_hub_ui.py CHANGED
@@ -5,10 +5,11 @@ import json
5
  from copy import deepcopy
6
  from typing import Any
7
 
8
- from prefab_ui.themes import blue
9
 
10
  PAGE_CSS_CLASS = "w-full max-w-6xl mx-auto p-4 md:p-6 lg:px-8"
11
- DEFAULT_THEME: dict[str, Any] = blue.to_json()
 
12
 
13
  _COMPONENT_KEY_ALIASES: dict[str, str] = {
14
  "bar_radius": "barRadius",
@@ -100,6 +101,19 @@ _PREFERRED_METRIC_KEYS: tuple[str, ...] = (
100
  "normal_likers",
101
  )
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  _URL_KEYS: tuple[str, ...] = (
104
  "repo_url",
105
  "url",
@@ -109,6 +123,62 @@ _URL_KEYS: tuple[str, ...] = (
109
  "github_repo_url",
110
  )
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
  def _copy_default_theme() -> dict[str, Any]:
114
  return deepcopy(DEFAULT_THEME)
@@ -457,14 +527,45 @@ def _is_scalar(value: Any) -> bool:
457
  return False
458
 
459
 
460
- def _normalize_cell(value: Any) -> Any:
461
  if value is None or isinstance(value, (str, int, float, bool)):
462
  return value
 
 
 
463
  return _compact_text(value)
464
 
465
 
466
  def _normalize_row(row: dict[str, Any]) -> dict[str, Any]:
467
- return {str(key): _normalize_cell(value) for key, value in row.items()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
 
469
 
470
  def _column_rank(key: str) -> tuple[int, str]:
@@ -481,6 +582,13 @@ def _metric_rank(key: str) -> tuple[int, str]:
481
  return (len(_PREFERRED_METRIC_KEYS), key)
482
 
483
 
 
 
 
 
 
 
 
484
  def _build_row_click(rows: list[dict[str, Any]]) -> dict[str, Any] | None:
485
  for key in _URL_KEYS:
486
  if any(isinstance(row.get(key), str) and row.get(key) for row in rows):
@@ -491,6 +599,198 @@ def _build_row_click(rows: list[dict[str, Any]]) -> dict[str, Any] | None:
491
  return None
492
 
493
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
  def _build_table_card(
495
  title: str,
496
  rows: list[dict[str, Any]],
@@ -531,7 +831,13 @@ def _build_table_card(
531
 
532
  normalized_rows = [_normalize_row(row) for row in rows]
533
  all_keys = {key for row in normalized_rows for key in row}
534
- visible_keys = sorted(all_keys, key=_column_rank)[:8]
 
 
 
 
 
 
535
  columns: list[dict[str, Any]] = []
536
  for key in visible_keys:
537
  column: dict[str, Any] = {
@@ -539,8 +845,11 @@ def _build_table_card(
539
  "header": _titleize(key),
540
  "sortable": key not in {"description"},
541
  }
 
 
542
  if any(isinstance(row.get(key), (int, float)) for row in normalized_rows):
543
- column["align"] = "right"
 
544
  column["format"] = "number"
545
  if key in {"description"}:
546
  column["maxWidth"] = "28rem"
@@ -556,7 +865,6 @@ def _build_table_card(
556
  "pageSize": 10,
557
  }
558
 
559
- row_click = _build_row_click(rows)
560
  if row_click is not None:
561
  data_table["onRowClick"] = row_click
562
 
@@ -588,7 +896,10 @@ def _build_key_value_card(
588
  *,
589
  description: str | None = None,
590
  ) -> dict[str, Any]:
591
- rows = [{"field": _titleize(key), "value": _normalize_cell(value)} for key, value in values.items()]
 
 
 
592
  return _build_table_card(
593
  title,
594
  rows,
@@ -742,12 +1053,31 @@ def _render_list(
742
 
743
  if all(isinstance(item, dict) for item in value):
744
  rows = [item for item in value if isinstance(item, dict)]
745
- return [_build_table_card(title, rows, description=description)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
746
 
747
  rows = [
748
  {
749
  "index": index + 1,
750
- "value": _normalize_cell(item),
751
  }
752
  for index, item in enumerate(value)
753
  ]
@@ -764,6 +1094,35 @@ def _render_dict(
764
  if depth > 2:
765
  return [_build_key_value_card(title, value, description=description)]
766
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
767
  if "results" in value or "coverage" in value:
768
  sections: list[dict[str, Any]] = []
769
  results = value.get("results")
@@ -909,6 +1268,19 @@ def _build_summary_card(
909
  }
910
  )
911
 
 
 
 
 
 
 
 
 
 
 
 
 
 
912
  return {"type": "Card", "children": summary_children}
913
 
914
 
@@ -924,7 +1296,7 @@ def build_runtime_wire(query: str, payload: dict[str, Any]) -> dict[str, Any]:
924
  helper_meta: dict[str, Any] | None = None
925
  body_children: list[dict[str, Any]] = []
926
 
927
- if _looks_like_helper_envelope(result):
928
  helper_meta = result.get("meta") if isinstance(result.get("meta"), dict) else None
929
  if result.get("ok") is False:
930
  message = str(result.get("error") or "Helper query failed")
@@ -953,10 +1325,11 @@ def build_runtime_wire(query: str, payload: dict[str, Any]) -> dict[str, Any]:
953
  else:
954
  body_children.extend(_render_value("Results", result))
955
 
 
956
  body_view = {
957
  "type": "Column",
958
  "gap": 6,
959
- "cssClass": PAGE_CSS_CLASS,
960
  "children": [
961
  _build_summary_card(
962
  query,
 
5
  from copy import deepcopy
6
  from typing import Any
7
 
8
+ from prefab_ui.themes import Basic
9
 
10
  PAGE_CSS_CLASS = "w-full max-w-6xl mx-auto p-4 md:p-6 lg:px-8"
11
+ WIDE_PAGE_CSS_CLASS = "w-full max-w-[90rem] mx-auto p-4 md:p-6 lg:px-8"
12
+ DEFAULT_THEME: dict[str, Any] = Basic(accent="blue").to_json()
13
 
14
  _COMPONENT_KEY_ALIASES: dict[str, str] = {
15
  "bar_radius": "barRadius",
 
101
  "normal_likers",
102
  )
103
 
104
+ _PREFERRED_LABEL_KEYS: tuple[str, ...] = (
105
+ "label",
106
+ "name",
107
+ "title",
108
+ "repo_type",
109
+ "status",
110
+ "task",
111
+ "pipeline_tag",
112
+ "kind",
113
+ "owner",
114
+ "username",
115
+ )
116
+
117
  _URL_KEYS: tuple[str, ...] = (
118
  "repo_url",
119
  "url",
 
123
  "github_repo_url",
124
  )
125
 
126
+ _FILTERABLE_COLUMN_KEYS: tuple[str, ...] = (
127
+ "repo_type",
128
+ "pipeline_tag",
129
+ "pipeline_tags",
130
+ "tags",
131
+ "status",
132
+ "license",
133
+ "author",
134
+ "owner",
135
+ "username",
136
+ "user",
137
+ "users",
138
+ "handle",
139
+ "organization",
140
+ "organizations",
141
+ )
142
+
143
+ _FILTERABLE_COLUMN_SUFFIXES: tuple[str, ...] = (
144
+ "_type",
145
+ "_tag",
146
+ "_tags",
147
+ "_status",
148
+ "_license",
149
+ "_author",
150
+ "_owner",
151
+ "_username",
152
+ "_user",
153
+ "_users",
154
+ "_handle",
155
+ "_organization",
156
+ "_organizations",
157
+ )
158
+
159
+ _USER_NAME_KEYS: tuple[str, ...] = (
160
+ "full_name",
161
+ "display_name",
162
+ "name",
163
+ "username",
164
+ "handle",
165
+ )
166
+
167
+ _USER_AVATAR_KEYS: tuple[str, ...] = (
168
+ "avatar_url",
169
+ "avatar",
170
+ "image_url",
171
+ )
172
+
173
+ _USER_SOCIAL_LINK_KEYS: tuple[tuple[str, str], ...] = (
174
+ ("hf_url", "Hugging Face"),
175
+ ("profile_url", "Profile"),
176
+ ("website_url", "Website"),
177
+ ("blog_url", "Blog"),
178
+ ("github_url", "GitHub"),
179
+ ("twitter_url", "Twitter"),
180
+ )
181
+
182
 
183
  def _copy_default_theme() -> dict[str, Any]:
184
  return deepcopy(DEFAULT_THEME)
 
527
  return False
528
 
529
 
530
+ def _normalize_cell(value: Any, *, key: str) -> Any:
531
  if value is None or isinstance(value, (str, int, float, bool)):
532
  return value
533
+ if isinstance(value, list):
534
+ if value and all(isinstance(item, str) for item in value):
535
+ return [_compact_text(item, limit=40) for item in value[:8]]
536
  return _compact_text(value)
537
 
538
 
539
def _normalize_row(row: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of *row* with stringified keys and display-ready cells."""
    normalized: dict[str, Any] = {}
    for raw_key, raw_value in row.items():
        cell_key = str(raw_key)
        normalized[cell_key] = _normalize_cell(raw_value, key=cell_key)
    return normalized
543
+
544
+
545
def _is_badge_friendly_key(key: str) -> bool:
    """True when *key* is a known filterable column or matches a filterable suffix."""
    if key in _FILTERABLE_COLUMN_KEYS:
        return True
    return key.endswith(_FILTERABLE_COLUMN_SUFFIXES)
547
+
548
+
549
def _should_make_filterable(key: str, rows: list[dict[str, Any]]) -> bool:
    """Decide whether a table column should get a filter control.

    Only badge-friendly keys qualify.  Columns holding any list value are
    always filterable (tag-style data).  Otherwise the column must hold at
    least one scalar, contain no non-bool numbers, and have at most 12
    distinct stringified values.
    """
    if not _is_badge_friendly_key(key):
        return False

    column_values = [row.get(key) for row in rows]
    if any(isinstance(entry, list) for entry in column_values):
        return True

    scalars = [
        entry
        for entry in column_values
        if isinstance(entry, (str, int, float, bool))
    ]
    if not scalars:
        return False

    for entry in scalars:
        # Real numbers (bool excluded) read as metrics, not categories.
        if isinstance(entry, (int, float)) and not isinstance(entry, bool):
            return False

    distinct = {str(entry) for entry in scalars}
    return 0 < len(distinct) <= 12
569
 
570
 
571
  def _column_rank(key: str) -> tuple[int, str]:
 
582
  return (len(_PREFERRED_METRIC_KEYS), key)
583
 
584
 
585
def _label_rank(key: str) -> tuple[int, str]:
    """Sort key that orders preferred label columns first, then alphabetically."""
    if key in _PREFERRED_LABEL_KEYS:
        return (_PREFERRED_LABEL_KEYS.index(key), key)
    # Unknown keys all share the same (last) bucket and sort by name.
    return (len(_PREFERRED_LABEL_KEYS), key)
590
+
591
+
592
  def _build_row_click(rows: list[dict[str, Any]]) -> dict[str, Any] | None:
593
  for key in _URL_KEYS:
594
  if any(isinstance(row.get(key), str) and row.get(key) for row in rows):
 
599
  return None
600
 
601
 
602
+ def _select_distribution_fields(
603
+ rows: list[dict[str, Any]],
604
+ ) -> tuple[str, str] | None:
605
+ if not 2 <= len(rows) <= 8:
606
+ return None
607
+
608
+ shared_keys = set(rows[0])
609
+ for row in rows[1:]:
610
+ shared_keys &= set(row)
611
+ if not shared_keys:
612
+ return None
613
+
614
+ numeric_keys = [
615
+ key
616
+ for key in shared_keys
617
+ if all(isinstance(row.get(key), (int, float)) for row in rows)
618
+ ]
619
+ if not numeric_keys:
620
+ return None
621
+
622
+ count_key = sorted(numeric_keys, key=_metric_rank)[0]
623
+ label_candidates = [
624
+ key
625
+ for key in shared_keys
626
+ if key != count_key
627
+ and all(isinstance(row.get(key), str) and row.get(key).strip() for row in rows)
628
+ ]
629
+ if not label_candidates:
630
+ return None
631
+
632
+ label_key = sorted(label_candidates, key=_label_rank)[0]
633
+ return label_key, count_key
634
+
635
+
636
def _build_distribution_card(
    title: str,
    rows: list[dict[str, Any]],
    *,
    label_key: str,
    count_key: str,
) -> dict[str, Any]:
    """Wrap a donut-style PieChart of *rows* in a titled Card.

    ``count_key`` feeds the slice sizes and ``label_key`` the slice names;
    the header describes the pairing (e.g. "Count by repo type").
    """
    chart: dict[str, Any] = {
        "type": "PieChart",
        "data": rows,
        "dataKey": count_key,
        "nameKey": label_key,
        "innerRadius": 60,
        "paddingAngle": 2,
        "showLegend": True,
        "showTooltip": True,
        "showLabel": False,
        "height": 260,
    }
    header: dict[str, Any] = {
        "type": "CardHeader",
        "children": [
            {"type": "CardTitle", "content": f"{title} distribution"},
            {
                "type": "CardDescription",
                "content": f"{_titleize(count_key)} by {_titleize(label_key).lower()}",
            },
        ],
    }
    return {
        "type": "Card",
        "children": [
            header,
            {"type": "CardContent", "children": [chart]},
        ],
    }
675
+
676
+
677
+ def _looks_like_user_profile(values: dict[str, Any]) -> bool:
678
+ return any(key in values for key in ("username", "handle", "avatar_url", "hf_url", "profile_url"))
679
+
680
+
681
+ def _first_present(values: dict[str, Any], keys: tuple[str, ...]) -> str | None:
682
+ for key in keys:
683
+ value = values.get(key)
684
+ if isinstance(value, str) and value.strip():
685
+ return value.strip()
686
+ return None
687
+
688
+
689
def _user_profile_links(values: dict[str, Any]) -> list[tuple[str, str]]:
    """Collect up to four ``(label, url)`` profile links, deduplicated by URL.

    Explicit ``*_url`` fields come first; a Hugging Face link is synthesized
    from the username/handle (and prepended) when none was provided, and
    GitHub/Twitter handles are expanded into URLs when only the bare handle
    is present.
    """
    collected: list[tuple[str, str]] = []
    for field, label in _USER_SOCIAL_LINK_KEYS:
        raw = values.get(field)
        if isinstance(raw, str) and raw.strip():
            collected.append((label, raw.strip()))

    explicit_labels = {label for label, _ in collected}

    username = _first_present(values, ("username", "handle"))
    if username and "Hugging Face" not in explicit_labels:
        hf_url = f"https://huggingface.co/{username.lstrip('@')}"
        collected.insert(0, ("Hugging Face", hf_url))

    github = values.get("github")
    if isinstance(github, str) and github.strip() and "GitHub" not in explicit_labels:
        collected.append(("GitHub", f"https://github.com/{github.strip().lstrip('@')}"))

    twitter = values.get("twitter")
    if isinstance(twitter, str) and twitter.strip() and "Twitter" not in explicit_labels:
        collected.append(("Twitter", f"https://x.com/{twitter.strip().lstrip('@')}"))

    seen_urls: set[str] = set()
    unique: list[tuple[str, str]] = []
    for label, url in collected:
        if url in seen_urls:
            continue
        seen_urls.add(url)
        unique.append((label, url))
    return unique[:4]
716
+
717
+
718
def _build_user_profile_card(title: str, values: dict[str, Any]) -> dict[str, Any] | None:
    """Render a compact user-profile Card (avatar, name, bio, link buttons).

    Returns ``None`` when no display name can be found, so the caller can
    fall back to generic rendering.  The first link becomes the primary
    "View profile" button; the rest render as outline buttons.
    """
    name = _first_present(values, _USER_NAME_KEYS)
    if name is None:
        return None

    username = _first_present(values, ("username", "handle"))
    subtitle = f"@{username.lstrip('@')}" if username else title
    avatar = _first_present(values, _USER_AVATAR_KEYS)
    bio = _first_present(values, ("bio", "description", "headline"))
    links = _user_profile_links(values)

    details: list[dict[str, Any]] = [
        {"type": "H3", "content": name},
        {"type": "Muted", "content": subtitle},
    ]
    if bio:
        details.append({"type": "Text", "content": bio})
    if links:
        buttons: list[dict[str, Any]] = []
        for index, (label, url) in enumerate(links):
            primary = index == 0
            buttons.append(
                {
                    "type": "Button",
                    "label": "View profile" if primary else label,
                    "variant": "default" if primary else "outline",
                    "buttonType": "button",
                    "onClick": {"action": "openLink", "url": url},
                }
            )
        details.append(
            {"type": "Row", "gap": 2, "cssClass": "flex-wrap", "children": buttons}
        )

    row_children: list[dict[str, Any]] = []
    if avatar:
        row_children.append(
            {
                "type": "Image",
                "src": avatar,
                "alt": name,
                "width": "64px",
                "height": "64px",
                "cssClass": "rounded-full border object-cover",
            }
        )
    row_children.append({"type": "Column", "gap": 2, "children": details})

    content_row = {"type": "Row", "gap": 4, "align": "center", "children": row_children}
    return {
        "type": "Card",
        "children": [
            {
                "type": "CardContent",
                "cssClass": "p-6",
                "children": [content_row],
            }
        ],
    }
779
+
780
+
781
+ def _prefers_wide_layout(value: Any) -> bool:
782
+ if isinstance(value, list):
783
+ return bool(value) and all(isinstance(item, dict) for item in value)
784
+ if isinstance(value, dict):
785
+ items = value.get("items")
786
+ if isinstance(items, list) and items and all(isinstance(item, dict) for item in items):
787
+ return True
788
+ results = value.get("results")
789
+ if isinstance(results, list) and results and all(isinstance(item, dict) for item in results):
790
+ return True
791
+ return False
792
+
793
+
794
  def _build_table_card(
795
  title: str,
796
  rows: list[dict[str, Any]],
 
831
 
832
  normalized_rows = [_normalize_row(row) for row in rows]
833
  all_keys = {key for row in normalized_rows for key in row}
834
+ row_click = _build_row_click(rows)
835
+ visible_keys = sorted(all_keys, key=_column_rank)
836
+ if row_click is not None:
837
+ non_url_keys = [key for key in visible_keys if key not in _URL_KEYS]
838
+ if non_url_keys:
839
+ visible_keys = non_url_keys
840
+ visible_keys = visible_keys[:8]
841
  columns: list[dict[str, Any]] = []
842
  for key in visible_keys:
843
  column: dict[str, Any] = {
 
845
  "header": _titleize(key),
846
  "sortable": key not in {"description"},
847
  }
848
+ if _should_make_filterable(key, normalized_rows):
849
+ column["filterable"] = True
850
  if any(isinstance(row.get(key), (int, float)) for row in normalized_rows):
851
+ column["headerClass"] = "text-right"
852
+ column["cellClass"] = "text-right"
853
  column["format"] = "number"
854
  if key in {"description"}:
855
  column["maxWidth"] = "28rem"
 
865
  "pageSize": 10,
866
  }
867
 
 
868
  if row_click is not None:
869
  data_table["onRowClick"] = row_click
870
 
 
896
  *,
897
  description: str | None = None,
898
  ) -> dict[str, Any]:
899
+ rows = [
900
+ {"field": _titleize(key), "value": _normalize_cell(value, key=str(key))}
901
+ for key, value in values.items()
902
+ ]
903
  return _build_table_card(
904
  title,
905
  rows,
 
1053
 
1054
  if all(isinstance(item, dict) for item in value):
1055
  rows = [item for item in value if isinstance(item, dict)]
1056
+ table_card = _build_table_card(title, rows, description=description)
1057
+ distribution_fields = _select_distribution_fields(rows)
1058
+ if distribution_fields is None:
1059
+ return [table_card]
1060
+ label_key, count_key = distribution_fields
1061
+ return [
1062
+ {
1063
+ "type": "Column",
1064
+ "gap": 4,
1065
+ "children": [
1066
+ _build_distribution_card(
1067
+ title,
1068
+ rows,
1069
+ label_key=label_key,
1070
+ count_key=count_key,
1071
+ ),
1072
+ table_card,
1073
+ ],
1074
+ }
1075
+ ]
1076
 
1077
  rows = [
1078
  {
1079
  "index": index + 1,
1080
+ "value": _normalize_cell(item, key="value"),
1081
  }
1082
  for index, item in enumerate(value)
1083
  ]
 
1094
  if depth > 2:
1095
  return [_build_key_value_card(title, value, description=description)]
1096
 
1097
+ if depth <= 1 and _looks_like_user_profile(value):
1098
+ sections: list[dict[str, Any]] = []
1099
+ user_card = _build_user_profile_card(title, value)
1100
+ if user_card is not None:
1101
+ sections.append(user_card)
1102
+ remaining = {
1103
+ key: item
1104
+ for key, item in value.items()
1105
+ if key
1106
+ not in {
1107
+ *_USER_NAME_KEYS,
1108
+ *_USER_AVATAR_KEYS,
1109
+ "bio",
1110
+ "description",
1111
+ "headline",
1112
+ "hf_url",
1113
+ "profile_url",
1114
+ "website_url",
1115
+ "blog_url",
1116
+ "github_url",
1117
+ "twitter_url",
1118
+ "github",
1119
+ "twitter",
1120
+ }
1121
+ }
1122
+ if remaining:
1123
+ sections.extend(_render_dict(title, remaining, description=description, depth=depth + 1))
1124
+ return sections
1125
+
1126
  if "results" in value or "coverage" in value:
1127
  sections: list[dict[str, Any]] = []
1128
  results = value.get("results")
 
1268
  }
1269
  )
1270
 
1271
+ if isinstance(runtime_meta, dict) and runtime_meta.get("elapsed_ms") is not None:
1272
+ summary_children.append(
1273
+ {
1274
+ "type": "CardFooter",
1275
+ "children": [
1276
+ {
1277
+ "type": "Muted",
1278
+ "content": f'Runtime: {runtime_meta["elapsed_ms"]} ms',
1279
+ }
1280
+ ],
1281
+ }
1282
+ )
1283
+
1284
  return {"type": "Card", "children": summary_children}
1285
 
1286
 
 
1296
  helper_meta: dict[str, Any] | None = None
1297
  body_children: list[dict[str, Any]] = []
1298
 
1299
+ if isinstance(result, dict) and _looks_like_helper_envelope(result):
1300
  helper_meta = result.get("meta") if isinstance(result.get("meta"), dict) else None
1301
  if result.get("ok") is False:
1302
  message = str(result.get("error") or "Helper query failed")
 
1325
  else:
1326
  body_children.extend(_render_value("Results", result))
1327
 
1328
+ page_css_class = WIDE_PAGE_CSS_CLASS if _prefers_wide_layout(result) else PAGE_CSS_CLASS
1329
  body_view = {
1330
  "type": "Column",
1331
  "gap": 6,
1332
+ "cssClass": page_css_class,
1333
  "children": [
1334
  _build_summary_card(
1335
  query,
wheels/.gitkeep ADDED
File without changes
wheels/prefab_ui-0.13.2.dev5+a585463-py3-none-any.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20a94bcc2a2fd2bd31f2430ee7fd8f04f2ac410afb2932f03014a8609bce5fb3
3
+ size 896909