akseljoonas HF Staff committed on
Commit
0b8bdf3
·
1 Parent(s): 63a4db3

find examples tool (repo crawling + fuzzy matching)

Browse files
Files changed (3) hide show
  1. agent/tools/github_find_examples.py +323 -240
  2. pyproject.toml +1 -0
  3. uv.lock +77 -0
agent/tools/github_find_examples.py CHANGED
@@ -1,201 +1,259 @@
1
  """
2
  GitHub Find Examples Tool - Discover examples, tutorials, and guides for any library
3
 
4
- Uses intelligent heuristics to find the best learning resources on GitHub.
5
  """
6
 
7
- import math
8
  import os
9
- from datetime import datetime, timedelta
10
- from typing import Any, Dict, List, Optional
11
 
12
  import requests
 
13
 
14
  from agent.tools.types import ToolResult
15
 
16
-
17
- def _search_github_code(
18
- query: str, token: str, limit: int = 20
19
- ) -> List[Dict[str, Any]]:
20
- """Execute a GitHub code search query"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  headers = {
22
- "Accept": "application/vnd.github.text-match+json",
23
  "X-GitHub-Api-Version": "2022-11-28",
24
  "Authorization": f"Bearer {token}",
25
  }
26
 
27
- results = []
28
- page = 1
29
- per_page = min(100, limit)
30
 
 
31
  try:
32
- while len(results) < limit:
33
- params = {"q": query, "per_page": per_page, "page": page}
34
- response = requests.get(
35
- "https://api.github.com/search/code",
36
- headers=headers,
37
- params=params,
38
- timeout=30,
39
- )
40
-
41
- if response.status_code != 200:
42
- break
43
-
44
- data = response.json()
45
- items = data.get("items", [])
46
- if not items:
47
- break
48
-
49
- for item in items:
50
- results.append(
51
- {
52
- "repo": item.get("repository", {}).get("full_name", ""),
53
- "path": item.get("path", ""),
54
- "sha": item.get("sha", ""),
55
- "url": item.get("html_url", ""),
56
- "size": item.get("size", 0),
57
- "text_matches": item.get("text_matches", []),
58
- }
59
- )
60
 
61
- if len(results) >= limit or len(items) < per_page:
62
- break
63
- page += 1
 
64
 
65
- except Exception:
66
- pass
 
 
 
 
 
 
 
 
67
 
68
- return results[:limit]
 
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
- def _fetch_repo_metadata(repos: List[str], token: str) -> Dict[str, Dict[str, Any]]:
72
- """Fetch star count and update date for repositories"""
 
73
  headers = {
74
  "Accept": "application/vnd.github+json",
75
  "X-GitHub-Api-Version": "2022-11-28",
76
  "Authorization": f"Bearer {token}",
77
  }
78
 
79
- metadata = {}
80
- for repo in repos:
81
- try:
82
- response = requests.get(
83
- f"https://api.github.com/repos/{repo}", headers=headers, timeout=10
84
- )
85
- if response.status_code == 200:
86
- data = response.json()
87
- metadata[repo] = {
88
- "stars": data.get("stargazers_count", 0),
89
- "updated_at": data.get("updated_at", ""),
90
- }
91
- except Exception:
92
- continue
93
-
94
- return metadata
95
-
96
-
97
- def _score_example(
98
- result: Dict[str, Any], metadata: Dict[str, Dict[str, Any]]
99
- ) -> tuple[float, str]:
100
- """Score an example based on multiple heuristics"""
101
- path = result["path"].lower()
102
- repo = result["repo"]
103
- score = 0.0
104
- reasons = []
105
-
106
- # Path-based scoring
107
- if "readme.md" in path:
108
- score += 100
109
- reasons.append("README file")
110
- elif "examples/" in path or "example/" in path:
111
- score += 90
112
- reasons.append("in examples/")
113
- elif "tutorials/" in path or "tutorial/" in path:
114
- score += 85
115
- reasons.append("in tutorials/")
116
- elif "docs/" in path or "doc/" in path:
117
- score += 80
118
- reasons.append("in docs/")
119
- elif "notebooks/" in path or "notebook/" in path:
120
- score += 70
121
- reasons.append("in notebooks/")
122
-
123
- # Extension scoring
124
- if path.endswith(".ipynb"):
125
- score += 15
126
- elif path.endswith(".md"):
127
- score += 20
128
- elif path.endswith(".py"):
129
- score += 10
130
-
131
- # Content keywords from text matches
132
- text_content = ""
133
- for match in result.get("text_matches", []):
134
- text_content += match.get("fragment", "").lower() + " "
135
-
136
- if 'if __name__ == "__main__"' in text_content:
137
- score += 50
138
- reasons.append("runnable example")
139
- if "quickstart" in text_content or "getting started" in text_content:
140
- score += 60
141
- reasons.append("quickstart guide")
142
- if "tutorial" in text_content:
143
- score += 50
144
- reasons.append("tutorial content")
145
-
146
- # Repository metadata scoring
147
- repo_meta = metadata.get(repo, {})
148
- stars = repo_meta.get("stars", 0)
149
- updated_at = repo_meta.get("updated_at", "")
150
-
151
- # Star-based score (logarithmic)
152
- if stars > 0:
153
- score += math.log10(stars + 1) * 10
154
-
155
- # Recency bonus (updated in last 6 months)
156
- if updated_at:
157
- try:
158
- updated_date = datetime.fromisoformat(updated_at.replace("Z", "+00:00"))
159
- if datetime.now(updated_date.tzinfo) - updated_date < timedelta(days=180):
160
- score += 20
161
- reasons.append("recently updated")
162
- except Exception:
163
- pass
164
-
165
- # Filename quality
166
- filename = path.split("/")[-1].lower()
167
- if any(
168
- word in filename
169
- for word in ["example", "tutorial", "guide", "quickstart", "demo"]
170
- ):
171
- score += 30
172
- reasons.append("descriptive filename")
173
-
174
- # Size penalty for very large files
175
- if result["size"] > 100000:
176
- score *= 0.5
177
- reasons.append("large file")
178
-
179
- return score, ", ".join(reasons) if reasons else "matches library"
 
 
 
 
 
180
 
181
 
182
  def find_examples(
183
- library: str,
 
184
  org: str = "huggingface",
185
- repo_scope: Optional[str] = None,
186
  max_results: int = 10,
 
187
  ) -> ToolResult:
188
  """
189
- Find examples, tutorials, and guides for a library using intelligent search.
190
 
191
  Args:
192
- library: Library name (e.g., "transformers", "torch", "react")
193
- org: GitHub organization to search in
194
- repo_scope: Optional specific repository name
195
- max_results: Maximum number of results (default 10)
 
196
 
197
  Returns:
198
- ToolResult with ranked examples
199
  """
200
  token = os.environ.get("GITHUB_TOKEN")
201
  if not token:
@@ -206,119 +264,143 @@ def find_examples(
206
  "isError": True,
207
  }
208
 
209
- # Build search queries
210
- all_results = []
211
-
212
- # Query 1: Search in example directories
213
- for path_pattern in ["examples/", "docs/", "tutorials/", "notebooks/"]:
214
- query_parts = [f"org:{org}", library, f"path:{path_pattern}"]
215
- if repo_scope:
216
- query_parts[0] = f"repo:{org}/{repo_scope}"
217
- query = " ".join(query_parts)
218
- all_results.extend(_search_github_code(query, token, limit=20))
219
-
220
- # Query 2: Search README files
221
- query_parts = [f"org:{org}", library, "filename:README"]
222
- if repo_scope:
223
- query_parts[0] = f"repo:{org}/{repo_scope}"
224
- query = " ".join(query_parts)
225
- all_results.extend(_search_github_code(query, token, limit=20))
226
-
227
- # Deduplicate
228
- seen = set()
229
- unique_results = []
230
- for result in all_results:
231
- key = (result["repo"], result["path"])
232
- if key not in seen:
233
- seen.add(key)
234
- unique_results.append(result)
235
-
236
- if not unique_results:
237
  return {
238
- "formatted": f"No examples found for '{library}' in {org}",
239
  "totalResults": 0,
240
  "resultsShared": 0,
 
241
  }
242
 
243
- # Fetch repo metadata
244
- repos = list(set(r["repo"] for r in unique_results))
245
- metadata = _fetch_repo_metadata(repos, token)
246
 
247
- # Score and rank
248
- scored = []
249
- for result in unique_results:
250
- score, reason = _score_example(result, metadata)
251
- repo_meta = metadata.get(result["repo"], {})
252
- scored.append(
253
- {
254
- "repo": result["repo"],
255
- "path": result["path"],
256
- "url": result["url"],
257
- "score": score,
258
- "reason": reason,
259
- "stars": repo_meta.get("stars", 0),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  }
261
- )
262
 
263
- scored.sort(key=lambda x: x["score"], reverse=True)
264
- top_results = scored[:max_results]
265
 
266
- # Format output
267
- lines = [f"**Found {len(top_results)} examples for '{library}' in {org}:**\n"]
268
 
269
- for i, ex in enumerate(top_results, 1):
270
- lines.append(f"{i}. **{ex['repo']}/{ex['path']}**")
271
- lines.append(f" Score: {ex['score']:.1f} | {ex['stars']:,} stars")
272
- lines.append(f" Reason: {ex['reason']}")
273
- lines.append(f" URL: {ex['url']}\n")
 
 
 
 
 
 
 
274
 
275
  return {
276
  "formatted": "\n".join(lines),
277
- "totalResults": len(top_results),
278
- "resultsShared": len(top_results),
279
  }
280
 
281
 
282
  # Tool specification
283
  GITHUB_FIND_EXAMPLES_TOOL_SPEC = {
284
- "name": "find_examples",
285
  "description": (
286
- "Find examples, tutorials, and guides for any library on GitHub using intelligent heuristic-based search.\n\n"
287
- "Uses multiple search strategies and ranks results by:\n"
288
- "- Path quality (examples/, docs/, tutorials/ directories)\n"
289
- "- Content keywords (quickstart, tutorial, runnable code)\n"
290
- "- Repository popularity (stars, recent updates)\n"
291
- "- File characteristics (size, extension, descriptive names)\n\n"
 
 
 
 
292
  "## Examples:\n\n"
293
- "**Find transformers examples in Hugging Face:**\n"
294
- "{'library': 'transformers', 'org': 'huggingface', 'max_results': 5}\n\n"
295
- "**Find PyTorch examples in specific repo:**\n"
296
- "{'library': 'torch', 'org': 'pytorch', 'repo_scope': 'examples', 'max_results': 10}\n\n"
297
- "**Find React quickstart guides:**\n"
298
- "{'library': 'react quickstart', 'org': 'facebook', 'max_results': 3}\n\n"
299
- "Returns ranked list with file paths, scores, star counts, and direct URLs."
 
 
 
 
300
  ),
301
  "parameters": {
302
  "type": "object",
303
  "properties": {
304
- "library": {
305
  "type": "string",
306
- "description": "Library name to search for (e.g., 'transformers', 'torch', 'react'). Required.",
307
  },
308
- "org": {
309
  "type": "string",
310
- "description": "GitHub organization to search in. Default: 'huggingface'.",
311
  },
312
- "repo_scope": {
313
  "type": "string",
314
- "description": "Optional specific repository name within the org (e.g., 'transformers').",
315
  },
316
  "max_results": {
317
  "type": "integer",
318
- "description": "Maximum number of results to return. Default: 10.",
 
 
 
 
319
  },
320
  },
321
- "required": ["library"],
322
  },
323
  }
324
 
@@ -327,10 +409,11 @@ async def github_find_examples_handler(arguments: Dict[str, Any]) -> tuple[str,
327
  """Handler for agent tool router"""
328
  try:
329
  result = find_examples(
330
- library=arguments["library"],
 
331
  org=arguments.get("org", "huggingface"),
332
- repo_scope=arguments.get("repo_scope"),
333
- max_results=arguments.get("max_results", 10),
334
  )
335
  return result["formatted"], not result.get("isError", False)
336
  except Exception as e:
 
1
  """
2
  GitHub Find Examples Tool - Discover examples, tutorials, and guides for any library
3
 
4
+ Lists all files in a repository and performs deterministic keyword search.
5
  """
6
 
 
7
  import os
8
+ from typing import Any, Dict, List
 
9
 
10
  import requests
11
+ from thefuzz import fuzz
12
 
13
  from agent.tools.types import ToolResult
14
 
15
# Keywords that mark a path as example-like. The list is shared module-wide
# and each entry is fuzzy matched against repository file paths.
EXAMPLE_PATTERNS = [
    # Core example patterns
    "examples", "example", "samples", "sample", "demos", "demo",
    # Tutorial/learning patterns
    "tutorials", "tutorial", "guides", "guide", "quickstart",
    "getting-started", "getting_started", "howto", "how-to",
    "walkthroughs", "walkthrough",
    # Cookbook/recipe patterns
    "cookbook", "cookbooks", "recipes", "recipe",
    # Notebook patterns (common in ML/data science)
    "notebooks", "notebook", "ipynb",
    # Starter/template patterns
    "starter", "starters", "templates", "template", "boilerplate",
    # Snippet/use-case patterns
    "snippets", "snippet", "use-cases", "usecases", "use_cases",
    # Showcase/playground patterns
    "showcase", "playground", "sandbox",
    # Script patterns
    "scripts",
]
64
+
65
+
66
def _get_repo_tree(org: str, repo: str, token: str) -> tuple[List[Dict[str, Any]], str]:
    """List every file (blob) on the default branch of ``org/repo``.

    Two GitHub REST calls are made: one to look up the repository's default
    branch, and one to fetch that branch's git tree with ``recursive=1``.

    Args:
        org: GitHub organization or user that owns the repository.
        repo: Repository name.
        token: GitHub token, sent as a Bearer credential.

    Returns:
        ``(files, error_message)``. On success ``error_message`` is ``""`` and
        each file is a dict with ``path``, ``sha``, ``size`` and ``url`` keys.
        On failure ``files`` is empty and ``error_message`` is ``"not_found"``
        (the repository lookup returned 404) or a human-readable description.
        This function does not raise; every exception is turned into an
        error message.
    """
    # Function-scope import so the block does not depend on a file-level import.
    from urllib.parse import quote

    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }

    full_repo = f"{org}/{repo}"

    # Get default branch
    try:
        response = requests.get(
            f"https://api.github.com/repos/{full_repo}", headers=headers, timeout=10
        )
        if response.status_code == 404:
            return [], "not_found"
        if response.status_code != 200:
            return [], f"API error: {response.status_code}"

        # Fall back to "main" both when the key is missing and when it is null.
        default_branch = response.json().get("default_branch") or "main"
        # Percent-encode characters such as '#', '?' or spaces so the ref is
        # safe inside a URL path; '/' is left untouched (quote's default).
        branch_ref = quote(default_branch)
    except Exception as e:
        return [], f"Error fetching repo: {str(e)}"

    # Get repository tree recursively
    try:
        response = requests.get(
            f"https://api.github.com/repos/{full_repo}/git/trees/{branch_ref}",
            headers=headers,
            params={"recursive": "1"},
            timeout=30,
        )
        if response.status_code != 200:
            return [], f"Error fetching tree: {response.status_code}"

        # NOTE(review): the Git Trees API documents a "truncated" flag for very
        # large repositories; it is not inspected here, so the listing may be
        # partial in that case - confirm whether callers need to know.
        tree = response.json().get("tree", [])

        # Keep only files ("blob" entries); directories and other entry types
        # are dropped. File paths are percent-encoded in the browser URL so
        # names containing '#', '?', '%' or spaces still produce a valid link.
        files = [
            {
                "path": item["path"],
                "sha": item["sha"],
                "size": item.get("size", 0),
                "url": f"https://github.com/{full_repo}/blob/{branch_ref}/{quote(item['path'])}",
            }
            for item in tree
            if item.get("type") == "blob"
        ]

        return files, ""
    except Exception as e:
        return [], f"Error processing tree: {str(e)}"
120
 
121
+
122
def _search_similar_repos(org: str, repo: str, token: str) -> List[Dict[str, Any]]:
    """Search ``org`` for repositories matching the name ``repo``.

    Issues one repository search restricted to the organization, asking for
    at most 10 hits sorted by stars in descending order.

    Returns:
        A list of dicts with ``name``, ``full_name``, ``description``,
        ``stars`` and ``url`` keys. An empty list is returned for any
        non-200 response or any exception.
    """
    request_options = {
        "headers": {
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
            "Authorization": f"Bearer {token}",
        },
        # Restrict the search to the organization and use the repo name as the term.
        "params": {
            "q": f"org:{org} {repo}",
            "sort": "stars",
            "order": "desc",
            "per_page": 10,
        },
        "timeout": 30,
    }

    try:
        reply = requests.get(
            "https://api.github.com/search/repositories", **request_options
        )
        if reply.status_code != 200:
            return []

        suggestions = []
        for hit in reply.json().get("items", []):
            suggestions.append(
                {
                    "name": hit.get("name"),
                    "full_name": hit.get("full_name"),
                    "description": hit.get("description"),
                    "stars": hit.get("stargazers_count", 0),
                    "url": hit.get("html_url"),
                }
            )
        return suggestions
    except Exception:
        # Best effort: similar-repo suggestions are optional, so failures are silent.
        return []
159
+
160
+
161
def _score_against_example_patterns(file_path: str) -> int:
    """Return the best fuzzy score of ``file_path`` against ``EXAMPLE_PATTERNS``.

    Every pattern is compared to the lower-cased path with
    ``fuzz.token_set_ratio`` and the highest score is returned.

    Args:
        file_path: Repository-relative path of the file to score.

    Returns:
        The maximum score over all patterns, or 0 when the pattern list is empty.
    """
    # The path does not change between patterns, so lower-case it once.
    lowered_path = file_path.lower()
    return max(
        (
            fuzz.token_set_ratio(pattern.lower(), lowered_path)
            for pattern in EXAMPLE_PATTERNS
        ),
        default=0,
    )
168
+
169
+
170
def _score_against_keyword(file_path: str, keyword: str) -> int:
    """Score how well ``keyword`` fuzzy-matches ``file_path``.

    Two case-insensitive measures are taken and the larger one is returned:
    ``fuzz.partial_ratio`` (substring-style matching, suited to paths) and
    ``fuzz.token_set_ratio`` (word-level matching).
    """
    needle = keyword.lower()
    haystack = file_path.lower()

    candidates = (
        fuzz.partial_ratio(needle, haystack),
        fuzz.token_set_ratio(needle, haystack),
    )
    return max(candidates)
179
+
180
+
181
def _handle_repo_tree_errors(
    all_files: List[Dict[str, Any]],
    error: str,
    org: str,
    repo: str,
    token: str,
) -> ToolResult | None:
    """Turn the outcome of a repository tree fetch into an early-exit result.

    Args:
        all_files: Files returned by the tree fetch (may be empty).
        error: Error string from the tree fetch; ``"not_found"`` triggers a
            search for similarly named repositories, ``""`` means no error.
        org: GitHub organization or user.
        repo: Repository name.
        token: GitHub token used for the similar-repository search.

    Returns:
        A ToolResult dict when there is nothing to search (repository missing,
        fetch error, or no files), otherwise ``None``.
    """
    if error == "not_found":
        candidates = _search_similar_repos(org, repo, token)

        if candidates:
            # Build a numbered list of suggestions.
            report = [f"**Repository '{org}/{repo}' not found. Similar repositories:**\n"]
            for rank, candidate in enumerate(candidates, 1):
                report.append(
                    f"{rank}. **{candidate['full_name']}** (⭐ {candidate['stars']:,} stars)"
                )
                blurb = candidate["description"]
                if blurb:
                    # Long descriptions are cut to 100 characters plus an ellipsis.
                    if len(blurb) > 100:
                        blurb = blurb[:100] + "..."
                    report.append(f" {blurb}")
                report.append(f" {candidate['url']}\n")
            message = "\n".join(report)
            count = len(candidates)
        else:
            message = f"Repository '{org}/{repo}' not found and no similar repositories found."
            count = 0

        return {
            "formatted": message,
            "totalResults": count,
            "resultsShared": count,
            "isError": True,
        }

    if error:
        return {
            "formatted": f"Error accessing repository '{org}/{repo}': {error}",
            "totalResults": 0,
            "resultsShared": 0,
            "isError": True,
        }

    if all_files:
        return None

    # An empty listing is reported without the isError flag.
    return {
        "formatted": f"No files found in repository '{org}/{repo}'",
        "totalResults": 0,
        "resultsShared": 0,
    }
236
 
237
 
238
  def find_examples(
239
+ keyword: str = "",
240
+ repo: str = "",
241
  org: str = "huggingface",
 
242
  max_results: int = 10,
243
+ min_score: int = 80,
244
  ) -> ToolResult:
245
  """
246
+ Find example files in a repository using fuzzy matching.
247
 
248
  Args:
249
+ keyword: Keyword to fuzzy match against file paths (e.g., "grpo")
250
+ repo: Repository name (e.g., "trl")
251
+ org: GitHub organization (default: "huggingface")
252
+ max_results: Maximum number of results (default 10)
253
+ min_score: Minimum fuzzy match score (0-100, default 80)
254
 
255
  Returns:
256
+ ToolResult with matching files, or similar repos if repo not found
257
  """
258
  token = os.environ.get("GITHUB_TOKEN")
259
  if not token:
 
264
  "isError": True,
265
  }
266
 
267
+ if not repo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  return {
269
+ "formatted": "Error: repo parameter is required",
270
  "totalResults": 0,
271
  "resultsShared": 0,
272
+ "isError": True,
273
  }
274
 
275
+ # Get all files in the repository
276
+ all_files, error = _get_repo_tree(org, repo, token)
 
277
 
278
+ # Handle errors (not found, API errors, empty repo)
279
+ if error_result := _handle_repo_tree_errors(all_files, error, org, repo, token):
280
+ return error_result
281
+
282
+ # Step 1: Filter files by example patterns (score >= 60)
283
+ example_threshold = 60
284
+ example_files = []
285
+ for file in all_files:
286
+ example_score = _score_against_example_patterns(file["path"])
287
+ if example_score >= example_threshold:
288
+ example_files.append({**file, "example_score": example_score})
289
+
290
+ if not example_files:
291
+ return {
292
+ "formatted": f"No example files found in {org}/{repo} (no files match example patterns with score >= {example_threshold}).",
293
+ "totalResults": 0,
294
+ "resultsShared": 0,
295
+ }
296
+
297
+ # Step 2: If keyword provided, score and filter by keyword
298
+ if keyword:
299
+ scored_files = []
300
+ for file in example_files:
301
+ keyword_score = _score_against_keyword(file["path"], keyword)
302
+ if keyword_score >= min_score:
303
+ scored_files.append({**file, "score": keyword_score})
304
+
305
+ if not scored_files:
306
+ return {
307
+ "formatted": f"No files found in {org}/{repo} matching keyword '{keyword}' (min score: {min_score}) among {len(example_files)} example files.",
308
+ "totalResults": 0,
309
+ "resultsShared": 0,
310
+ }
311
+ else:
312
+ # No keyword: use example pattern scores
313
+ scored_files = [
314
+ {**file, "score": file["example_score"]}
315
+ for file in example_files
316
+ if file["example_score"] >= min_score
317
+ ]
318
+
319
+ if not scored_files:
320
+ return {
321
+ "formatted": f"No example files found in {org}/{repo} with score >= {min_score}.",
322
+ "totalResults": 0,
323
+ "resultsShared": 0,
324
  }
 
325
 
326
+ # Sort by score (descending) for best matches first
327
+ scored_files.sort(key=lambda x: x["score"], reverse=True)
328
 
329
+ # Limit results
330
+ results = scored_files[:max_results]
331
 
332
+ # Format output
333
+ keyword_desc = f" matching '{keyword}'" if keyword else ""
334
+ lines = [f"**Found {len(results)} example files in {org}/{repo}{keyword_desc}:**"]
335
+ if len(scored_files) > max_results:
336
+ lines[0] += f" (showing top {max_results} of {len(scored_files)})"
337
+ lines.append("")
338
+
339
+ for i, file in enumerate(results, 1):
340
+ lines.append(f"{i}. **{file['path']}** (score: {file['score']})")
341
+ lines.append(f" Size: {file['size']:,} bytes | SHA: {file['sha'][:7]}")
342
+ lines.append(f" URL: {file['url']}")
343
+ lines.append("")
344
 
345
  return {
346
  "formatted": "\n".join(lines),
347
+ "totalResults": len(results),
348
+ "resultsShared": len(results),
349
  }
350
 
351
 
352
# Tool specification: name, description and JSON-schema parameters for the
# github_find_examples tool.
GITHUB_FIND_EXAMPLES_TOOL_SPEC = {
    "name": "github_find_examples",
    "description": "".join(
        [
            "Find example files in a GitHub repository using fuzzy matching.\n\n",
            "This tool uses fuzzy string matching to find files related to a keyword or common example patterns. ",
            "It calculates similarity scores and returns the best matches.\n\n",
            "Global example keywords (always fuzzy matched): example, tutorial, demo, quickstart, guide, sample\n\n",
            "If the repository is not found, it returns similar repositories sorted by star count.\n\n",
            "Features:\n",
            "- Fuzzy matching using Levenshtein distance\n",
            "- Sorted by match score (best matches first)\n",
            "- Auto-suggests similar repos if target not found\n",
            "- Configurable minimum score threshold\n\n",
            "## Examples:\n\n",
            "**Find GRPO examples in TRL:**\n",
            "{'keyword': 'grpo', 'repo': 'trl', 'org': 'huggingface'}\n",
            " Matches: examples/scripts/grpo_agent.py, examples/scripts/gspo.py\n\n",
            "**Find tutorial files in transformers:**\n",
            "{'keyword': 'tutorial', 'repo': 'transformers', 'org': 'huggingface'}\n\n",
            "**Find any example files (no keyword):**\n",
            "{'repo': 'pytorch', 'org': 'pytorch'}\n",
            "→ Uses global example keywords for matching\n\n",
            "**Adjust minimum score:**\n",
            "{'keyword': 'bert', 'repo': 'transformers', 'org': 'huggingface', 'min_score': 70}\n\n",
            "Returns list of matching files with fuzzy match scores, paths, sizes, and URLs.",
        ]
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "keyword": dict(
                type="string",
                description="Keyword to fuzzy match against file paths (e.g., 'grpo', 'bert'). Optional.",
            ),
            "repo": dict(
                type="string",
                description="Repository name (e.g., 'trl', 'transformers'). Required.",
            ),
            "org": dict(
                type="string",
                description="GitHub organization or username. Default: 'huggingface'.",
            ),
            "max_results": dict(
                type="integer",
                description="Maximum number of results to return. Default: 50.",
            ),
            "min_score": dict(
                type="integer",
                description="Minimum fuzzy match score (0-100). Default: 60.",
            ),
        },
        "required": ["repo"],
    },
}
406
 
 
409
  """Handler for agent tool router"""
410
  try:
411
  result = find_examples(
412
+ keyword=arguments.get("keyword", ""),
413
+ repo=arguments["repo"],
414
  org=arguments.get("org", "huggingface"),
415
+ max_results=arguments.get("max_results", 50),
416
+ min_score=arguments.get("min_score", 60),
417
  )
418
  return result["formatted"], not result.get("isError", False)
419
  except Exception as e:
pyproject.toml CHANGED
@@ -23,4 +23,5 @@ dependencies = [
23
  "prompt-toolkit>=3.0.0",
24
  "ipykernel>=7.1.0",
25
  "ipywidgets>=8.1.8",
 
26
  ]
 
23
  "prompt-toolkit>=3.0.0",
24
  "ipykernel>=7.1.0",
25
  "ipywidgets>=8.1.8",
26
+ "thefuzz>=0.22.1",
27
  ]
uv.lock CHANGED
@@ -945,6 +945,7 @@ dependencies = [
945
  { name = "python-dotenv" },
946
  { name = "requests" },
947
  { name = "tenacity" },
 
948
  { name = "torch" },
949
  { name = "transformers" },
950
  ]
@@ -967,6 +968,7 @@ requires-dist = [
967
  { name = "python-dotenv", specifier = ">=1.2.1" },
968
  { name = "requests", specifier = ">=2.32.5" },
969
  { name = "tenacity", specifier = ">=8.0.0" },
 
970
  { name = "torch", specifier = ">=2.9.1" },
971
  { name = "transformers", specifier = ">=2.3.0" },
972
  ]
@@ -3490,6 +3492,69 @@ wheels = [
3490
  { url = "https://files.pythonhosted.org/packages/81/d6/4bfbb40c9a0b42fc53c7cf442f6385db70b40f74a783130c5d0a5aa62228/pyzmq-27.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dc5dbf68a7857b59473f7df42650c621d7e8923fb03fa74a526890f4d33cc4d7", size = 575170, upload-time = "2025-09-08T23:09:01.418Z" },
3491
  ]
3492
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3493
  [[package]]
3494
  name = "referencing"
3495
  version = "0.36.2"
@@ -3946,6 +4011,18 @@ wheels = [
3946
  { url = "https://files.pythonhosted.org/packages/53/b3/95ab646b0c908823d71e49ab8b5949ec9f33346cee3897d1af6be28a8d91/textual-6.6.0-py3-none-any.whl", hash = "sha256:5a9484bd15ee8a6fd8ac4ed4849fb25ee56bed2cecc7b8a83c4cd7d5f19515e5", size = 712606, upload-time = "2025-11-10T17:49:58.391Z" },
3947
  ]
3948
 
 
 
 
 
 
 
 
 
 
 
 
 
3949
  [[package]]
3950
  name = "tiktoken"
3951
  version = "0.12.0"
 
945
  { name = "python-dotenv" },
946
  { name = "requests" },
947
  { name = "tenacity" },
948
+ { name = "thefuzz" },
949
  { name = "torch" },
950
  { name = "transformers" },
951
  ]
 
968
  { name = "python-dotenv", specifier = ">=1.2.1" },
969
  { name = "requests", specifier = ">=2.32.5" },
970
  { name = "tenacity", specifier = ">=8.0.0" },
971
+ { name = "thefuzz", specifier = ">=0.22.1" },
972
  { name = "torch", specifier = ">=2.9.1" },
973
  { name = "transformers", specifier = ">=2.3.0" },
974
  ]
 
3492
  { url = "https://files.pythonhosted.org/packages/81/d6/4bfbb40c9a0b42fc53c7cf442f6385db70b40f74a783130c5d0a5aa62228/pyzmq-27.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dc5dbf68a7857b59473f7df42650c621d7e8923fb03fa74a526890f4d33cc4d7", size = 575170, upload-time = "2025-09-08T23:09:01.418Z" },
3493
  ]
3494
 
3495
+ [[package]]
3496
+ name = "rapidfuzz"
3497
+ version = "3.14.3"
3498
+ source = { registry = "https://pypi.org/simple" }
3499
+ sdist = { url = "https://files.pythonhosted.org/packages/d3/28/9d808fe62375b9aab5ba92fa9b29371297b067c2790b2d7cda648b1e2f8d/rapidfuzz-3.14.3.tar.gz", hash = "sha256:2491937177868bc4b1e469087601d53f925e8d270ccc21e07404b4b5814b7b5f", size = 57863900, upload-time = "2025-11-01T11:54:52.321Z" }
3500
+ wheels = [
3501
+ { url = "https://files.pythonhosted.org/packages/fa/8e/3c215e860b458cfbedb3ed73bc72e98eb7e0ed72f6b48099604a7a3260c2/rapidfuzz-3.14.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:685c93ea961d135893b5984a5a9851637d23767feabe414ec974f43babbd8226", size = 1945306, upload-time = "2025-11-01T11:53:06.452Z" },
3502
+ { url = "https://files.pythonhosted.org/packages/36/d9/31b33512015c899f4a6e6af64df8dfe8acddf4c8b40a4b3e0e6e1bcd00e5/rapidfuzz-3.14.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fa7c8f26f009f8c673fbfb443792f0cf8cf50c4e18121ff1e285b5e08a94fbdb", size = 1390788, upload-time = "2025-11-01T11:53:08.721Z" },
3503
+ { url = "https://files.pythonhosted.org/packages/a9/67/2ee6f8de6e2081ccd560a571d9c9063184fe467f484a17fa90311a7f4a2e/rapidfuzz-3.14.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:57f878330c8d361b2ce76cebb8e3e1dc827293b6abf404e67d53260d27b5d941", size = 1374580, upload-time = "2025-11-01T11:53:10.164Z" },
3504
+ { url = "https://files.pythonhosted.org/packages/30/83/80d22997acd928eda7deadc19ccd15883904622396d6571e935993e0453a/rapidfuzz-3.14.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c5f545f454871e6af05753a0172849c82feaf0f521c5ca62ba09e1b382d6382", size = 3154947, upload-time = "2025-11-01T11:53:12.093Z" },
3505
+ { url = "https://files.pythonhosted.org/packages/5b/cf/9f49831085a16384695f9fb096b99662f589e30b89b4a589a1ebc1a19d34/rapidfuzz-3.14.3-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:07aa0b5d8863e3151e05026a28e0d924accf0a7a3b605da978f0359bb804df43", size = 1223872, upload-time = "2025-11-01T11:53:13.664Z" },
3506
+ { url = "https://files.pythonhosted.org/packages/c8/0f/41ee8034e744b871c2e071ef0d360686f5ccfe5659f4fd96c3ec406b3c8b/rapidfuzz-3.14.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73b07566bc7e010e7b5bd490fb04bb312e820970180df6b5655e9e6224c137db", size = 2392512, upload-time = "2025-11-01T11:53:15.109Z" },
3507
+ { url = "https://files.pythonhosted.org/packages/da/86/280038b6b0c2ccec54fb957c732ad6b41cc1fd03b288d76545b9cf98343f/rapidfuzz-3.14.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6de00eb84c71476af7d3110cf25d8fe7c792d7f5fa86764ef0b4ca97e78ca3ed", size = 2521398, upload-time = "2025-11-01T11:53:17.146Z" },
3508
+ { url = "https://files.pythonhosted.org/packages/fa/7b/05c26f939607dca0006505e3216248ae2de631e39ef94dd63dbbf0860021/rapidfuzz-3.14.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d7843a1abf0091773a530636fdd2a49a41bcae22f9910b86b4f903e76ddc82dc", size = 4259416, upload-time = "2025-11-01T11:53:19.34Z" },
3509
+ { url = "https://files.pythonhosted.org/packages/40/eb/9e3af4103d91788f81111af1b54a28de347cdbed8eaa6c91d5e98a889aab/rapidfuzz-3.14.3-cp312-cp312-win32.whl", hash = "sha256:dea97ac3ca18cd3ba8f3d04b5c1fe4aa60e58e8d9b7793d3bd595fdb04128d7a", size = 1709527, upload-time = "2025-11-01T11:53:20.949Z" },
3510
+ { url = "https://files.pythonhosted.org/packages/b8/63/d06ecce90e2cf1747e29aeab9f823d21e5877a4c51b79720b2d3be7848f8/rapidfuzz-3.14.3-cp312-cp312-win_amd64.whl", hash = "sha256:b5100fd6bcee4d27f28f4e0a1c6b5127bc8ba7c2a9959cad9eab0bf4a7ab3329", size = 1538989, upload-time = "2025-11-01T11:53:22.428Z" },
3511
+ { url = "https://files.pythonhosted.org/packages/fc/6d/beee32dcda64af8128aab3ace2ccb33d797ed58c434c6419eea015fec779/rapidfuzz-3.14.3-cp312-cp312-win_arm64.whl", hash = "sha256:4e49c9e992bc5fc873bd0fff7ef16a4405130ec42f2ce3d2b735ba5d3d4eb70f", size = 811161, upload-time = "2025-11-01T11:53:23.811Z" },
3512
+ { url = "https://files.pythonhosted.org/packages/e4/4f/0d94d09646853bd26978cb3a7541b6233c5760687777fa97da8de0d9a6ac/rapidfuzz-3.14.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dbcb726064b12f356bf10fffdb6db4b6dce5390b23627c08652b3f6e49aa56ae", size = 1939646, upload-time = "2025-11-01T11:53:25.292Z" },
3513
+ { url = "https://files.pythonhosted.org/packages/b6/eb/f96aefc00f3bbdbab9c0657363ea8437a207d7545ac1c3789673e05d80bd/rapidfuzz-3.14.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1704fc70d214294e554a2421b473779bcdeef715881c5e927dc0f11e1692a0ff", size = 1385512, upload-time = "2025-11-01T11:53:27.594Z" },
3514
+ { url = "https://files.pythonhosted.org/packages/26/34/71c4f7749c12ee223dba90017a5947e8f03731a7cc9f489b662a8e9e643d/rapidfuzz-3.14.3-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc65e72790ddfd310c2c8912b45106e3800fefe160b0c2ef4d6b6fec4e826457", size = 1373571, upload-time = "2025-11-01T11:53:29.096Z" },
3515
+ { url = "https://files.pythonhosted.org/packages/32/00/ec8597a64f2be301ce1ee3290d067f49f6a7afb226b67d5f15b56d772ba5/rapidfuzz-3.14.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e38c1305cffae8472572a0584d4ffc2f130865586a81038ca3965301f7c97c", size = 3156759, upload-time = "2025-11-01T11:53:30.777Z" },
3516
+ { url = "https://files.pythonhosted.org/packages/61/d5/b41eeb4930501cc899d5a9a7b5c9a33d85a670200d7e81658626dcc0ecc0/rapidfuzz-3.14.3-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:e195a77d06c03c98b3fc06b8a28576ba824392ce40de8c708f96ce04849a052e", size = 1222067, upload-time = "2025-11-01T11:53:32.334Z" },
3517
+ { url = "https://files.pythonhosted.org/packages/2a/7d/6d9abb4ffd1027c6ed837b425834f3bed8344472eb3a503ab55b3407c721/rapidfuzz-3.14.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1b7ef2f4b8583a744338a18f12c69693c194fb6777c0e9ada98cd4d9e8f09d10", size = 2394775, upload-time = "2025-11-01T11:53:34.24Z" },
3518
+ { url = "https://files.pythonhosted.org/packages/15/ce/4f3ab4c401c5a55364da1ffff8cc879fc97b4e5f4fa96033827da491a973/rapidfuzz-3.14.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:a2135b138bcdcb4c3742d417f215ac2d8c2b87bde15b0feede231ae95f09ec41", size = 2526123, upload-time = "2025-11-01T11:53:35.779Z" },
3519
+ { url = "https://files.pythonhosted.org/packages/c1/4b/54f804975376a328f57293bd817c12c9036171d15cf7292032e3f5820b2d/rapidfuzz-3.14.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:33a325ed0e8e1aa20c3e75f8ab057a7b248fdea7843c2a19ade0008906c14af0", size = 4262874, upload-time = "2025-11-01T11:53:37.866Z" },
3520
+ { url = "https://files.pythonhosted.org/packages/e9/b6/958db27d8a29a50ee6edd45d33debd3ce732e7209183a72f57544cd5fe22/rapidfuzz-3.14.3-cp313-cp313-win32.whl", hash = "sha256:8383b6d0d92f6cd008f3c9216535be215a064b2cc890398a678b56e6d280cb63", size = 1707972, upload-time = "2025-11-01T11:53:39.442Z" },
3521
+ { url = "https://files.pythonhosted.org/packages/07/75/fde1f334b0cec15b5946d9f84d73250fbfcc73c236b4bc1b25129d90876b/rapidfuzz-3.14.3-cp313-cp313-win_amd64.whl", hash = "sha256:e6b5e3036976f0fde888687d91be86d81f9ac5f7b02e218913c38285b756be6c", size = 1537011, upload-time = "2025-11-01T11:53:40.92Z" },
3522
+ { url = "https://files.pythonhosted.org/packages/2e/d7/d83fe001ce599dc7ead57ba1debf923dc961b6bdce522b741e6b8c82f55c/rapidfuzz-3.14.3-cp313-cp313-win_arm64.whl", hash = "sha256:7ba009977601d8b0828bfac9a110b195b3e4e79b350dcfa48c11269a9f1918a0", size = 810744, upload-time = "2025-11-01T11:53:42.723Z" },
3523
+ { url = "https://files.pythonhosted.org/packages/92/13/a486369e63ff3c1a58444d16b15c5feb943edd0e6c28a1d7d67cb8946b8f/rapidfuzz-3.14.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0a28add871425c2fe94358c6300bbeb0bc2ed828ca003420ac6825408f5a424", size = 1967702, upload-time = "2025-11-01T11:53:44.554Z" },
3524
+ { url = "https://files.pythonhosted.org/packages/f1/82/efad25e260b7810f01d6b69122685e355bed78c94a12784bac4e0beb2afb/rapidfuzz-3.14.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:010e12e2411a4854b0434f920e72b717c43f8ec48d57e7affe5c42ecfa05dd0e", size = 1410702, upload-time = "2025-11-01T11:53:46.066Z" },
3525
+ { url = "https://files.pythonhosted.org/packages/ba/1a/34c977b860cde91082eae4a97ae503f43e0d84d4af301d857679b66f9869/rapidfuzz-3.14.3-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5cfc3d57abd83c734d1714ec39c88a34dd69c85474918ebc21296f1e61eb5ca8", size = 1382337, upload-time = "2025-11-01T11:53:47.62Z" },
3526
+ { url = "https://files.pythonhosted.org/packages/88/74/f50ea0e24a5880a9159e8fd256b84d8f4634c2f6b4f98028bdd31891d907/rapidfuzz-3.14.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:89acb8cbb52904f763e5ac238083b9fc193bed8d1f03c80568b20e4cef43a519", size = 3165563, upload-time = "2025-11-01T11:53:49.216Z" },
3527
+ { url = "https://files.pythonhosted.org/packages/e8/7a/e744359404d7737049c26099423fc54bcbf303de5d870d07d2fb1410f567/rapidfuzz-3.14.3-cp313-cp313t-manylinux_2_31_armv7l.whl", hash = "sha256:7d9af908c2f371bfb9c985bd134e295038e3031e666e4b2ade1e7cb7f5af2f1a", size = 1214727, upload-time = "2025-11-01T11:53:50.883Z" },
3528
+ { url = "https://files.pythonhosted.org/packages/d3/2e/87adfe14ce75768ec6c2b8acd0e05e85e84be4be5e3d283cdae360afc4fe/rapidfuzz-3.14.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:1f1925619627f8798f8c3a391d81071336942e5fe8467bc3c567f982e7ce2897", size = 2403349, upload-time = "2025-11-01T11:53:52.322Z" },
3529
+ { url = "https://files.pythonhosted.org/packages/70/17/6c0b2b2bff9c8b12e12624c07aa22e922b0c72a490f180fa9183d1ef2c75/rapidfuzz-3.14.3-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:152555187360978119e98ce3e8263d70dd0c40c7541193fc302e9b7125cf8f58", size = 2507596, upload-time = "2025-11-01T11:53:53.835Z" },
3530
+ { url = "https://files.pythonhosted.org/packages/c3/d1/87852a7cbe4da7b962174c749a47433881a63a817d04f3e385ea9babcd9e/rapidfuzz-3.14.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:52619d25a09546b8db078981ca88939d72caa6b8701edd8b22e16482a38e799f", size = 4273595, upload-time = "2025-11-01T11:53:55.961Z" },
3531
+ { url = "https://files.pythonhosted.org/packages/c1/ab/1d0354b7d1771a28fa7fe089bc23acec2bdd3756efa2419f463e3ed80e16/rapidfuzz-3.14.3-cp313-cp313t-win32.whl", hash = "sha256:489ce98a895c98cad284f0a47960c3e264c724cb4cfd47a1430fa091c0c25204", size = 1757773, upload-time = "2025-11-01T11:53:57.628Z" },
3532
+ { url = "https://files.pythonhosted.org/packages/0b/0c/71ef356adc29e2bdf74cd284317b34a16b80258fa0e7e242dd92cc1e6d10/rapidfuzz-3.14.3-cp313-cp313t-win_amd64.whl", hash = "sha256:656e52b054d5b5c2524169240e50cfa080b04b1c613c5f90a2465e84888d6f15", size = 1576797, upload-time = "2025-11-01T11:53:59.455Z" },
3533
+ { url = "https://files.pythonhosted.org/packages/fe/d2/0e64fc27bb08d4304aa3d11154eb5480bcf5d62d60140a7ee984dc07468a/rapidfuzz-3.14.3-cp313-cp313t-win_arm64.whl", hash = "sha256:c7e40c0a0af02ad6e57e89f62bef8604f55a04ecae90b0ceeda591bbf5923317", size = 829940, upload-time = "2025-11-01T11:54:01.1Z" },
3534
+ { url = "https://files.pythonhosted.org/packages/32/6f/1b88aaeade83abc5418788f9e6b01efefcd1a69d65ded37d89cd1662be41/rapidfuzz-3.14.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:442125473b247227d3f2de807a11da6c08ccf536572d1be943f8e262bae7e4ea", size = 1942086, upload-time = "2025-11-01T11:54:02.592Z" },
3535
+ { url = "https://files.pythonhosted.org/packages/a0/2c/b23861347436cb10f46c2bd425489ec462790faaa360a54a7ede5f78de88/rapidfuzz-3.14.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1ec0c8c0c3d4f97ced46b2e191e883f8c82dbbf6d5ebc1842366d7eff13cd5a6", size = 1386993, upload-time = "2025-11-01T11:54:04.12Z" },
3536
+ { url = "https://files.pythonhosted.org/packages/83/86/5d72e2c060aa1fbdc1f7362d938f6b237dff91f5b9fc5dd7cc297e112250/rapidfuzz-3.14.3-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2dc37bc20272f388b8c3a4eba4febc6e77e50a8f450c472def4751e7678f55e4", size = 1379126, upload-time = "2025-11-01T11:54:05.777Z" },
3537
+ { url = "https://files.pythonhosted.org/packages/c9/bc/ef2cee3e4d8b3fc22705ff519f0d487eecc756abdc7c25d53686689d6cf2/rapidfuzz-3.14.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dee362e7e79bae940a5e2b3f6d09c6554db6a4e301cc68343886c08be99844f1", size = 3159304, upload-time = "2025-11-01T11:54:07.351Z" },
3538
+ { url = "https://files.pythonhosted.org/packages/a0/36/dc5f2f62bbc7bc90be1f75eeaf49ed9502094bb19290dfb4747317b17f12/rapidfuzz-3.14.3-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:4b39921df948388a863f0e267edf2c36302983459b021ab928d4b801cbe6a421", size = 1218207, upload-time = "2025-11-01T11:54:09.641Z" },
3539
+ { url = "https://files.pythonhosted.org/packages/df/7e/8f4be75c1bc62f47edf2bbbe2370ee482fae655ebcc4718ac3827ead3904/rapidfuzz-3.14.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:beda6aa9bc44d1d81242e7b291b446be352d3451f8217fcb068fc2933927d53b", size = 2401245, upload-time = "2025-11-01T11:54:11.543Z" },
3540
+ { url = "https://files.pythonhosted.org/packages/05/38/f7c92759e1bb188dd05b80d11c630ba59b8d7856657baf454ff56059c2ab/rapidfuzz-3.14.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:6a014ba09657abfcfeed64b7d09407acb29af436d7fc075b23a298a7e4a6b41c", size = 2518308, upload-time = "2025-11-01T11:54:13.134Z" },
3541
+ { url = "https://files.pythonhosted.org/packages/c7/ac/85820f70fed5ecb5f1d9a55f1e1e2090ef62985ef41db289b5ac5ec56e28/rapidfuzz-3.14.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:32eeafa3abce138bb725550c0e228fc7eaeec7059aa8093d9cbbec2b58c2371a", size = 4265011, upload-time = "2025-11-01T11:54:15.087Z" },
3542
+ { url = "https://files.pythonhosted.org/packages/46/a9/616930721ea9835c918af7cde22bff17f9db3639b0c1a7f96684be7f5630/rapidfuzz-3.14.3-cp314-cp314-win32.whl", hash = "sha256:adb44d996fc610c7da8c5048775b21db60dd63b1548f078e95858c05c86876a3", size = 1742245, upload-time = "2025-11-01T11:54:17.19Z" },
3543
+ { url = "https://files.pythonhosted.org/packages/06/8a/f2fa5e9635b1ccafda4accf0e38246003f69982d7c81f2faa150014525a4/rapidfuzz-3.14.3-cp314-cp314-win_amd64.whl", hash = "sha256:f3d15d8527e2b293e38ce6e437631af0708df29eafd7c9fc48210854c94472f9", size = 1584856, upload-time = "2025-11-01T11:54:18.764Z" },
3544
+ { url = "https://files.pythonhosted.org/packages/ef/97/09e20663917678a6d60d8e0e29796db175b1165e2079830430342d5298be/rapidfuzz-3.14.3-cp314-cp314-win_arm64.whl", hash = "sha256:576e4b9012a67e0bf54fccb69a7b6c94d4e86a9540a62f1a5144977359133583", size = 833490, upload-time = "2025-11-01T11:54:20.753Z" },
3545
+ { url = "https://files.pythonhosted.org/packages/03/1b/6b6084576ba87bf21877c77218a0c97ba98cb285b0c02eaaee3acd7c4513/rapidfuzz-3.14.3-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:cec3c0da88562727dd5a5a364bd9efeb535400ff0bfb1443156dd139a1dd7b50", size = 1968658, upload-time = "2025-11-01T11:54:22.25Z" },
3546
+ { url = "https://files.pythonhosted.org/packages/38/c0/fb02a0db80d95704b0a6469cc394e8c38501abf7e1c0b2afe3261d1510c2/rapidfuzz-3.14.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d1fa009f8b1100e4880868137e7bf0501422898f7674f2adcd85d5a67f041296", size = 1410742, upload-time = "2025-11-01T11:54:23.863Z" },
3547
+ { url = "https://files.pythonhosted.org/packages/a4/72/3fbf12819fc6afc8ec75a45204013b40979d068971e535a7f3512b05e765/rapidfuzz-3.14.3-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b86daa7419b5e8b180690efd1fdbac43ff19230803282521c5b5a9c83977655", size = 1382810, upload-time = "2025-11-01T11:54:25.571Z" },
3548
+ { url = "https://files.pythonhosted.org/packages/0f/18/0f1991d59bb7eee28922a00f79d83eafa8c7bfb4e8edebf4af2a160e7196/rapidfuzz-3.14.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c7bd1816db05d6c5ffb3a4df0a2b7b56fb8c81ef584d08e37058afa217da91b1", size = 3166349, upload-time = "2025-11-01T11:54:27.195Z" },
3549
+ { url = "https://files.pythonhosted.org/packages/0d/f0/baa958b1989c8f88c78bbb329e969440cf330b5a01a982669986495bb980/rapidfuzz-3.14.3-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:33da4bbaf44e9755b0ce192597f3bde7372fe2e381ab305f41b707a95ac57aa7", size = 1214994, upload-time = "2025-11-01T11:54:28.821Z" },
3550
+ { url = "https://files.pythonhosted.org/packages/e4/a0/cd12ec71f9b2519a3954febc5740291cceabc64c87bc6433afcb36259f3b/rapidfuzz-3.14.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3fecce764cf5a991ee2195a844196da840aba72029b2612f95ac68a8b74946bf", size = 2403919, upload-time = "2025-11-01T11:54:30.393Z" },
3551
+ { url = "https://files.pythonhosted.org/packages/0b/ce/019bd2176c1644098eced4f0595cb4b3ef52e4941ac9a5854f209d0a6e16/rapidfuzz-3.14.3-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:ecd7453e02cf072258c3a6b8e930230d789d5d46cc849503729f9ce475d0e785", size = 2508346, upload-time = "2025-11-01T11:54:32.048Z" },
3552
+ { url = "https://files.pythonhosted.org/packages/23/f8/be16c68e2c9e6c4f23e8f4adbb7bccc9483200087ed28ff76c5312da9b14/rapidfuzz-3.14.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ea188aa00e9bcae8c8411f006a5f2f06c4607a02f24eab0d8dc58566aa911f35", size = 4274105, upload-time = "2025-11-01T11:54:33.701Z" },
3553
+ { url = "https://files.pythonhosted.org/packages/a1/d1/5ab148e03f7e6ec8cd220ccf7af74d3aaa4de26dd96df58936beb7cba820/rapidfuzz-3.14.3-cp314-cp314t-win32.whl", hash = "sha256:7ccbf68100c170e9a0581accbe9291850936711548c6688ce3bfb897b8c589ad", size = 1793465, upload-time = "2025-11-01T11:54:35.331Z" },
3554
+ { url = "https://files.pythonhosted.org/packages/cd/97/433b2d98e97abd9fff1c470a109b311669f44cdec8d0d5aa250aceaed1fb/rapidfuzz-3.14.3-cp314-cp314t-win_amd64.whl", hash = "sha256:9ec02e62ae765a318d6de38df609c57fc6dacc65c0ed1fd489036834fd8a620c", size = 1623491, upload-time = "2025-11-01T11:54:38.085Z" },
3555
+ { url = "https://files.pythonhosted.org/packages/e2/f6/e2176eb94f94892441bce3ddc514c179facb65db245e7ce3356965595b19/rapidfuzz-3.14.3-cp314-cp314t-win_arm64.whl", hash = "sha256:e805e52322ae29aa945baf7168b6c898120fbc16d2b8f940b658a5e9e3999253", size = 851487, upload-time = "2025-11-01T11:54:40.176Z" },
3556
+ ]
3557
+
3558
  [[package]]
3559
  name = "referencing"
3560
  version = "0.36.2"
 
4011
  { url = "https://files.pythonhosted.org/packages/53/b3/95ab646b0c908823d71e49ab8b5949ec9f33346cee3897d1af6be28a8d91/textual-6.6.0-py3-none-any.whl", hash = "sha256:5a9484bd15ee8a6fd8ac4ed4849fb25ee56bed2cecc7b8a83c4cd7d5f19515e5", size = 712606, upload-time = "2025-11-10T17:49:58.391Z" },
4012
  ]
4013
 
4014
+ [[package]]
4015
+ name = "thefuzz"
4016
+ version = "0.22.1"
4017
+ source = { registry = "https://pypi.org/simple" }
4018
+ dependencies = [
4019
+ { name = "rapidfuzz" },
4020
+ ]
4021
+ sdist = { url = "https://files.pythonhosted.org/packages/81/4b/d3eb25831590d6d7d38c2f2e3561d3ba41d490dc89cd91d9e65e7c812508/thefuzz-0.22.1.tar.gz", hash = "sha256:7138039a7ecf540da323792d8592ef9902b1d79eb78c147d4f20664de79f3680", size = 19993, upload-time = "2024-01-19T19:18:23.135Z" }
4022
+ wheels = [
4023
+ { url = "https://files.pythonhosted.org/packages/82/4f/1695e70ceb3604f19eda9908e289c687ea81c4fecef4d90a9d1d0f2f7ae9/thefuzz-0.22.1-py3-none-any.whl", hash = "sha256:59729b33556850b90e1093c4cf9e618af6f2e4c985df193fdf3c5b5cf02ca481", size = 8245, upload-time = "2024-01-19T19:18:20.362Z" },
4024
+ ]
4025
+
4026
  [[package]]
4027
  name = "tiktoken"
4028
  version = "0.12.0"