evalstate HF Staff committed on
Commit
a78cc68
·
verified ·
1 Parent(s): f058b55

Deploy OpenClaw PR API

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. Dockerfile +23 -0
  3. README.md +31 -6
  4. pyproject.toml +73 -0
  5. src/slop_farmer.egg-info/PKG-INFO +411 -0
  6. src/slop_farmer.egg-info/SOURCES.txt +68 -0
  7. src/slop_farmer.egg-info/dependency_links.txt +1 -0
  8. src/slop_farmer.egg-info/entry_points.txt +3 -0
  9. src/slop_farmer.egg-info/requires.txt +20 -0
  10. src/slop_farmer.egg-info/top_level.txt +1 -0
  11. src/slop_farmer/__init__.py +3 -0
  12. src/slop_farmer/__pycache__/__init__.cpython-310.pyc +0 -0
  13. src/slop_farmer/__pycache__/__init__.cpython-312.pyc +0 -0
  14. src/slop_farmer/__pycache__/__init__.cpython-313.pyc +0 -0
  15. src/slop_farmer/__pycache__/__init__.cpython-314.pyc +0 -0
  16. src/slop_farmer/__pycache__/analysis.cpython-310.pyc +0 -0
  17. src/slop_farmer/__pycache__/analysis.cpython-313.pyc +3 -0
  18. src/slop_farmer/__pycache__/analysis.cpython-314.pyc +3 -0
  19. src/slop_farmer/__pycache__/analysis_cache.cpython-310.pyc +0 -0
  20. src/slop_farmer/__pycache__/analysis_cache.cpython-313.pyc +0 -0
  21. src/slop_farmer/__pycache__/analysis_cache.cpython-314.pyc +0 -0
  22. src/slop_farmer/__pycache__/app_config.cpython-313.pyc +0 -0
  23. src/slop_farmer/__pycache__/app_config.cpython-314.pyc +0 -0
  24. src/slop_farmer/__pycache__/canonical_duplicate_pr.cpython-310.pyc +0 -0
  25. src/slop_farmer/__pycache__/canonical_duplicate_pr.cpython-313.pyc +0 -0
  26. src/slop_farmer/__pycache__/cli.cpython-310.pyc +0 -0
  27. src/slop_farmer/__pycache__/cli.cpython-312.pyc +0 -0
  28. src/slop_farmer/__pycache__/cli.cpython-313.pyc +0 -0
  29. src/slop_farmer/__pycache__/cli.cpython-314.pyc +0 -0
  30. src/slop_farmer/__pycache__/config.cpython-310.pyc +0 -0
  31. src/slop_farmer/__pycache__/config.cpython-312.pyc +0 -0
  32. src/slop_farmer/__pycache__/config.cpython-313.pyc +0 -0
  33. src/slop_farmer/__pycache__/config.cpython-314.pyc +0 -0
  34. src/slop_farmer/__pycache__/dashboard.cpython-313.pyc +0 -0
  35. src/slop_farmer/__pycache__/dashboard.cpython-314.pyc +0 -0
  36. src/slop_farmer/__pycache__/deploy.cpython-313.pyc +0 -0
  37. src/slop_farmer/__pycache__/duplicate_prs.cpython-313.pyc +0 -0
  38. src/slop_farmer/__pycache__/github_api.cpython-310.pyc +0 -0
  39. src/slop_farmer/__pycache__/github_api.cpython-313.pyc +0 -0
  40. src/slop_farmer/__pycache__/github_api.cpython-314.pyc +0 -0
  41. src/slop_farmer/__pycache__/hf_checkpoint_import.cpython-310.pyc +0 -0
  42. src/slop_farmer/__pycache__/hf_checkpoint_import.cpython-313.pyc +0 -0
  43. src/slop_farmer/__pycache__/http.cpython-310.pyc +0 -0
  44. src/slop_farmer/__pycache__/http.cpython-313.pyc +0 -0
  45. src/slop_farmer/__pycache__/http.cpython-314.pyc +0 -0
  46. src/slop_farmer/__pycache__/links.cpython-310.pyc +0 -0
  47. src/slop_farmer/__pycache__/links.cpython-313.pyc +0 -0
  48. src/slop_farmer/__pycache__/new_contributor_report.cpython-313.pyc +0 -0
  49. src/slop_farmer/__pycache__/new_contributor_report.cpython-314.pyc +0 -0
  50. src/slop_farmer/__pycache__/normalize.cpython-310.pyc +0 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ src/slop_farmer/__pycache__/analysis.cpython-313.pyc filter=lfs diff=lfs merge=lfs -text
37
+ src/slop_farmer/__pycache__/analysis.cpython-314.pyc filter=lfs diff=lfs merge=lfs -text
38
+ src/slop_farmer/reports/__pycache__/analysis.cpython-313.pyc filter=lfs diff=lfs merge=lfs -text
39
+ src/slop_farmer/reports/__pycache__/analysis.cpython-314.pyc filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.13-slim
2
+
3
+ RUN pip install --no-cache-dir uv && \
4
+ useradd -m -u 1000 user && \
5
+ mkdir -p /data && \
6
+ chmod 777 /data
7
+
8
+ USER user
9
+
10
+ ENV HOME=/home/user \
11
+ PATH=/home/user/.local/bin:$PATH \
12
+ UV_LINK_MODE=copy
13
+
14
+ WORKDIR $HOME/app
15
+
16
+ COPY --chown=user pyproject.toml uv.lock README.md ./
17
+ COPY --chown=user src ./src
18
+
19
+ RUN uv sync --frozen --no-dev
20
+
21
+ EXPOSE 7860
22
+
23
+ CMD ["uv", "run", "--no-dev", "uvicorn", "slop_farmer.app.pr_search_api:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,35 @@
1
  ---
2
- title: Openclaw Pr Api
3
- emoji: 🔥
4
- colorFrom: red
5
- colorTo: yellow
6
  sdk: docker
7
- pinned: false
 
 
 
 
 
 
 
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: OpenClaw PR API
3
+ emoji: 🔎
4
+ colorFrom: indigo
5
+ colorTo: blue
6
  sdk: docker
7
+ app_port: 7860
8
+ short_description: Live API for OpenClaw PR code similarity search.
9
+ datasets:
10
+ - evalstate/openclaw-pr
11
+ tags:
12
+ - api
13
+ - docker
14
+ - pull-requests
15
+ - similarity
16
+ - openclaw
17
  ---
18
 
19
+ # OpenClaw PR API
20
+
21
+ Machine-oriented API for PR similarity search.
22
+
23
+ Defaults for this deployment:
24
+
25
+ - repo: `openclaw/openclaw`
26
+ - live probe source: `https://ghreplica.dutiful.dev`
27
+ - dataset: `evalstate/openclaw-pr`
28
+
29
+ CLI examples:
30
+
31
+ ```bash
32
+ pr-search repo status
33
+ pr-search pr similar 67096
34
+ pr-search pr probe 67096 --json
35
+ ```
pyproject.toml ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "slop-farmer"
7
+ version = "0.1.0"
8
+ description = "GitHub-to-Hub data pipeline for transformers issue and PR triage research."
9
+ readme = "README.md"
10
+ requires-python = ">=3.13.5"
11
+ dependencies = [
12
+ "duckdb>=1.2.2",
13
+ "pyarrow>=18.0.0",
14
+ "fastapi>=0.115.0",
15
+ "huggingface_hub>=0.30.0",
16
+ "pydantic>=2.11",
17
+ "PyYAML>=6.0.2",
18
+ "rank-bm25>=0.2.2",
19
+ "fast-agent-mcp>=0.6.16",
20
+ "uvicorn>=0.34.0",
21
+ ]
22
+
23
+ [project.optional-dependencies]
24
+ dev = [
25
+ "httpx>=0.28.0",
26
+ "pytest>=8.3.0",
27
+ "ruff>=0.11",
28
+ "ty>=0.0.23",
29
+ ]
30
+ llm = [
31
+ "fast-agent-mcp>=0.6.16; python_full_version >= '3.13.5'",
32
+ ]
33
+
34
+ [project.scripts]
35
+ slop-farmer = "slop_farmer.app.cli:main"
36
+ pr-search = "slop_farmer.app.pr_search_client:main"
37
+
38
+ [tool.setuptools]
39
+ package-dir = {"" = "src"}
40
+
41
+ [tool.setuptools.packages.find]
42
+ where = ["src"]
43
+
44
+ [tool.pytest.ini_options]
45
+ pythonpath = ["src"]
46
+ testpaths = ["tests"]
47
+
48
+ [tool.ruff]
49
+ line-length = 100
50
+ target-version = "py311"
51
+
52
+ [tool.ruff.lint]
53
+ select = [
54
+ "B",
55
+ "E",
56
+ "F",
57
+ "I",
58
+ "RUF",
59
+ "SIM",
60
+ "UP",
61
+ ]
62
+ ignore = ["E501"]
63
+
64
+ [tool.slop-farmer.analyze]
65
+ output-dir = "eval_data"
66
+ hf-repo-id = "evalstate/transformers-pr"
67
+ ranking-backend = "hybrid"
68
+ model = "gpt-5.4-mini"
69
+ max-clusters = 10
70
+
71
+ [tool.slop-farmer.dashboard-data]
72
+ output-dir = "web/public/data"
73
+ window-days = 14
src/slop_farmer.egg-info/PKG-INFO ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.4
2
+ Name: slop-farmer
3
+ Version: 0.1.0
4
+ Summary: GitHub-to-Hub data pipeline for transformers issue and PR triage research.
5
+ Requires-Python: >=3.13.5
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: duckdb>=1.2.2
8
+ Requires-Dist: pyarrow>=18.0.0
9
+ Requires-Dist: fastapi>=0.115.0
10
+ Requires-Dist: huggingface_hub>=0.30.0
11
+ Requires-Dist: pydantic>=2.11
12
+ Requires-Dist: PyYAML>=6.0.2
13
+ Requires-Dist: rank-bm25>=0.2.2
14
+ Requires-Dist: fast-agent-mcp>=0.6.16
15
+ Requires-Dist: uvicorn>=0.34.0
16
+ Provides-Extra: dev
17
+ Requires-Dist: httpx>=0.28.0; extra == "dev"
18
+ Requires-Dist: pytest>=8.3.0; extra == "dev"
19
+ Requires-Dist: ruff>=0.11; extra == "dev"
20
+ Requires-Dist: ty>=0.0.23; extra == "dev"
21
+ Provides-Extra: llm
22
+ Requires-Dist: fast-agent-mcp>=0.6.16; python_full_version >= "3.13.5" and extra == "llm"
23
+
24
+ # slop-farmer
25
+
26
+ Pipeline for managing PRs in high-volume GitHub repositories.
27
+
28
+ Scrapes PR, Issue and Contributor data into a dataset, performs analysis and publishes a dashboard.
29
+
30
+ The pipeline stages are:
31
+ 1. Scrape - Collect data from the GitHub Repository
32
+ 1. Contributor Report - Look at contributors' recent history.
33
+ 1. Analyze - Cluster PRs and Issues on their content.
34
+ 1. Scope - Cluster PRs on overlapping repository areas.
35
+ 1. Dashboard Export - Export data in JSON format to populate a browsing dashboard
36
+ 1. Publish Dashboard - Build a dashboard and deploy it in a Hugging Face Space.
37
+
38
+
39
+
40
+ ## Scrape
41
+
42
+ To run a scrape you need to configure:
43
+
44
+ 1. The GitHub Repository ID
45
+ 1. A valid GitHub PAT with API access.
46
+
47
+ `uv run slop-farmer scrape --repo huggingface/diffusers --output-dir runs/diffusers/data`
48
+
49
+ ## Contributor Report
50
+
51
+ This scans the dataset for Contributors and provides a short profile of their recent public commit history and merged PR rate.
52
+
53
+ ## Analyze
54
+
55
+ Cluster PRs and Issue content. Choice of a deterministic or LLM-supplemented algorithm.
56
+
57
+ When `ranking_backend=hybrid`, analysis writes reusable LLM review cache entries under
58
+ `<snapshot>/analysis-state/`. If you enable YAML config setting
59
+ `analysis.cached_analysis: true`, `analyze` will automatically copy `analysis-state/`
60
+ forward from the previous snapshot when the new snapshot does not already have it, then
61
+ log a cache-hit summary for the run. This is useful for incremental scrapes where many
62
+ review units are unchanged and can safely reuse cached hybrid decisions.
63
+
64
+ ## Scope
65
+
66
+ Cluster PRs by touched repository areas.
67
+
68
+ ## Dashboard Export / Publish
69
+
70
+ Export the report, and publish a dashboard.
71
+
72
+
73
+
74
+ ## Quickstart
75
+
76
+ ```bash
77
+ uv run slop-farmer scrape \
78
+ --repo huggingface/transformers \
79
+ --output-dir data \
80
+ --max-issues 200 \
81
+ --max-prs 50
82
+ ```
83
+
84
+ To publish a snapshot to the Hub:
85
+
86
+ ```bash
87
+ uv run slop-farmer scrape \
88
+ --repo huggingface/transformers \
89
+ --output-dir data \
90
+ --hf-repo-id burtenshaw/transformers-pr-slop-dataset \
91
+ --publish
92
+ ```
93
+
94
+ When `--publish` is used, `slop-farmer` now also generates and uploads new contributor reviewer artifacts by default:
95
+
96
+ - `new_contributors.parquet`
97
+ - `new-contributors-report.json`
98
+ - `new-contributors-report.md`
99
+
100
+ Use `--no-new-contributor-report` to skip them.
101
+
102
+ ## Nightly incremental runs
103
+
104
+ The scraper now stores a local watermark at `data/state/watermark.json` and resumes from it by default when `--since` is not provided.
105
+
106
+ ```bash
107
+ uv run slop-farmer scrape \
108
+ --repo huggingface/transformers \
109
+ --output-dir data \
110
+ --fetch-timeline
111
+ ```
112
+
113
+ On the first run, this creates a full snapshot. On later runs against the same `--output-dir`, it uses the last successful watermark, fetches only changed records, merges them into the previous snapshot locally, and writes a new full latest snapshot.
114
+
115
+ To ignore the watermark and force a fresh full run:
116
+
117
+ ```bash
118
+ uv run slop-farmer scrape \
119
+ --repo huggingface/transformers \
120
+ --output-dir data \
121
+ --no-resume
122
+ ```
123
+
124
+ Authentication defaults:
125
+
126
+ - GitHub: `GITHUB_TOKEN`, then `gh auth token`
127
+ - Hugging Face: `HF_TOKEN`, otherwise existing `hf auth` login
128
+
129
+ ## Scheduled Hugging Face Job for `configs/transformers.yaml`
130
+
131
+ To keep the Transformers dataset fresh on the Hub without relying on a local watermark,
132
+ submit the repo's job script instead:
133
+
134
+ ```bash
135
+ scripts/submit_transformers_dataset_job.sh
136
+ ```
137
+
138
+ By default this creates a scheduled HF Job that:
139
+
140
+ - reads `configs/transformers.yaml`
141
+ - refreshes `dataset_id` incrementally against the current Hub dataset state
142
+ - regenerates the new contributor report
143
+ - uploads the updated snapshot back to the dataset repo
144
+
145
+ Useful overrides:
146
+
147
+ ```bash
148
+ # fire once immediately instead of creating a schedule
149
+ MODE=run scripts/submit_transformers_dataset_job.sh
150
+
151
+ # change the cron schedule
152
+ SCHEDULE="0 */6 * * *" scripts/submit_transformers_dataset_job.sh
153
+
154
+ # optionally mount a writable HF bucket for temp files
155
+ SCRATCH_BUCKET=evalstate/slop-farmer-scratch \
156
+ scripts/submit_transformers_dataset_job.sh
157
+ ```
158
+
159
+ Buckets are best treated here as optional scratch space via `TMPDIR`, not as the canonical
160
+ published dataset. The repo's local analysis and PR-scope tooling already knows how to
161
+ materialize versioned Hub **dataset repos**; it does not currently read HF buckets directly.
162
+
163
+ ## Analyze a Hub dataset
164
+
165
+ You can analyze the published Hugging Face dataset directly without scraping GitHub again:
166
+
167
+ ```bash
168
+ uv run slop-farmer analyze \
169
+ --snapshot-dir eval_data/snapshots/gh-live-latest-1000x1000 \
170
+ --ranking-backend hybrid \
171
+ --model "gpt-5-mini?reasoning=low" \
172
+ --output /tmp/gh-live-latest-1000x1000-hybrid.json
173
+ ```
174
+
175
+ This materializes the dataset-viewer parquet export into a local snapshot cache under `eval_data/snapshots/` and writes `analysis-report.json` next to it.
176
+
177
+ Repo-local defaults for `analyze` can be stored in `pyproject.toml` under `[tool.slop-farmer.analyze]`. This repo currently defaults to:
178
+
179
+ - `output-dir = "eval_data"`
180
+ - `hf-repo-id = "evalstate/transformers-pr"`
181
+ - `ranking-backend = "hybrid"`
182
+ - `model = "gpt-5.4-mini"`
183
+
184
+ So from repo root you can now usually just run:
185
+
186
+ ```bash
187
+ uv run slop-farmer analyze
188
+ ```
189
+
190
+ ## Cluster open PRs by code scope
191
+
192
+ You can also build holistic PR scope clusters from an existing snapshot:
193
+
194
+ ```bash
195
+ uv run slop-farmer pr-scope \
196
+ --snapshot-dir data/snapshots/20260324T150154Z
197
+ ```
198
+
199
+ By default this writes `pr-scope-clusters.json` next to the snapshot.
200
+
201
+ ## Merge duplicate PR clusters
202
+
203
+ List only the duplicate PR clusters that pass the mergeability gate:
204
+
205
+ ```bash
206
+ uv run slop-farmer duplicate-prs list \
207
+ --report eval_data/snapshots/gh-live-latest-1000x1000/analysis-report-hybrid.json
208
+ ```
209
+
210
+ Then synthesize and publish one minimal upstream PR from the top-ranked mergeable cluster:
211
+
212
+ ```bash
213
+ uv run slop-farmer duplicate-prs merge \
214
+ --report eval_data/snapshots/gh-live-latest-1000x1000/analysis-report-hybrid.json \
215
+ --repo-dir /path/to/transformers
216
+ ```
217
+
218
+ If your local checkout uses a fork as `origin`, point the merge flow at the upstream remote explicitly and relax the file policy when needed:
219
+
220
+ ```bash
221
+ uv run slop-farmer duplicate-prs merge \
222
+ --report eval_data/snapshots/gh-live-latest-1000x1000/analysis-report-hybrid.json \
223
+ --repo-dir /path/to/transformers \
224
+ --upstream-repo huggingface/transformers \
225
+ --upstream-remote upstream \
226
+ --fork-repo YOURNAME/transformers-minimal \
227
+ --fork-remote origin \
228
+ --file-policy allow-docs
229
+ ```
230
+
231
+ ## Import a historical HF checkpoint as a clean local snapshot
232
+
233
+ If an older dataset keeps its richest data under `_checkpoints/<snapshot_id>/`,
234
+ you can promote one of those checkpoints into a normal local snapshot:
235
+
236
+ ```bash
237
+ uv run slop-farmer import-hf-checkpoint \
238
+ --source-repo-id burtenshaw/transformers-pr-slop-dataset \
239
+ --output-dir eval_data
240
+ ```
241
+
242
+ By default this selects the latest viable checkpoint, writes a clean snapshot
243
+ under `eval_data/snapshots/`, and regenerates `links.parquet`,
244
+ `issue_comments.parquet`, and `pr_comments.parquet`.
245
+
246
+ ## Render markdown from an analysis JSON
247
+
248
+ You can turn an existing analysis report into a human-readable markdown file without rerunning clustering:
249
+
250
+ ```bash
251
+ uv run slop-farmer markdown-report \
252
+ --input eval_data/snapshots/hf-latest-100x100/analysis-report-hybrid.json
253
+ ```
254
+
255
+ By default this writes `analysis-report-hybrid.md` next to the JSON and uses the JSON parent directory as the snapshot source for issue and PR titles, links, and latest-activity ordering.
256
+
257
+ ## Render a new contributor report
258
+
259
+ You can also render a reviewer-facing markdown report for contributors who are still new to the repo snapshot:
260
+
261
+ ```bash
262
+ uv run slop-farmer new-contributor-report \
263
+ --snapshot-dir data/snapshots/20260324T000000Z
264
+ ```
265
+
266
+ By default this writes:
267
+
268
+ - `new_contributors.parquet`
269
+ - `new-contributors-report.md`
270
+ - `new-contributors-report.json`
271
+
272
+ next to the snapshot, including GitHub profile links, repo issue/PR search links, and example authored artifacts.
273
+
274
+ ## Full end-to-end workflow
275
+
276
+ You can run scrape + publish + analyze + markdown + dashboard export in one command:
277
+
278
+ ```bash
279
+ uv run slop-farmer full-pipeline \
280
+ --repo huggingface/transformers \
281
+ --dataset YOURNAME/transformers-pr-slop-dataset \
282
+ --model "gpt-5-mini?reasoning=low"
283
+ ```
284
+
285
+ This writes outputs under a repo-anchored workspace directory, for example:
286
+
287
+ - `runs/transformers/data/`
288
+ - `runs/transformers/web/public/data/`
289
+
290
+ Optional age caps are based on `created_at`:
291
+
292
+ ```bash
293
+ --issue-max-age-days 30 \
294
+ --pr-max-age-days 14
295
+ ```
296
+
297
+ ## Validation checks
298
+
299
+ Before committing or wiring new package moves into automation, run:
300
+
301
+ ```bash
302
+ uv run python scripts/enforce_packaging.py
303
+ uv run --extra dev ruff format --check src tests scripts jobs
304
+ uv run --extra dev ruff check src tests scripts jobs
305
+ uv run --extra dev ty check src tests scripts jobs
306
+ uv run --extra dev pytest -q
307
+ ```
308
+
309
+ `scripts/enforce_packaging.py` verifies the coarse package boundaries:
310
+
311
+ - `data` must not import `app`
312
+ - `data` must not import `reports`
313
+ - `reports` must not import `app`
314
+
315
+ ## YAML config-driven runs
316
+
317
+ You can keep repo-specific pipeline defaults in a YAML file and apply them to all
318
+ commands with `--config`.
319
+
320
+ Example: `configs/diffusers.yaml`
321
+
322
+ ```yaml
323
+ repo: huggingface/diffusers
324
+ workspace: runs/diffusers
325
+ dataset_id: evalstate/diffusers-pr
326
+
327
+ pull-requests:
328
+ template_cleanup:
329
+ mode: merge_defaults
330
+ line_patterns:
331
+ - '^d(?:o not merge|ontmerge)\.?$'
332
+ cluster_suppression_rules:
333
+ - id: diffusers_post_release
334
+ title_patterns:
335
+ - '\bpost[- ]release\b'
336
+
337
+ dashboard:
338
+ space_id: evalstate/diffusers-dashboard
339
+ title: Diffusers Dashboard
340
+ window_days: 60
341
+ contributor_window_days: 60
342
+ contributor_max_authors: 0
343
+
344
+ analysis:
345
+ model: gpt-5.4-mini
346
+ ranking_backend: hybrid
347
+ cached_analysis: true
348
+
349
+ scrape:
350
+ fetch-timeline: true
351
+ ```
352
+
353
+ Then commands stay aligned without repeating repo/workspace/window settings:
354
+
355
+ ```bash
356
+ uv run slop-farmer --config configs/diffusers.yaml scrape --publish
357
+ uv run slop-farmer --config configs/diffusers.yaml analyze
358
+ uv run slop-farmer --config configs/diffusers.yaml pr-scope
359
+ uv run slop-farmer --config configs/diffusers.yaml new-contributor-report
360
+ uv run slop-farmer --config configs/diffusers.yaml dashboard-data
361
+ uv run slop-farmer --config configs/diffusers.yaml publish-snapshot
362
+ uv run slop-farmer --config configs/diffusers.yaml deploy-dashboard --refresh-contributors
363
+ ```
364
+
365
+ If you run `analyze` before `publish-snapshot`, the uploaded snapshot will also include
366
+ `analysis-state/`, which makes the hybrid cache portable across machines and reusable in
367
+ later snapshots when `analysis.cached_analysis: true` is enabled.
368
+
369
+ ## Export static dashboard data
370
+
371
+ You can export a slim JSON bundle for the React dashboard:
372
+
373
+ ```bash
374
+ uv run slop-farmer dashboard-data \
375
+ --snapshot-dir data/snapshots/20260324T150154Z \
376
+ --output-dir web/public/data \
377
+ --window-days 14
378
+ ```
379
+
380
+ This writes:
381
+
382
+ - `summary.json`
383
+ - `clusters.json`
384
+ - `prs.json`
385
+ - `contributors.json`
386
+
387
+ The dashboard is intentionally summary-first and links out to GitHub for deep detail.
388
+
389
+ ## Deploy a dashboard to a Hugging Face Space
390
+
391
+ Use the generic deploy script:
392
+
393
+ ```bash
394
+ SPACE_ID=evalstate/openclaw-pr-report \
395
+ PIPELINE_DATA_DIR=runs/openclaw/data \
396
+ SNAPSHOT_DIR=runs/openclaw/data/snapshots/20260324T233649Z \
397
+ SPACE_TITLE="OpenClaw PR Report" \
398
+ DATASET_ID=evalstate/openclaw-pr \
399
+ scripts/deploy_dashboard_space.sh
400
+ ```
401
+
402
+ Repo-specific wrappers are also available:
403
+
404
+ - `scripts/deploy_transformers_dashboard_space.sh`
405
+ - `scripts/deploy_openclaw_dashboard_space.sh`
406
+
407
+ Or use the CLI wrapper with a YAML config:
408
+
409
+ ```bash
410
+ uv run slop-farmer --config configs/diffusers.yaml deploy-dashboard --refresh-contributors
411
+ ```
src/slop_farmer.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ README.md
2
+ pyproject.toml
3
+ src/slop_farmer/__init__.py
4
+ src/slop_farmer/app_config.py
5
+ src/slop_farmer/config.py
6
+ src/slop_farmer.egg-info/PKG-INFO
7
+ src/slop_farmer.egg-info/SOURCES.txt
8
+ src/slop_farmer.egg-info/dependency_links.txt
9
+ src/slop_farmer.egg-info/entry_points.txt
10
+ src/slop_farmer.egg-info/requires.txt
11
+ src/slop_farmer.egg-info/top_level.txt
12
+ src/slop_farmer/app/__init__.py
13
+ src/slop_farmer/app/cli.py
14
+ src/slop_farmer/app/deploy.py
15
+ src/slop_farmer/app/duplicate_prs.py
16
+ src/slop_farmer/app/hf_checkpoint_import.py
17
+ src/slop_farmer/app/pipeline.py
18
+ src/slop_farmer/app/pr_search.py
19
+ src/slop_farmer/app/pr_search_api.py
20
+ src/slop_farmer/app/pr_search_client.py
21
+ src/slop_farmer/app/publish.py
22
+ src/slop_farmer/app/snapshot_state.py
23
+ src/slop_farmer/app/workflow.py
24
+ src/slop_farmer/data/__init__.py
25
+ src/slop_farmer/data/ghreplica_api.py
26
+ src/slop_farmer/data/github_api.py
27
+ src/slop_farmer/data/http.py
28
+ src/slop_farmer/data/links.py
29
+ src/slop_farmer/data/normalize.py
30
+ src/slop_farmer/data/parquet_io.py
31
+ src/slop_farmer/data/search_duckdb.py
32
+ src/slop_farmer/data/snapshot_materialize.py
33
+ src/slop_farmer/data/snapshot_paths.py
34
+ src/slop_farmer/reports/__init__.py
35
+ src/slop_farmer/reports/analysis.py
36
+ src/slop_farmer/reports/analysis_cache.py
37
+ src/slop_farmer/reports/canonical_duplicate_pr.py
38
+ src/slop_farmer/reports/dashboard.py
39
+ src/slop_farmer/reports/duplicate_prs.py
40
+ src/slop_farmer/reports/new_contributor_report.py
41
+ src/slop_farmer/reports/pr_heuristics.py
42
+ src/slop_farmer/reports/pr_scope.py
43
+ src/slop_farmer/reports/pr_search_scope.py
44
+ src/slop_farmer/reports/pr_search_service.py
45
+ src/slop_farmer/reports/user_activity.py
46
+ tests/test_analysis.py
47
+ tests/test_analysis_cache.py
48
+ tests/test_canonical_duplicate_pr.py
49
+ tests/test_cli.py
50
+ tests/test_config.py
51
+ tests/test_dashboard.py
52
+ tests/test_farmer_setup_assets.py
53
+ tests/test_ghreplica_api.py
54
+ tests/test_github_api.py
55
+ tests/test_hf_checkpoint_import.py
56
+ tests/test_http.py
57
+ tests/test_links.py
58
+ tests/test_new_contributor_report.py
59
+ tests/test_normalize.py
60
+ tests/test_pipeline_checkpoint_resume.py
61
+ tests/test_pr_scope.py
62
+ tests/test_pr_search.py
63
+ tests/test_pr_search_api.py
64
+ tests/test_pr_search_client.py
65
+ tests/test_publish.py
66
+ tests/test_snapshot_state.py
67
+ tests/test_update_transformers_dataset.py
68
+ tests/test_viewer_layout.py
src/slop_farmer.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
 
 
1
+
src/slop_farmer.egg-info/entry_points.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [console_scripts]
2
+ pr-search = slop_farmer.app.pr_search_client:main
3
+ slop-farmer = slop_farmer.app.cli:main
src/slop_farmer.egg-info/requires.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ duckdb>=1.2.2
2
+ pyarrow>=18.0.0
3
+ fastapi>=0.115.0
4
+ huggingface_hub>=0.30.0
5
+ pydantic>=2.11
6
+ PyYAML>=6.0.2
7
+ rank-bm25>=0.2.2
8
+ fast-agent-mcp>=0.6.16
9
+ uvicorn>=0.34.0
10
+
11
+ [dev]
12
+ httpx>=0.28.0
13
+ pytest>=8.3.0
14
+ ruff>=0.11
15
+ ty>=0.0.23
16
+
17
+ [llm]
18
+
19
+ [llm:python_full_version >= "3.13.5"]
20
+ fast-agent-mcp>=0.6.16
src/slop_farmer.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ slop_farmer
src/slop_farmer/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ __all__ = ["__version__"]
2
+
3
+ __version__ = "0.1.0"
src/slop_farmer/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (195 Bytes). View file
 
src/slop_farmer/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (209 Bytes). View file
 
src/slop_farmer/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (209 Bytes). View file
 
src/slop_farmer/__pycache__/__init__.cpython-314.pyc ADDED
Binary file (211 Bytes). View file
 
src/slop_farmer/__pycache__/analysis.cpython-310.pyc ADDED
Binary file (90.2 kB). View file
 
src/slop_farmer/__pycache__/analysis.cpython-313.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72a9ed2e1fb3883fe35903dce027a4f7af02f18cd8912bbec0d8ad839de5ee3a
3
+ size 141965
src/slop_farmer/__pycache__/analysis.cpython-314.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25676712721446f97224e0554362fb2c5f32d52cd8f9c55a22dcc10c01143eed
3
+ size 177154
src/slop_farmer/__pycache__/analysis_cache.cpython-310.pyc ADDED
Binary file (9.22 kB). View file
 
src/slop_farmer/__pycache__/analysis_cache.cpython-313.pyc ADDED
Binary file (14.9 kB). View file
 
src/slop_farmer/__pycache__/analysis_cache.cpython-314.pyc ADDED
Binary file (18.7 kB). View file
 
src/slop_farmer/__pycache__/app_config.cpython-313.pyc ADDED
Binary file (14.7 kB). View file
 
src/slop_farmer/__pycache__/app_config.cpython-314.pyc ADDED
Binary file (17 kB). View file
 
src/slop_farmer/__pycache__/canonical_duplicate_pr.cpython-310.pyc ADDED
Binary file (16.2 kB). View file
 
src/slop_farmer/__pycache__/canonical_duplicate_pr.cpython-313.pyc ADDED
Binary file (29.2 kB). View file
 
src/slop_farmer/__pycache__/cli.cpython-310.pyc ADDED
Binary file (17.2 kB). View file
 
src/slop_farmer/__pycache__/cli.cpython-312.pyc ADDED
Binary file (30.9 kB). View file
 
src/slop_farmer/__pycache__/cli.cpython-313.pyc ADDED
Binary file (35.3 kB). View file
 
src/slop_farmer/__pycache__/cli.cpython-314.pyc ADDED
Binary file (31.6 kB). View file
 
src/slop_farmer/__pycache__/config.cpython-310.pyc ADDED
Binary file (13.5 kB). View file
 
src/slop_farmer/__pycache__/config.cpython-312.pyc ADDED
Binary file (20.5 kB). View file
 
src/slop_farmer/__pycache__/config.cpython-313.pyc ADDED
Binary file (10.2 kB). View file
 
src/slop_farmer/__pycache__/config.cpython-314.pyc ADDED
Binary file (10.4 kB). View file
 
src/slop_farmer/__pycache__/dashboard.cpython-313.pyc ADDED
Binary file (32.8 kB). View file
 
src/slop_farmer/__pycache__/dashboard.cpython-314.pyc ADDED
Binary file (37.1 kB). View file
 
src/slop_farmer/__pycache__/deploy.cpython-313.pyc ADDED
Binary file (3.02 kB). View file
 
src/slop_farmer/__pycache__/duplicate_prs.cpython-313.pyc ADDED
Binary file (40.4 kB). View file
 
src/slop_farmer/__pycache__/github_api.cpython-310.pyc ADDED
Binary file (9.43 kB). View file
 
src/slop_farmer/__pycache__/github_api.cpython-313.pyc ADDED
Binary file (16.9 kB). View file
 
src/slop_farmer/__pycache__/github_api.cpython-314.pyc ADDED
Binary file (18.1 kB). View file
 
src/slop_farmer/__pycache__/hf_checkpoint_import.cpython-310.pyc ADDED
Binary file (13.3 kB). View file
 
src/slop_farmer/__pycache__/hf_checkpoint_import.cpython-313.pyc ADDED
Binary file (19.3 kB). View file
 
src/slop_farmer/__pycache__/http.cpython-310.pyc ADDED
Binary file (1.55 kB). View file
 
src/slop_farmer/__pycache__/http.cpython-313.pyc ADDED
Binary file (2.22 kB). View file
 
src/slop_farmer/__pycache__/http.cpython-314.pyc ADDED
Binary file (2.41 kB). View file
 
src/slop_farmer/__pycache__/links.cpython-310.pyc ADDED
Binary file (3.01 kB). View file
 
src/slop_farmer/__pycache__/links.cpython-313.pyc ADDED
Binary file (5.12 kB). View file
 
src/slop_farmer/__pycache__/new_contributor_report.cpython-313.pyc ADDED
Binary file (48.4 kB). View file
 
src/slop_farmer/__pycache__/new_contributor_report.cpython-314.pyc ADDED
Binary file (56.7 kB). View file
 
src/slop_farmer/__pycache__/normalize.cpython-310.pyc ADDED
Binary file (6.88 kB). View file