| from __future__ import annotations |
| import json, tempfile |
| from pathlib import Path |
| from dir2md.core import Config, generate_markdown_report |
|
|
|
|
def _make_repo(tmp: Path) -> Path:
    """Populate *tmp* with a small fixture tree and return *tmp*.

    Files written (all UTF-8):
      * ``src/a.py``      -- module text whose ``foo`` section embeds 100
        generated ``print`` lines.
      * ``src/b.py``      -- a short module text.
      * ``src/b_copy.py`` -- the text of ``src/b.py`` read back and written again.
      * ``README.md``     -- a three-line markdown document.
    """
    src_dir = tmp / "src"
    src_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): whitespace inside the literals below is reproduced exactly
    # as it appears in the reviewed text -- confirm it matches the intended
    # fixture indentation.
    print_lines = [f" print('line {n}')" for n in range(100)]
    long_content = "\n".join(print_lines)

    a_text = (
        "\n"
        "import os\n"
        "\n"
        "class A: pass\n"
        "\n"
        "def foo():\n"
        f"{long_content}\n"
        "return 42\n"
    )
    (src_dir / "a.py").write_text(a_text, encoding="utf-8")

    b_text = (
        "\n"
        "import sys\n"
        "\n"
        "def bar():\n"
        "return 43\n"
    )
    b_file = src_dir / "b.py"
    b_file.write_text(b_text, encoding="utf-8")

    # The copy is produced from what is actually on disk for b.py.
    duplicate = b_file.read_text(encoding="utf-8")
    (src_dir / "b_copy.py").write_text(duplicate, encoding="utf-8")

    (tmp / "README.md").write_text("# Title\n\nSome text\n", encoding="utf-8")
    return tmp
|
|
|
|
def test_budget_and_modes(tmp_path: Path):
    """Run summary mode with a 200-token budget and a manifest.

    Checks that the report text carries the prompt-token estimate line, that
    ``OUT.manifest.json`` is written next to the output path, and that the
    manifest's file entries include paths ending in ``a.py`` and ``b.py``.
    """
    repo = _make_repo(tmp_path)
    cfg = Config(
        root=repo,
        output=repo / "OUT.md",
        include_globs=[],
        exclude_globs=[],
        omit_globs=[],
        respect_gitignore=False,
        follow_symlinks=False,
        max_bytes=200_000,
        max_lines=2000,
        include_contents=True,
        only_ext=None,
        add_stats=True,
        add_toc=False,
        llm_mode="summary",
        budget_tokens=200,
        max_file_tokens=1200,
        dedup_bits=16,
        sample_head=120,
        sample_tail=40,
        strip_comments=False,
        emit_manifest=True,
        preset="pro",
        explain_capsule=True,
    )

    report = generate_markdown_report(cfg)
    assert "Estimated tokens (prompt):" in report

    manifest_file = repo / "OUT.manifest.json"
    assert manifest_file.exists()
    manifest = json.loads(manifest_file.read_text(encoding="utf-8"))

    # Only path suffixes are checked, not the exact relative path format.
    listed = {item["path"] for item in manifest["files"]}
    for suffix in ("a.py", "b.py"):
        assert any(path.endswith(suffix) for path in listed)
|
|
|
|
def test_ref_mode_manifest(tmp_path: Path):
    """Run ref mode with a 120-token budget and inspect the emitted manifest.

    Checks that ``OUT.manifest.json`` exists, that it has top-level ``stats``
    and ``files`` keys, and that every entry under ``files`` carries a
    ``sha256`` field.
    """
    root = _make_repo(tmp_path)
    cfg = Config(
        root=root,
        output=root / "OUT.md",
        include_globs=[],
        exclude_globs=[],
        omit_globs=[],
        respect_gitignore=False,
        follow_symlinks=False,
        max_bytes=200_000,
        max_lines=2000,
        include_contents=True,
        only_ext=None,
        add_stats=True,
        add_toc=False,
        llm_mode="ref",
        budget_tokens=120,
        max_file_tokens=1200,
        dedup_bits=16,
        sample_head=120,
        sample_tail=40,
        strip_comments=False,
        emit_manifest=True,
        preset="pro",
        explain_capsule=False,
    )

    # The report text is not inspected here; the call is made for the
    # manifest file it leaves behind, so its return value is not bound.
    generate_markdown_report(cfg)

    manifest_path = root / "OUT.manifest.json"
    # Fail with a clear assertion instead of FileNotFoundError when the
    # manifest was not written.
    assert manifest_path.exists()
    man = json.loads(manifest_path.read_text(encoding="utf-8"))

    assert "stats" in man
    assert "files" in man
    assert all("sha256" in entry for entry in man["files"])
|
|
|
|
def test_inline_sampling(tmp_path: Path):
    """Run inline mode with tight limits and check the sampling markers.

    Uses ``max_lines=50``, ``max_file_tokens=30``, ``sample_head=5`` and
    ``sample_tail=3`` against the fixture tree, then expects the report to
    contain both ``truncated middle`` and ``why: inline``.
    """
    repo = _make_repo(tmp_path)

    cfg = Config(
        root=repo,
        output=repo / "OUT.md",
        include_globs=[],
        exclude_globs=[],
        omit_globs=[],
        respect_gitignore=False,
        follow_symlinks=False,
        max_bytes=200_000,
        max_lines=50,
        include_contents=True,
        only_ext=None,
        add_stats=True,
        add_toc=False,
        llm_mode="inline",
        budget_tokens=50,
        max_file_tokens=30,
        dedup_bits=0,
        sample_head=5,
        sample_tail=3,
        strip_comments=False,
        emit_manifest=False,
        preset="pro",
        explain_capsule=True,
    )

    report = generate_markdown_report(cfg)
    assert "truncated middle" in report
    assert "why: inline" in report
|
|
def test_masking(tmp_path: Path):
    """Check that ``masking_mode`` controls whether a secret reaches the report.

    A ``.env`` file holding an AWS-style key string is added to the fixture
    tree. With ``masking_mode="basic"`` the raw text must be absent and the
    mask placeholder present; after switching the same config to ``"off"``
    the raw text must be present and the placeholder absent.
    """
    repo = _make_repo(tmp_path)

    secret_content = "My AWS key is AKIAIOSFODNN7EXAMPLE"
    placeholder = "[*** MASKED_SECRET ***]"
    env_file = repo / ".env"
    env_file.write_text(secret_content, encoding="utf-8")

    cfg = Config(
        root=repo,
        output=repo / "OUT.md",
        include_globs=[],
        exclude_globs=[],
        omit_globs=[],
        respect_gitignore=False,
        follow_symlinks=False,
        max_bytes=200_000,
        max_lines=2000,
        include_contents=True,
        only_ext=None,
        add_stats=True,
        add_toc=False,
        llm_mode="inline",
        budget_tokens=1000,
        max_file_tokens=1000,
        dedup_bits=0,
        sample_head=120,
        sample_tail=40,
        strip_comments=False,
        emit_manifest=False,
        preset="pro",
        explain_capsule=False,
        no_timestamp=True,
        masking_mode="basic",
    )

    # First pass: masking enabled.
    masked_report = generate_markdown_report(cfg)
    assert secret_content not in masked_report
    assert placeholder in masked_report

    # Second pass: the same config object is reused with masking turned off.
    cfg.masking_mode = "off"
    plain_report = generate_markdown_report(cfg)
    assert secret_content in plain_report
    assert placeholder not in plain_report
|
|