Spaces:
Running
Running
| # Chakravyuh — reproducibility + dev convenience targets | |
| # | |
| # Designed so a fresh clone can produce every README claim with one command. | |
| # All targets assume Python 3.11+ and a fresh virtualenv. | |
| PYTHON ?= python | |
| PIP ?= pip | |
| ADAPTER ?= ujjwalpardeshi/chakravyuh-analyzer-lora-v2 | |
| BENCH ?= data/chakravyuh-bench-v0/scenarios.jsonl | |
| SEED ?= 42 | |
| .PHONY: help install test lint format reproduce \ | |
| eval-v2 bootstrap smoke-test link-check link-check-http clean | |
| help: | |
| @echo "Chakravyuh make targets:" | |
| @echo " install Install repo + LLM + eval extras" | |
| @echo " test Run pytest (341 collected; 338 pass + 3 skip)" | |
| @echo " smoke-test In-process env reset+step smoke test (~5s, no GPU)" | |
| @echo " link-check Check every local README link / asset path resolves" | |
| @echo " link-check-http HEAD-probe every http(s) README link (allowed-fail; external)" | |
| @echo " lint Run ruff lint (no auto-fix)" | |
| @echo " format Run black + ruff --fix" | |
| @echo " eval-v2 Re-run v2 evaluation against the bench (~10 min CPU cached, ~2-4h fresh GPU)" | |
| @echo " bootstrap Compute 10k-iteration bootstrap CIs from the eval (~1 min CPU)" | |
| @echo " reproduce eval-v2 + bootstrap (numbers within 0.5pp of README; set CHAKRAVYUH_SKIP_INFERENCE=1 to use cached scores)" | |
| @echo " clean Remove pyc / pycache / build artifacts" | |
| install: | |
| $(PIP) install -e '.[llm,eval]' | |
| test: | |
| $(PYTHON) -m pytest tests/ -v | |
| smoke-test: | |
| $(PYTHON) scripts/smoke_test.py | |
| link-check: | |
| @echo "Checking local file references in README.md..." | |
| @missing=0; \ | |
| for path in $$(grep -oE '\]\([^)]+\)' README.md | sed -E 's/\]\(([^)]+)\).*/\1/' | grep -v '^http' | grep -v '^#' | grep -v '^mailto:' | cut -d'#' -f1); do \ | |
| [ -e "$$path" ] || { echo "BROKEN: $$path"; missing=$$((missing+1)); }; \ | |
| done; \ | |
| if [ "$$missing" -eq 0 ]; then echo "All local README links resolve."; else echo "$$missing broken link(s)."; exit 1; fi | |
| # Best-effort external link probe. HEAD requests with a short timeout; we accept | |
| # 2xx, 301/302 redirects, and 403 (some hosts block bots — newsmeter, pib, etc.) | |
| # but flag 404/410/500. Skips localhost URLs and URLs containing shell glue | |
| # (` ` or `&&`) which are illustrative command snippets, not real links. This | |
| # target is allowed to fail in CI because external availability is flaky. | |
| link-check-http: | |
| @echo "HEAD-probing http(s) links in README.md..." | |
| @bad=0; checked=0; \ | |
| urls=$$(grep -oE 'https?://[^)[:space:]]+' README.md | sed -E 's/[`",]+$$//' | sort -u | grep -v 'localhost' | grep -v ' ' | grep -v '&&'); \ | |
| for url in $$urls; do \ | |
| checked=$$((checked+1)); \ | |
| code=$$(curl -sS -L --max-time 8 -o /dev/null -w '%{http_code}' -A 'Chakravyuh-LinkCheck/1.0' -I "$$url" 2>/dev/null || echo 000); \ | |
| case "$$code" in \ | |
| 2*|301|302|307|308|401|403|405|406|429) ;; \ | |
| *) echo "BROKEN[$$code]: $$url"; bad=$$((bad+1)) ;; \ | |
| esac; \ | |
| done; \ | |
| echo "Probed $$checked URL(s); $$bad failure(s)."; \ | |
| if [ "$$bad" -ne 0 ]; then exit 1; fi | |
| lint: | |
| $(PYTHON) -m ruff check chakravyuh_env/ training/ eval/ server/ tests/ | |
| format: | |
| $(PYTHON) -m black chakravyuh_env/ training/ eval/ server/ tests/ | |
| $(PYTHON) -m ruff check --fix chakravyuh_env/ training/ eval/ server/ tests/ | |
| eval-v2: | |
| $(PYTHON) eval/mode_c_real_cases.py \ | |
| --model-id $(ADAPTER) \ | |
| --bench $(BENCH) \ | |
| --seed $(SEED) \ | |
| --output logs/eval_v2_reproduce.json | |
| bootstrap: eval-v2 | |
| $(PYTHON) eval/bootstrap_ci.py \ | |
| --eval-file logs/eval_v2_reproduce.json \ | |
| --iterations 10000 \ | |
| --output logs/bootstrap_v2_reproduce.json | |
| reproduce: install bootstrap | |
| @echo "" | |
| @echo "Reproduction complete. Compare these against README claims (target: within 0.5pp):" | |
| @echo " - logs/eval_v2_reproduce.json # detection, FPR, F1, per-difficulty" | |
| @echo " - logs/bootstrap_v2_reproduce.json # 95% CI bands" | |
| clean: | |
| find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true | |
| find . -type f -name '*.pyc' -delete 2>/dev/null || true | |
| rm -rf build/ dist/ *.egg-info/ | |