chakravyuh / Makefile
UjjwalPardeshi
deploy: latest main to HF Space
03815d6
# Chakravyuh — reproducibility + dev convenience targets
#
# Designed so a fresh clone can produce every README claim with one command.
# All targets assume Python 3.11+ and a fresh virtualenv.
# User-tunable knobs. Each uses `?=`, so a value supplied on the command line
# or in the environment wins (e.g. `make eval-v2 SEED=7`).

# Interpreter used by the test, smoke-test, lint, format, eval-v2 and
# bootstrap recipes.
PYTHON ?= python
# Installer command used by the `install` target.
PIP ?= pip
# Passed as --model-id to eval/mode_c_real_cases.py.
ADAPTER ?= ujjwalpardeshi/chakravyuh-analyzer-lora-v2
# Passed as --bench to eval/mode_c_real_cases.py.
BENCH ?= data/chakravyuh-bench-v0/scenarios.jsonl
# Passed as --seed to eval/mode_c_real_cases.py.
SEED ?= 42
# Every target in this file is a command rather than a file it produces, so
# all of them are declared phony: they run on each invocation and are not
# shadowed by a same-named file or directory.
.PHONY: help install test lint format reproduce \
        eval-v2 bootstrap smoke-test link-check link-check-http clean
# Print a one-line summary of each user-facing target. As the first rule in
# the file this is also what a bare `make` runs.
help:
	@printf '%s\n' \
		"Chakravyuh make targets:" \
		" install Install repo + LLM + eval extras" \
		" test Run pytest (341 collected; 338 pass + 3 skip)" \
		" smoke-test In-process env reset+step smoke test (~5s, no GPU)" \
		" link-check Check every local README link / asset path resolves" \
		" link-check-http HEAD-probe every http(s) README link (allowed-fail; external)" \
		" lint Run ruff lint (no auto-fix)" \
		" format Run black + ruff --fix" \
		" eval-v2 Re-run v2 evaluation against the bench (~10 min CPU cached, ~2-4h fresh GPU)" \
		" bootstrap Compute 10k-iteration bootstrap CIs from the eval (~1 min CPU)" \
		" reproduce eval-v2 + bootstrap (numbers within 0.5pp of README; set CHAKRAVYUH_SKIP_INFERENCE=1 to use cached scores)" \
		" clean Remove pyc / pycache / build artifacts"
# Editable install of the repository (`-e .`) together with its `llm` and
# `eval` optional-dependency groups. The spec is single-quoted so the shell
# does not treat the square brackets as a glob pattern.
install:
	$(PIP) install -e '.[llm,eval]'
# Run the pytest suite under tests/ with verbose (-v) per-test output.
test:
	$(PYTHON) -m pytest tests/ -v
# Run scripts/smoke_test.py (described by `make help` as an in-process
# env reset+step check that needs no GPU).
smoke-test:
	$(PYTHON) scripts/smoke_test.py
# Verify that every relative link target in README.md exists on disk.
# Markdown link targets `](...)` are extracted; http(s) URLs, pure in-page
# anchors (`#...`) and mailto: links are dropped, and any `#fragment` suffix is
# removed before the existence test. Exits 1 if at least one target is missing.
link-check:
	@echo "Checking local file references in README.md..."
	@broken=0; \
	targets=$$(grep -oE '\]\([^)]+\)' README.md \
		| sed -E 's/\]\(([^)]+)\).*/\1/' \
		| grep -v '^http' | grep -v '^#' | grep -v '^mailto:' \
		| cut -d'#' -f1); \
	for target in $$targets; do \
		if [ ! -e "$$target" ]; then \
			echo "BROKEN: $$target"; \
			broken=$$((broken+1)); \
		fi; \
	done; \
	if [ "$$broken" -ne 0 ]; then \
		echo "$$broken broken link(s)."; \
		exit 1; \
	fi; \
	echo "All local README links resolve."
# Best-effort external link probe. HEAD requests with a short timeout; we accept
# 2xx, 301/302/307/308 redirects, and 401/403/405/406/429 (some hosts block bots
# or reject HEAD — newsmeter, pib, etc.); every other status is flagged as
# broken (e.g. 404/410/500, or 000 when curl itself fails). Skips localhost URLs
# and URLs containing shell glue (` ` or `&&`) which are illustrative command
# snippets, not real links. This target is allowed to fail in CI because
# external availability is flaky.
# HEAD-probe every distinct http(s) URL found in README.md.
#   - URLs are extracted up to the first `)` or whitespace, stripped of trailing
#     backtick / double-quote / comma characters, and de-duplicated.
#   - URLs containing `localhost`, a space, or `&&` are skipped.
#   - A status matching the accepted list in the `case` below passes; any other
#     status is reported as BROKEN[<code>].
# Exits 1 if any URL was reported broken.
link-check-http:
	@echo "HEAD-probing http(s) links in README.md..."
	@bad=0; checked=0; \
	urls=$$(grep -oE 'https?://[^)[:space:]]+' README.md \
		| sed -E 's/[`",]+$$//' \
		| sort -u \
		| grep -v 'localhost' | grep -v ' ' | grep -v '&&'); \
	for url in $$urls; do \
		checked=$$((checked+1)); \
		code=$$(curl -sS -L --max-time 8 -o /dev/null -w '%{http_code}' -A 'Chakravyuh-LinkCheck/1.0' -I "$$url" 2>/dev/null) \
			|| code=000; \
		case "$$code" in \
			2*|301|302|307|308|401|403|405|406|429) ;; \
			*) echo "BROKEN[$$code]: $$url"; bad=$$((bad+1)) ;; \
		esac; \
	done; \
	echo "Probed $$checked URL(s); $$bad failure(s)."; \
	if [ "$$bad" -ne 0 ]; then exit 1; fi
# Directories checked by `lint` and rewritten by `format`. Defined once so the
# two targets cannot drift apart; override to narrow a run, e.g.
# `make lint LINT_PATHS=eval/`.
LINT_PATHS ?= chakravyuh_env/ training/ eval/ server/ tests/

# Report ruff violations without modifying any file.
lint:
	$(PYTHON) -m ruff check $(LINT_PATHS)

# Reformat with black, then apply ruff's automatic fixes.
format:
	$(PYTHON) -m black $(LINT_PATHS)
	$(PYTHON) -m ruff check --fix $(LINT_PATHS)
# Run eval/mode_c_real_cases.py for the $(ADAPTER) model against the $(BENCH)
# scenarios with seed $(SEED), writing logs/eval_v2_reproduce.json.
# The user-overridable values are quoted so a path containing spaces reaches
# the script as a single argument, and logs/ is created up front so a fresh
# clone without that directory still has somewhere to write the output.
eval-v2:
	@mkdir -p logs
	$(PYTHON) eval/mode_c_real_cases.py \
		--model-id "$(ADAPTER)" \
		--bench "$(BENCH)" \
		--seed "$(SEED)" \
		--output logs/eval_v2_reproduce.json
# Run eval/bootstrap_ci.py with 10000 iterations over the eval-v2 output,
# writing logs/bootstrap_v2_reproduce.json.
# NOTE: eval-v2 is a phony prerequisite, so `make bootstrap` always re-runs the
# evaluation first rather than reusing an existing logs/eval_v2_reproduce.json.
bootstrap: eval-v2
	$(PYTHON) eval/bootstrap_ci.py \
		--eval-file logs/eval_v2_reproduce.json \
		--iterations 10000 \
		--output logs/bootstrap_v2_reproduce.json
# End-to-end reproduction: install dependencies, then run bootstrap (which in
# turn runs eval-v2), then point at the generated result files.
# The two steps are sequenced inside the recipe instead of being listed as
# sibling prerequisites: as prerequisites, `make -j reproduce` would be free to
# start the evaluation while `install` is still running. $(MAKE) forwards
# command-line variable overrides and flags such as -n to the sub-makes.
reproduce:
	@$(MAKE) --no-print-directory install
	@$(MAKE) --no-print-directory bootstrap
	@echo ""
	@echo "Reproduction complete. Compare these against README claims (target: within 0.5pp):"
	@echo " - logs/eval_v2_reproduce.json # detection, FPR, F1, per-difficulty"
	@echo " - logs/bootstrap_v2_reproduce.json # 95% CI bands"
# Remove Python bytecode caches and packaging artifacts from the working tree.
# Both find invocations are best-effort: stderr is discarded and a non-zero
# exit is swallowed by `|| true`, so a failed traversal never fails the target.
clean:
	find . -name __pycache__ -type d -exec rm -rf {} + 2>/dev/null || true
	find . -name '*.pyc' -type f -delete 2>/dev/null || true
	rm -rf build/ dist/ *.egg-info/