# VectraYX Reproducibility Makefile
# Reproduces the key experiments from the paper on a single NVIDIA L4 / A10G GPU.
#
# Prerequisites:
# - Python 3.10+
# - CUDA 12.1+
# - pip install -r requirements.txt
# - AWS CLI configured (for SageMaker experiments)
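# - Nano checkpoint: checkpoints/nano_sft_v5.pt (download from HuggingFace, see README)
# - Base checkpoint: checkpoints/base_phase3_last.pt (download from HuggingFace, see README)
# - Tokenizer: checkpoints/vectrayx_bpe.model (required by every bench-* and lora-* target)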
#
# Usage:
# make install # install dependencies
# make bench-nano # run B1-B5 on Nano 42M (requires nano_sft_v5.pt)
# make bench-base # run B1-B5 on Base 260M (requires base_phase3_last.pt)
# make lora-nano # LoRA tool-use fine-tune on Nano (local GPU)
# make lora-base # LoRA tool-use fine-tune on Base (local GPU)
# make repro # full reproducibility run (bench + lora + bench again)
# make corpus # regenerate the tool-use corpus from scratch (written to corpus/tool_sft_mini_v1_repro.jsonl)
# Interpreter used by every Python recipe in this file.
PYTHON    := python3

# Model weights and tokenizer; these are file prerequisites of the
# bench-* and lora-* targets.
NANO_CKPT := checkpoints/nano_sft_v5.pt
BASE_CKPT := checkpoints/base_phase3_last.pt
TOKENIZER := checkpoints/vectrayx_bpe.model

# Model configuration files passed via --config.
NANO_CFG  := configs/nano.json
BASE_CFG  := configs/base.json

# Evaluation data directory and the released tool-use corpus.
EVAL_DIR  := eval_data
CORPUS    := corpus/tool_sft_mini_v1.jsonl

# Root directory for LoRA fine-tune outputs (one subdirectory per model).
LORA_OUT  := checkpoints/lora_out

# Every target below is a command name, not a file its recipe produces.
# Declaring `corpus` phony matters in particular: a corpus/ directory is
# referenced by the paths above and would otherwise make the target look
# up to date.
.PHONY: help install \
        bench-nano bench-base \
        lora-nano lora-base \
        repro corpus clean
# help: print a short summary of the available targets.
# It is the first rule in this file, so a bare `make` runs it by default.
# The descriptions mirror what each recipe in this file actually does:
# `corpus` writes a *_repro.jsonl file, and `clean` removes only the LoRA
# output directory and results/.
help:
	@echo "VectraYX Reproducibility Makefile"
	@echo ""
	@echo "Targets:"
	@echo "  install     Install Python dependencies"
	@echo "  bench-nano  Run B1-B5 benchmark on Nano 42M"
	@echo "  bench-base  Run B1-B5 benchmark on Base 260M"
	@echo "  lora-nano   LoRA fine-tune Nano 42M on tool-use corpus"
	@echo "  lora-base   LoRA fine-tune Base 260M on tool-use corpus"
	@echo "  repro       Full reproducibility run"
	@echo "  corpus      Regenerate the tool-use corpus (writes tool_sft_mini_v1_repro.jsonl)"
	@echo "  clean       Remove LoRA outputs and results (downloaded checkpoints are kept)"
# install: install the Python dependencies listed in requirements.txt.
# pip is run as a module of $(PYTHON) so the packages are installed for the
# same interpreter that the other recipes invoke; a bare `pip` on PATH may
# belong to a different Python installation.
install:
	$(PYTHON) -m pip install -r requirements.txt
# ── Benchmark ─────────────────────────────────────────────────────────────────
# bench-nano: run eval/benchmark.py on the Nano checkpoint and write the
# scores to results/bench_nano_baseline.json.
# The checkpoint and tokenizer are plain file prerequisites with no rule in
# this file, so make stops with "No rule to make target" if either is missing.
# results/ is created first because `make clean` deletes it and it may not
# exist on a fresh checkout.
bench-nano: $(NANO_CKPT) $(TOKENIZER)
	@mkdir -p results
	$(PYTHON) eval/benchmark.py \
		--config $(NANO_CFG) \
		--tokenizer $(TOKENIZER) \
		--checkpoint $(NANO_CKPT) \
		--data-dir $(EVAL_DIR) \
		--out results/bench_nano_baseline.json
	@echo "Results: results/bench_nano_baseline.json"
# bench-base: run eval/benchmark.py on the Base checkpoint and write the
# scores to results/bench_base_baseline.json.
# The checkpoint and tokenizer are plain file prerequisites with no rule in
# this file, so make stops with "No rule to make target" if either is missing.
# results/ is created first because `make clean` deletes it and it may not
# exist on a fresh checkout.
bench-base: $(BASE_CKPT) $(TOKENIZER)
	@mkdir -p results
	$(PYTHON) eval/benchmark.py \
		--config $(BASE_CFG) \
		--tokenizer $(TOKENIZER) \
		--checkpoint $(BASE_CKPT) \
		--data-dir $(EVAL_DIR) \
		--out results/bench_base_baseline.json
	@echo "Results: results/bench_base_baseline.json"
# ── LoRA fine-tune ────────────────────────────────────────────────────────────
# lora-nano: two-step recipe for the Nano model.
#   1. training/finetune_lora_tools.py fine-tunes from $(NANO_CKPT) on
#      $(CORPUS) (rank 16, alpha 32, batch 16, grad-accum 4, 5 epochs,
#      lr 2e-4, seed 42) and writes into $(LORA_OUT)/nano.
#   2. eval/run_inference_lora.py evaluates the base checkpoint together
#      with $(LORA_OUT)/nano/final_lora_only.pt and writes
#      results/bench_nano_lora_s42.json.
# Both output directories are created up front; results/ in particular is
# removed by `make clean` and may not exist on a fresh checkout.
lora-nano: $(NANO_CKPT) $(TOKENIZER) $(CORPUS)
	mkdir -p $(LORA_OUT)/nano
	@mkdir -p results
	$(PYTHON) training/finetune_lora_tools.py \
		--config $(NANO_CFG) \
		--tokenizer $(TOKENIZER) \
		--resume $(NANO_CKPT) \
		--tool-corpus $(CORPUS) \
		--out $(LORA_OUT)/nano \
		--lora-rank 16 --lora-alpha 32 \
		--batch-size 16 --grad-accum 4 \
		--epochs 5 --lr 2e-4 --seed 42
	$(PYTHON) eval/run_inference_lora.py \
		--base-checkpoint $(NANO_CKPT) \
		--lora-checkpoint $(LORA_OUT)/nano/final_lora_only.pt \
		--config $(NANO_CFG) \
		--tokenizer $(TOKENIZER) \
		--data-dir $(EVAL_DIR) \
		--out results/bench_nano_lora_s42.json
	@echo "Results: results/bench_nano_lora_s42.json"
# lora-base: two-step recipe for the Base model.
#   1. training/finetune_lora_tools.py fine-tunes from $(BASE_CKPT) on
#      $(CORPUS) (rank 16, alpha 32, batch 8, grad-accum 8, 5 epochs,
#      lr 2e-4, seed 42) and writes into $(LORA_OUT)/base.
#   2. eval/run_inference_lora.py evaluates the base checkpoint together
#      with $(LORA_OUT)/base/final_lora_only.pt and writes
#      results/bench_base_lora_s42.json.
# Both output directories are created up front; results/ in particular is
# removed by `make clean` and may not exist on a fresh checkout.
lora-base: $(BASE_CKPT) $(TOKENIZER) $(CORPUS)
	mkdir -p $(LORA_OUT)/base
	@mkdir -p results
	$(PYTHON) training/finetune_lora_tools.py \
		--config $(BASE_CFG) \
		--tokenizer $(TOKENIZER) \
		--resume $(BASE_CKPT) \
		--tool-corpus $(CORPUS) \
		--out $(LORA_OUT)/base \
		--lora-rank 16 --lora-alpha 32 \
		--batch-size 8 --grad-accum 8 \
		--epochs 5 --lr 2e-4 --seed 42
	$(PYTHON) eval/run_inference_lora.py \
		--base-checkpoint $(BASE_CKPT) \
		--lora-checkpoint $(LORA_OUT)/base/final_lora_only.pt \
		--config $(BASE_CFG) \
		--tokenizer $(TOKENIZER) \
		--data-dir $(EVAL_DIR) \
		--out results/bench_base_lora_s42.json
	@echo "Results: results/bench_base_lora_s42.json"
# ── Full reproducibility run ──────────────────────────────────────────────────
# repro: run install, both baseline benchmarks and both LoRA fine-tunes, then
# print the paper's Table 3 figures next to the B4 scores found on disk.
#
# The final Python one-liner loads each results/<name>.json that exists and
# prints its "B4_tooluse" value ("N/A" when the key is absent); result files
# that are missing are skipped silently.
#
# .NOTPARALLEL keeps the prerequisites strictly sequential, in the order
# listed, even when make is invoked with -j. Without it, `make -j repro`
# could start the benchmarks before `install` has finished and launch
# several GPU jobs at once, while the file header targets a single GPU.
.NOTPARALLEL:
repro: install bench-nano bench-base lora-nano lora-base
	@echo ""
	@echo "=== Reproducibility Run Complete ==="
	@echo "Expected results (from paper, Table 3):"
	@echo " Nano baseline B4=0.000"
	@echo " Base baseline B4=0.000"
	@echo " Nano + LoRA B4=0.145 ± 0.046 (seed 42: 0.220)"
	@echo " Base + LoRA B4=0.580"
	@echo ""
	@echo "Your results:"
	@$(PYTHON) -c "import json; \
	r = {k: json.load(open(f'results/{k}.json')) for k in \
	['bench_nano_baseline','bench_base_baseline','bench_nano_lora_s42','bench_base_lora_s42'] \
	if __import__('pathlib').Path(f'results/{k}.json').exists()}; \
	[print(f' {k}: B4={v.get(\"B4_tooluse\",\"N/A\")}') for k,v in r.items()]"
# ── Corpus generation ─────────────────────────────────────────────────────────
# corpus: rebuild the tool-use corpus with corpus/build_mini_tool_corpus.py
# (--size 2801). The output goes to a separate *_repro.jsonl file, not to the
# released corpus path, and the final message points at the released file for
# comparison.
corpus: corpus_repro_out := corpus/tool_sft_mini_v1_repro.jsonl
corpus:
	$(PYTHON) corpus/build_mini_tool_corpus.py \
		--size 2801 \
		--out $(corpus_repro_out)
	@echo "Generated: $(corpus_repro_out)"
	@echo "Note: compare with corpus/tool_sft_mini_v1.jsonl (released version)"
# ── Cleanup ───────────────────────────────────────────────────────────────────
# clean: delete the LoRA output tree and the results/ directory.
# The LoRA path comes from $(LORA_OUT), the same variable the lora-* recipes
# write to, so an overridden output location is cleaned as well. The
# downloaded checkpoints and tokenizer under checkpoints/ are not touched
# with the default LORA_OUT.
clean:
	rm -rf $(LORA_OUT) results/
	@echo "Cleaned generated files. Checkpoints preserved."