"""Re-trace an existing TorchScript (JIT) model so its constants live on CUDA.

Workflow:
  1. Load the existing JIT archive on CPU purely to extract its state_dict.
  2. Dynamically import the original Python model class and instantiate it.
  3. Load the harvested weights into the Python model.
  4. Move the model to CUDA and re-trace (or re-script) it there.
  5. Save the new CUDA-resident JIT archive to --out.

Requires a CUDA-capable machine; aborts otherwise.
"""

import argparse
import importlib
import sys
from pathlib import Path

import torch


def build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser (flags, defaults and help text are the script's contract)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--jit", required=True, help="Path to existing JIT model (used to extract state_dict)")
    parser.add_argument("--out", required=True, help="Output path for new JIT model on CUDA")
    parser.add_argument("--py_module", required=False, help="Python import path for model (e.g. jirackkit.src.main.python.gpt2_jit.JiRack_H4_L2_V50257_D768_MSL8192_FF768x4)", default=None)
    parser.add_argument("--class_name", required=False, help="Name of model class in module", default=None)
    parser.add_argument("--seq_len", type=int, default=8, help="Sequence length for example input (short is fine for trace)")
    parser.add_argument("--vocab_size", type=int, default=50257, help="Vocab size for dummy input")
    parser.add_argument("--use_script", action="store_true", help="Use torch.jit.script instead of trace (requires model to be scriptable)")
    return parser


def main() -> None:
    """Run the CPU-JIT -> CUDA-JIT conversion; exits non-zero on any failure."""
    args = build_parser().parse_args()

    jit_path = Path(args.jit)
    out_path = Path(args.out)
    if not jit_path.exists():
        print("JIT file not found:", jit_path)
        sys.exit(1)

    # Fail fast: the Python class is mandatory for reconstruction, so check it
    # BEFORE paying for the (potentially slow) torch.jit.load below.
    if args.py_module is None or args.class_name is None:
        print("ERROR: You must provide --py_module and --class_name to reconstruct the Python model.")
        print("Example: --py_module jirackkit.src.main.python.gpt2_jit.JiRack_H4_L2_V50257_D768_MSL8192_FF768x4 --class_name GPTPyTorch")
        sys.exit(1)

    # Load the old archive on CPU purely to harvest its weights; map_location
    # keeps this working on machines whose GPU differs from the save-time one.
    print("Loading state_dict from existing JIT (cpu)...")
    jit = torch.jit.load(str(jit_path), map_location='cpu')
    try:
        sd = jit.state_dict()
        print("state_dict keys:", list(sd.keys())[:10], "...")
    except Exception as e:
        # Best-effort: continue without weights (randomly initialized model)
        # rather than aborting — the user is told explicitly.
        print("Failed to obtain state_dict() from JIT:", e)
        sd = None

    print("Importing Python model:", args.py_module, args.class_name)
    module = importlib.import_module(args.py_module)
    ModelClass = getattr(module, args.class_name)

    # Constructor kwargs for the model class — edit here if the class needs
    # explicit hyperparameters (none are assumed by default).
    MODEL_KWARGS: dict = {}

    print("Instantiating Python model...")
    model = ModelClass(**MODEL_KWARGS)

    if sd is not None:
        try:
            model.load_state_dict(sd)
            print("Weights loaded into Python model from JIT.state_dict().")
        except Exception as e:
            # Key mismatches between the scripted graph and the Python class
            # are common; surface the error and stop rather than trace garbage.
            print("Failed to load state_dict into Python model:", e)
            print("You may need to adapt keys or load partial weights. Exiting.")
            sys.exit(1)

    if not torch.cuda.is_available():
        print("CUDA not available on this machine. Aborting.")
        sys.exit(1)
    device = torch.device('cuda:0')
    model.to(device)
    model.eval()

    # Dummy token ids on the target device; shape (batch=1, seq_len). Tracing
    # on CUDA is what makes the saved module's constants CUDA-resident.
    example_input = torch.randint(0, args.vocab_size, (1, args.seq_len), dtype=torch.long, device=device)

    print("Tracing/script-model on CUDA. This will produce a JIT module whose constants are on CUDA.")
    with torch.no_grad():  # export needs no autograd graph; saves memory/time
        if args.use_script:
            print("Using torch.jit.script...")
            scripted = torch.jit.script(model)
        else:
            print("Using torch.jit.trace with example input of shape", example_input.shape)
            scripted = torch.jit.trace(model, example_input)

    out_path.parent.mkdir(parents=True, exist_ok=True)
    scripted.save(str(out_path))
    print("Saved new JIT (CUDA) model to:", out_path)
    print("Done. Replace your old model file with this one (keep backup).")


if __name__ == "__main__":
    main()