""" Twill: Optimal Software Pipelining and Warp Specialization for Tensor Core GPUs Implementation of the paper by Rupanshu Soi et al. (arXiv:2512.18134) with GauS differentiable solver by Yaohui Cai et al. (arXiv:2602.20427) Twill formulates the joint SWP + WS optimization as: Phase 1: ZLP-based Optimal Modulo Scheduling (CBC solver via PuLP) Phase 2: SMT-based Joint SWP + WS (Z3 solver) GauS provides a scalable differentiable alternative using: Gaussian reparameterization + Augmented Lagrangian Method (ALM) With cost normalization to make cycle counts tractable. """ from twill.graph import DependenceGraph, Instruction, DependenceEdge, MachineDescription from twill.cost_normalization import normalize_costs from twill.modulo_scheduler import optimal_modulo_schedule from twill.smt_joint import swp_and_ws from twill.twill_solver import twill_solve from twill.codegen import generate_pipelined_code from twill.visualization import visualize_schedule from twill.gaus_solver import GauSSolver, GausGraph, gaus_solve_twill_graph __version__ = "0.2.0" __all__ = [ "DependenceGraph", "Instruction", "DependenceEdge", "MachineDescription", "normalize_costs", "optimal_modulo_schedule", "swp_and_ws", "twill_solve", "generate_pipelined_code", "visualize_schedule", "GauSSolver", "GausGraph", "gaus_solve_twill_graph", ]