| """ |
| Phase 1: Optimal Modulo Scheduling via Integer Linear Programming (ZLP). |
| |
| Based on Section 3.1, 4.1, and 5.1 of the paper. |
| Uses the ILP formulation from Stoutchinin et al. (referenced as [stoutchinin-ilp]). |
| |
| The modulo scheduling problem: |
| Given G = (V, E) and target initiation interval I, |
| find M: V -> [0, L) such that: |
| 1. Dependence: ∀(u,v,d,δ)∈E: M(v) - M(u) + I·δ ≥ d |
| 2. Resource: modular RRT fits within machine capacities |
| 3. Minimize L (schedule length) subject to the above |
| |
| Uses CBC solver via PuLP. |
| """ |
|
|
| import pulp |
| import numpy as np |
| from typing import Dict, List, Optional, Tuple |
| from twill.graph import DependenceGraph, Instruction, DependenceEdge |
|
|
|
|
| class ModuloScheduleResult: |
| """Result of modulo scheduling. |
| |
| Attributes: |
| schedule: Dict mapping instruction name -> clock cycle M(v) |
| initiation_interval: I |
| length: L (total schedule length) |
| num_copies: ceil(L/I) - number of overlapping iterations |
| """ |
|
|
| def __init__(self, schedule: Dict[str, int], I: int): |
| self.schedule = schedule |
| self.initiation_interval = I |
| self._length = None |
|
|
| @property |
| def I(self) -> int: |
| return self.initiation_interval |
|
|
| @property |
| def length(self) -> int: |
| """L: total schedule length (max M(v) + cycles(v) across all instructions).""" |
| if self._length is not None: |
| return self._length |
| return max(self.schedule.values()) + 1 |
|
|
| @length.setter |
| def length(self, val: int): |
| self._length = val |
|
|
| @property |
| def num_copies(self) -> int: |
| """ceil(L/I) - number of overlapping iteration copies.""" |
| return int(np.ceil(self.length / self.I)) |
|
|
| def __repr__(self): |
| return (f"ModuloSchedule(I={self.I}, L={self.length}, copies={self.num_copies}, " |
| f"schedule={self.schedule})") |
|
|
|
|
| def optimal_modulo_schedule( |
| graph: DependenceGraph, |
| target_I: int, |
| solver_time_limit: int = 120, |
| verbose: bool = False, |
| ) -> Optional[ModuloScheduleResult]: |
| """Find an optimal modulo schedule with the given initiation interval. |
| |
| Uses ILP formulation: minimize L subject to dependence and resource constraints. |
| |
| Args: |
| graph: The loop dependence graph |
| target_I: Target initiation interval |
| solver_time_limit: Time limit for the solver in seconds |
| verbose: Print solver output |
| |
| Returns: |
| ModuloScheduleResult if feasible, None if infeasible for this I |
| """ |
| I = target_I |
| V = graph.V |
| E = graph.E |
| machine = graph.machine |
| n = len(V) |
|
|
| |
| prob = pulp.LpProblem(f"ModuloSchedule_I{I}", pulp.LpMinimize) |
|
|
| |
| |
| max_cycles = max(v.cycles for v in V) |
| big_M = I * (n + 1) * max_cycles |
| |
| M = {} |
| for v in V: |
| M[v.name] = pulp.LpVariable(f"M_{v.name}", lowBound=0, upBound=big_M, cat='Integer') |
|
|
| |
| L = pulp.LpVariable("L", lowBound=1, upBound=big_M, cat='Integer') |
|
|
| |
| prob += L |
|
|
| |
| for v in V: |
| prob += L >= M[v.name] + v.cycles |
|
|
| |
| |
| |
| for e in E: |
| prob += M[e.dst] - M[e.src] >= e.delay - I * e.iteration_delay |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| slot = {} |
| for v in V: |
| for s in range(I): |
| slot[v.name, s] = pulp.LpVariable(f"slot_{v.name}_{s}", cat='Binary') |
| |
| prob += pulp.lpSum(slot[v.name, s] for s in range(I)) == 1 |
| |
| q_v = pulp.LpVariable(f"q_{v.name}", lowBound=0, upBound=big_M // max(I, 1), cat='Integer') |
| prob += M[v.name] == q_v * I + pulp.lpSum(s * slot[v.name, s] for s in range(I)) |
|
|
| |
| |
| |
| for t in range(I): |
| for f_idx, f_name in enumerate(machine.functional_units): |
| cap = machine.capacity(f_name) |
| if cap <= 0: |
| continue |
| |
| terms = [] |
| for v in V: |
| for c in range(v.cycles): |
| usage = int(v.rrt[c, f_idx]) |
| if usage > 0: |
| s = (t - c) % I |
| terms.append(usage * slot[v.name, s]) |
| |
| if terms: |
| prob += pulp.lpSum(terms) <= cap |
|
|
| |
| solver = pulp.PULP_CBC_CMD(msg=1 if verbose else 0, timeLimit=solver_time_limit) |
| status = prob.solve(solver) |
|
|
| if status != pulp.constants.LpStatusOptimal: |
| return None |
|
|
| |
| schedule = {} |
| for v in V: |
| schedule[v.name] = int(round(pulp.value(M[v.name]))) |
|
|
| result = ModuloScheduleResult(schedule, I) |
| result.length = int(round(pulp.value(L))) |
|
|
| return result |
|
|
|
|
| def compute_modular_rrt( |
| graph: DependenceGraph, |
| schedule: ModuloScheduleResult, |
| ) -> np.ndarray: |
| """Compute the modular RRT for a given schedule. |
| |
| The modular RRT shows resource usage per time slot in [0, I). |
| modular_rrt[t, f] = total usage of functional unit f at time slot t in steady state. |
| |
| Returns: |
| np.ndarray of shape (I, num_functional_units) |
| """ |
| I = schedule.I |
| num_fus = graph.machine.num_functional_units |
| mod_rrt = np.zeros((I, num_fus), dtype=int) |
|
|
| for v in graph.V: |
| m_v = schedule.schedule[v.name] |
| for c in range(v.cycles): |
| t = (m_v + c) % I |
| for f in range(num_fus): |
| mod_rrt[t, f] += int(v.rrt[c, f]) |
|
|
| return mod_rrt |
|
|
|
|
| def validate_schedule( |
| graph: DependenceGraph, |
| schedule: ModuloScheduleResult, |
| ) -> Tuple[bool, List[str]]: |
| """Validate that a modulo schedule satisfies all constraints. |
| |
| Returns: |
| Tuple of (is_valid, list of violation messages) |
| """ |
| violations = [] |
| I = schedule.I |
| M = schedule.schedule |
|
|
| |
| for e in graph.E: |
| m_src = M[e.src] |
| m_dst = M[e.dst] |
| if m_dst + I * e.iteration_delay < m_src + e.delay: |
| violations.append( |
| f"Dependence violation: {e.src}({m_src}) -> {e.dst}({m_dst}), " |
| f"need M({e.dst}) + I*{e.iteration_delay} >= M({e.src}) + {e.delay}, " |
| f"got {m_dst + I * e.iteration_delay} < {m_src + e.delay}" |
| ) |
|
|
| |
| mod_rrt = compute_modular_rrt(graph, schedule) |
| cap_vec = graph.machine.capacity_vector |
| for t in range(I): |
| for f in range(graph.machine.num_functional_units): |
| if mod_rrt[t, f] > cap_vec[f]: |
| violations.append( |
| f"Resource violation at t={t}, {graph.machine.functional_units[f]}: " |
| f"usage={mod_rrt[t, f]} > capacity={cap_vec[f]}" |
| ) |
|
|
| return len(violations) == 0, violations |
|
|