|
|
| from collections import defaultdict, deque
|
| import heapq
|
|
|
# Issue-width limits per execution engine: the maximum number of ops of each
# engine kind that may be packed into a single instruction bundle (one cycle).
# "debug" is effectively unconstrained at 64 slots.
SLOT_LIMITS = {
    "alu": 12,
    "valu": 6,
    "load": 2,
    "store": 2,
    "flow": 1,
    "debug": 64,
}
|
|
|
class Node:
    """One operation in the scheduler's dependency DAG.

    ``parents`` are ops that must issue before this one; ``children`` are
    ops that wait on it.  ``priority`` starts at 0 and is assigned later by
    the scheduler; ``latency`` defaults to a single cycle.
    """

    def __init__(self, id, engine, args, desc=""):
        self.id = id
        self.engine = engine
        self.args = args
        self.desc = desc
        # Dependency edges, populated through add_child().
        self.parents, self.children = [], []
        # Scheduling metadata filled in / consumed by the scheduler.
        self.priority, self.latency = 0, 1

    def add_child(self, node):
        """Record that *node* depends on self (self must issue first)."""
        node.parents.append(self)
        self.children.append(node)
|
|
|
class Scheduler:
    """Builds a dependency DAG of ops and list-schedules them into
    VLIW-style instruction bundles subject to per-engine SLOT_LIMITS.

    Ops are added in program order via :meth:`add_op`, which wires
    read-after-write, write-after-write, and write-after-read edges against
    earlier ops touching the same scratch addresses.  :meth:`schedule`
    then greedily packs ready ops into cycles, longest-critical-path first.
    """

    def __init__(self):
        self.nodes = []                          # every op, in creation order
        self.id_counter = 0                      # next node id (== program order)
        self.scratch_reads = defaultdict(list)   # addr -> nodes that read it
        self.scratch_writes = defaultdict(list)  # addr -> nodes that wrote it

    def add_op(self, engine, args, desc=""):
        """Append one op and wire its dependency edges.

        Args:
            engine: execution engine name (a key of SLOT_LIMITS).
            args:   op-specific argument tuple; args[0] is usually the opcode.
            desc:   optional human-readable label.

        Returns:
            The newly created Node.
        """
        node = Node(self.id_counter, engine, args, desc)
        self.nodes.append(node)
        self.id_counter += 1

        reads, writes = self._get_rw(engine, args)

        # RAW: run after the most recent writer of each address we read.
        for addr in reads:
            writers = self.scratch_writes.get(addr)
            if writers:
                writers[-1].add_child(node)

        # WAW: keep writes to the same address in program order.
        for addr in writes:
            writers = self.scratch_writes.get(addr)
            if writers:
                writers[-1].add_child(node)

        # WAR: our write must wait for every earlier reader of the address.
        for addr in writes:
            for reader in self.scratch_reads.get(addr, ()):
                if reader is not node:
                    reader.add_child(node)

        # Record this op's accesses for wiring future ops.
        for addr in reads:
            self.scratch_reads[addr].append(node)
        for addr in writes:
            self.scratch_writes[addr].append(node)

        return node

    def _get_rw(self, engine, args):
        """Return ``(reads, writes)``: scratch addresses the op touches.

        Vector ops cover 8 consecutive addresses starting at the given base.
        Ops with no scratch traffic (e.g. unconditional jumps) yield empty
        lists, so they get no dependency edges.
        """
        reads = []
        writes = []

        def vec(base):
            # One 8-lane vector register: base plus the next 7 slots.
            return [base + i for i in range(8)]

        if engine == "alu":
            # (op, dest, src1, src2)
            op, dest, a1, a2 = args
            writes.append(dest)
            reads.extend((a1, a2))
        elif engine == "valu":
            op = args[0]
            if op == "vbroadcast":
                # Scalar source replicated into a full vector destination.
                writes.extend(vec(args[1]))
                reads.append(args[2])
            elif op == "multiply_add":
                # dest = a * b + c, all vectors.
                writes.extend(vec(args[1]))
                reads.extend(vec(args[2]))
                reads.extend(vec(args[3]))
                reads.extend(vec(args[4]))
            else:
                # Generic two-source vector op.
                writes.extend(vec(args[1]))
                reads.extend(vec(args[2]))
                reads.extend(vec(args[3]))
        elif engine == "load":
            op = args[0]
            if op == "const":
                writes.append(args[1])
            elif op == "load":
                writes.append(args[1])
                reads.append(args[2])   # address register
            elif op == "vload":
                writes.extend(vec(args[1]))
                reads.append(args[2])   # address register
        elif engine == "store":
            op = args[0]
            if op == "vstore":
                reads.append(args[1])       # address register
                reads.extend(vec(args[2]))  # vector data
        elif engine == "flow":
            op = args[0]
            if op == "vselect":
                # Lane-wise select: dest, cond, if-true, if-false vectors.
                writes.extend(vec(args[1]))
                reads.extend(vec(args[2]))
                reads.extend(vec(args[3]))
                reads.extend(vec(args[4]))
            elif op == "select":
                writes.append(args[1])
                reads.extend((args[2], args[3], args[4]))
            elif op == "add_imm":
                writes.append(args[1])
                reads.append(args[2])
            elif op in ("cond_jump", "cond_jump_rel"):
                # Jumps only read their condition register.
                reads.append(args[1])

        return reads, writes

    def schedule(self):
        """Greedy list scheduling.

        Fills one instruction bundle (cycle) at a time with ready nodes,
        highest critical-path priority first (ties broken by creation id),
        honoring SLOT_LIMITS per engine.  A node's children become ready
        only on the following cycle.

        Returns:
            List of bundles; each is a dict mapping engine -> list of op
            args issued that cycle.

        Raises:
            Exception: if unscheduled nodes remain but none can ever become
                ready (a dependency cycle).  Previously this was only
                detected when it happened before the first bundle; a later
                stall silently returned a truncated schedule.
        """
        self._calc_priorities()

        # Min-heap of (-priority, id, node): highest priority pops first,
        # with creation order as a deterministic tie-breaker.
        ready = []
        in_degree = defaultdict(int)
        for node in self.nodes:
            in_degree[node] = len(node.parents)
            if in_degree[node] == 0:
                heapq.heappush(ready, (-node.priority, node.id, node))

        instructions = []
        while ready or any(count > 0 for count in in_degree.values()):
            cycle_ops = defaultdict(list)
            usage = dict.fromkeys(SLOT_LIMITS, 0)  # slots consumed this cycle
            issued = []    # nodes placed in this bundle
            deferred = []  # ready nodes whose engine is already full

            while ready:
                entry = heapq.heappop(ready)
                node = entry[2]
                if usage[node.engine] < SLOT_LIMITS[node.engine]:
                    usage[node.engine] += 1
                    cycle_ops[node.engine].append(node.args)
                    issued.append(node)
                else:
                    deferred.append(entry)

            # Engine-saturated nodes retry next cycle.
            for entry in deferred:
                heapq.heappush(ready, entry)

            if not issued:
                # Nothing could issue: either we are done, or the remaining
                # nodes are blocked forever.  Fail loudly in the latter case
                # instead of returning a partial schedule.
                if any(count > 0 for count in in_degree.values()):
                    raise Exception("Deadlock or Cycle detected")
                break

            instructions.append(dict(cycle_ops))

            # Retire this bundle: children become eligible for the NEXT cycle.
            for node in issued:
                for child in node.children:
                    in_degree[child] -= 1
                    if in_degree[child] == 0:
                        heapq.heappush(ready, (-child.priority, child.id, child))

        return instructions

    def _calc_priorities(self):
        """Assign each node its critical-path length (longest descendant
        chain, counting itself) as the scheduling priority.

        add_op only ever creates edges from an existing node to the newly
        created one, so every child has a larger id than its parents.
        Iterating in reverse creation order therefore visits each child
        before any of its parents — no recursion needed, so arbitrarily
        deep dependency chains cannot hit the interpreter recursion limit
        (the previous recursive implementation could).
        """
        for node in reversed(self.nodes):
            node.priority = 1 + max(
                (child.priority for child in node.children), default=0
            )
|
|
|