diff --git "a/autotune_cache.json" "b/autotune_cache.json" --- "a/autotune_cache.json" +++ "b/autotune_cache.json" @@ -24368,13 +24368,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 4194304, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24384,13 +24384,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 1941851, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24400,13 +24400,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 7877533, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24416,13 +24416,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 2226123, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24432,13 +24432,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 12748156, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24448,13 +24448,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 1911611, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24464,13 +24464,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 5017088, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24480,13 +24480,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(24, 4194304, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24496,13 +24496,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(24, 1112821, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24512,13 +24512,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(24, 33122502, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24528,13 +24528,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 1284070, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24544,13 +24544,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 10949861, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24560,13 +24560,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 1571524, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24576,13 +24576,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 12857151, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24592,13 +24592,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 1641261, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24608,13 +24608,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 15495770, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24624,13 +24624,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 1808517, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24640,13 +24640,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 9814710, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24656,13 +24656,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 1780790, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24672,13 +24672,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 14310670, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24688,13 +24688,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(24, 2113485, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24704,13 +24704,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(24, 21145957, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24720,13 +24720,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 1782768, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24736,13 +24736,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 14828658, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24752,13 +24752,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(24, 1577958, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24768,13 +24768,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(24, 17012742, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24784,13 +24784,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 2345014, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24800,13 +24800,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 12887107, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24816,13 +24816,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 2168973, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24832,13 +24832,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 12754203, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24848,13 +24848,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 1873811, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24864,13 +24864,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 12506390, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24880,13 +24880,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 2223717, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24896,13 +24896,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 14387524, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24912,13 +24912,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 7898751, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24928,13 +24928,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 8018926, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24944,13 +24944,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 4194304, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24960,13 +24960,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 7889449, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24976,13 +24976,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 7552645, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24992,13 +24992,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 7813095, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -25008,13 +25008,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 8355681, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -25024,6 +25024,10 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 3, + "num_buffers_warp_spec": 0, + "num_consumer_groups": 0, + "reg_dec_producer": 0, + "reg_inc_consumer": 0, "maxnreg": null, "pre_hook": null, "ir_override": null @@ -25036,6 +25040,10 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 3, + "num_buffers_warp_spec": 0, + "num_consumer_groups": 0, + "reg_dec_producer": 0, + "reg_inc_consumer": 0, "maxnreg": null, "pre_hook": null, "ir_override": null @@ -25048,29 +25056,60 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 3, + "num_buffers_warp_spec": 0, + "num_consumer_groups": 0, + "reg_dec_producer": 0, + "reg_inc_consumer": 0, "maxnreg": null, "pre_hook": null, "ir_override": null + }, + "(23, 9443920, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { + "kwargs": { + "BM": 16, + "BK": 8 + }, + "num_warps": 2, + "num_ctas": 1, + "num_stages": 2, + "num_buffers_warp_spec": 0, + "num_consumer_groups": 0, + "reg_dec_producer": 0, + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null + }, + "(23, 7697892, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { + "kwargs": { + "BM": 16, + "BK": 8 + }, + "num_warps": 2, + "num_ctas": 1, + "num_stages": 2, + "num_buffers_warp_spec": 0, + "num_consumer_groups": 0, + "reg_dec_producer": 0, + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null + }, + "(22, 7504199, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { + "kwargs": { + "BM": 16, + "BK": 8 + }, + "num_warps": 2, + "num_ctas": 1, + "num_stages": 2, + "num_buffers_warp_spec": 0, + "num_consumer_groups": 0, + "reg_dec_producer": 0, + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null } }, - "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm.sparse_conv_fwd_implicit_gemm_kernel": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm.sparse_conv_bwd_weight_implicit_gemm_kernel": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm_splitk.sparse_conv_fwd_implicit_gemm_kernel": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm_splitk.sparse_conv_bwd_weight_implicit_gemm_kernel": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm_splitk.sparse_conv_fwd_implicit_gemm_splitk_kernel": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm_splitk.sparse_conv_bwd_weight_implicit_gemm_splitk_kernel": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm_splitk.sparse_conv_fwd_implicit_gemm_splitk": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm_splitk.sparse_conv_bwd_weight_implicit_gemm_splitk": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm.sparse_conv_fwd_implicit_gemm_kernel": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm.sparse_conv_fwd_masked_implicit_gemm_kernel": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm.sparse_conv_bwd_weight_masked_implicit_gemm_kernel": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm_splitk.sparse_conv_fwd_implicit_gemm_splitk": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm_splitk.sparse_conv_fwd_masked_implicit_gemm_kernel": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm_splitk.sparse_conv_bwd_weight_masked_implicit_gemm_kernel": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm_splitk.sparse_conv_fwd_masked_implicit_gemm_splitk_kernel": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm_splitk.sparse_conv_bwd_weight_masked_implicit_gemm_splitk_kernel": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm_splitk.sparse_conv_fwd_masked_implicit_gemm_splitk": {}, - "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm_splitk.sparse_conv_bwd_weight_masked_implicit_gemm_splitk": {}, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_implicit_gemm.sparse_submanifold_conv_bwd_input_implicit_gemm_kernel": { "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25081,13 +25120,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25098,13 +25137,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25115,13 +25154,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25132,13 +25171,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25149,13 +25188,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25166,13 +25205,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25183,13 +25222,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25200,13 +25239,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25217,13 +25256,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25234,13 +25273,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25251,13 +25290,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25268,13 +25307,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_implicit_gemm.sparse_submanifold_conv_bwd_weight_implicit_gemm_kernel": { @@ -25287,13 +25326,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25304,13 +25343,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25321,13 +25360,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25338,13 +25377,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25355,13 +25394,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25372,13 +25411,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25389,13 +25428,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25406,13 +25445,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25423,13 +25462,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25440,13 +25479,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25457,13 +25496,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25474,13 +25513,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_implicit_gemm_splitk.sparse_submanifold_conv_bwd_input_implicit_gemm_kernel": { @@ -25493,13 +25532,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25510,13 +25549,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25527,13 +25566,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25544,13 +25583,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25561,13 +25600,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25578,13 +25617,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25595,13 +25634,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25612,13 +25651,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25629,13 +25668,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25646,13 +25685,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25663,13 +25702,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25680,13 +25719,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_implicit_gemm_splitk.sparse_submanifold_conv_bwd_weight_implicit_gemm_kernel": { @@ -25699,13 +25738,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25716,13 +25755,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25733,13 +25772,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25750,13 +25789,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25767,13 +25806,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25784,13 +25823,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25801,13 +25840,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25818,13 +25857,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25835,13 +25874,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25852,13 +25891,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25869,13 +25908,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25886,13 +25925,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_implicit_gemm_splitk.sparse_submanifold_conv_bwd_input_implicit_gemm_splitk_kernel": { @@ -25905,13 +25944,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25922,13 +25961,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25939,13 +25978,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25956,13 +25995,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25973,13 +26012,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25990,13 +26029,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26007,13 +26046,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26024,13 +26063,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26041,13 +26080,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26058,13 +26097,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26075,13 +26114,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26092,13 +26131,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26109,13 +26148,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26126,13 +26165,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26143,13 +26182,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 8, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26160,13 +26199,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 8, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26177,13 +26216,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 4, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26194,13 +26233,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_implicit_gemm_splitk.sparse_submanifold_conv_bwd_weight_implicit_gemm_splitk_kernel": { @@ -26213,13 +26252,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26230,13 +26269,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26247,13 +26286,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26264,13 +26303,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26281,13 +26320,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26298,13 +26337,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26315,13 +26354,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26332,13 +26371,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26349,13 +26388,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26366,13 +26405,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26383,13 +26422,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26400,13 +26439,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, 8, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26417,13 +26456,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_implicit_gemm_splitk.sparse_submanifold_conv_bwd_input_implicit_gemm_splitk": { @@ -26476,13 +26515,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26493,13 +26532,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26510,13 +26549,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26527,13 +26566,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26544,13 +26583,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26561,13 +26600,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26578,13 +26617,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26595,13 +26634,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26612,13 +26651,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26629,13 +26668,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26646,13 +26685,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26663,13 +26702,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26680,13 +26719,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26697,13 +26736,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26714,13 +26753,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26731,13 +26770,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26748,13 +26787,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26765,13 +26804,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26782,13 +26821,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26799,13 +26838,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26816,13 +26855,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26833,13 +26872,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26850,13 +26889,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26867,13 +26906,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26884,13 +26923,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26901,13 +26940,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26918,13 +26957,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26935,13 +26974,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26952,13 +26991,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26969,13 +27008,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26986,13 +27025,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27003,13 +27042,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27020,13 +27059,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27037,13 +27076,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27054,13 +27093,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27071,13 +27110,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27088,13 +27127,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27105,13 +27144,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27122,13 +27161,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27139,13 +27178,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27156,13 +27195,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27173,13 +27212,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27190,13 +27229,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27207,13 +27246,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27224,13 +27263,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27241,13 +27280,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27258,13 +27297,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27275,13 +27314,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27292,13 +27331,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27309,13 +27348,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27326,13 +27365,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27343,13 +27382,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27360,13 +27399,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27377,13 +27416,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27394,13 +27433,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27411,13 +27450,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27428,13 +27467,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27445,13 +27484,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27462,13 +27501,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27479,13 +27518,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27496,13 +27535,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27513,13 +27552,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27530,13 +27569,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27547,13 +27586,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27564,13 +27603,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27581,13 +27620,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27598,13 +27637,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27615,13 +27654,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27632,13 +27671,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27649,13 +27688,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27666,13 +27705,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27683,13 +27722,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27700,13 +27739,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27717,13 +27756,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -27734,13 +27773,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -27751,13 +27790,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -27768,13 +27807,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -27785,13 +27824,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -27802,13 +27841,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -27819,13 +27858,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_masked_implicit_gemm.sparse_submanifold_conv_bwd_weight_masked_implicit_gemm_kernel": { @@ -27838,13 +27877,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27855,13 +27894,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27872,13 +27911,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27889,13 +27928,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27906,13 +27945,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27923,13 +27962,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27940,13 +27979,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27957,13 +27996,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27974,13 +28013,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27991,13 +28030,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28008,13 +28047,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28025,13 +28064,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28042,13 +28081,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28059,13 +28098,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28076,13 +28115,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28093,13 +28132,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28110,13 +28149,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28127,13 +28166,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28144,13 +28183,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28161,13 +28200,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28178,13 +28217,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28195,13 +28234,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28212,13 +28251,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28229,13 +28268,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28246,13 +28285,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28263,13 +28302,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28280,13 +28319,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28297,13 +28336,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28314,13 +28353,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28331,13 +28370,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28348,13 +28387,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28365,13 +28404,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28382,13 +28421,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28399,13 +28438,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28416,13 +28455,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28433,13 +28472,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28450,13 +28489,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28467,13 +28506,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28484,13 +28523,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28501,13 +28540,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28518,13 +28557,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28535,13 +28574,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28552,13 +28591,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28569,13 +28608,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28586,13 +28625,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -28603,13 +28642,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -28620,13 +28659,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -28637,13 +28676,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -28654,13 +28693,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -28671,13 +28710,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -28688,13 +28727,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_masked_implicit_gemm_splitk.sparse_submanifold_conv_bwd_input_masked_implicit_gemm_kernel": { @@ -28707,13 +28746,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28724,13 +28763,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28741,13 +28780,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28758,13 +28797,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28775,13 +28814,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28792,13 +28831,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28809,13 +28848,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28826,13 +28865,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28843,13 +28882,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28860,13 +28899,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28877,13 +28916,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28894,13 +28933,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28911,13 +28950,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28928,13 +28967,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28945,13 +28984,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28962,13 +29001,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28979,13 +29018,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28996,13 +29035,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29013,13 +29052,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29030,13 +29069,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29047,13 +29086,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29064,13 +29103,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29081,13 +29120,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29098,13 +29137,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29115,13 +29154,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29132,13 +29171,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29149,13 +29188,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29166,13 +29205,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29183,13 +29222,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29200,13 +29239,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29217,13 +29256,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29234,13 +29273,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29251,13 +29290,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29268,13 +29307,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29285,13 +29324,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29302,13 +29341,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29319,13 +29358,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29336,13 +29375,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29353,13 +29392,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29370,13 +29409,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29387,13 +29426,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29404,13 +29443,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29421,13 +29460,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29438,13 +29477,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29455,13 +29494,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29472,13 +29511,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29489,13 +29528,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29506,13 +29545,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29523,13 +29562,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29540,13 +29579,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29557,13 +29596,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29574,13 +29613,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29591,13 +29630,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29608,13 +29647,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29625,13 +29664,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29642,13 +29681,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29659,13 +29698,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29676,13 +29715,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29693,13 +29732,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29710,13 +29749,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29727,13 +29766,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29744,13 +29783,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29761,13 +29800,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29778,13 +29817,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29795,13 +29834,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29812,13 +29851,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29829,13 +29868,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29846,13 +29885,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29863,13 +29902,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29880,13 +29919,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29897,13 +29936,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29914,13 +29953,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29931,13 +29970,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29948,13 +29987,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -29965,13 +30004,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -29982,13 +30021,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -29999,13 +30038,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30016,13 +30055,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30033,13 +30072,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30050,13 +30089,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_masked_implicit_gemm_splitk.sparse_submanifold_conv_bwd_weight_masked_implicit_gemm_kernel": { @@ -30069,13 +30108,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30086,13 +30125,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30103,13 +30142,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30120,13 +30159,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30137,13 +30176,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30154,13 +30193,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30171,13 +30210,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30188,13 +30227,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30205,13 +30244,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30222,13 +30261,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30239,13 +30278,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30256,13 +30295,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30273,13 +30312,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30290,13 +30329,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30307,13 +30346,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30324,13 +30363,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30341,13 +30380,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30358,13 +30397,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30375,13 +30414,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30392,13 +30431,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30409,13 +30448,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30426,13 +30465,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30443,13 +30482,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30460,13 +30499,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30477,13 +30516,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30494,13 +30533,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30511,13 +30550,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30528,13 +30567,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30545,13 +30584,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30562,13 +30601,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30579,13 +30618,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30596,13 +30635,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30613,13 +30652,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30630,13 +30669,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30647,13 +30686,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30664,13 +30703,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30681,13 +30720,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30698,13 +30737,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30715,13 +30754,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30732,13 +30771,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30749,13 +30788,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30766,13 +30805,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30783,13 +30822,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30800,13 +30839,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30817,13 +30856,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -30834,13 +30873,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -30851,13 +30890,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -30868,13 +30907,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -30885,13 +30924,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -30902,13 +30941,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -30919,13 +30958,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_masked_implicit_gemm_splitk.sparse_submanifold_conv_bwd_input_masked_implicit_gemm_splitk_kernel": { @@ -30938,13 +30977,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30955,13 +30994,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30972,13 +31011,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30989,13 +31028,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31006,13 +31045,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31023,13 +31062,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31040,13 +31079,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31057,13 +31096,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31074,13 +31113,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 2048, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31091,13 +31130,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 2048, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31108,13 +31147,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 2048, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31125,13 +31164,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31142,13 +31181,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31159,13 +31198,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31176,13 +31215,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31193,13 +31232,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31210,13 +31249,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31227,13 +31266,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31244,13 +31283,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 4096, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31261,13 +31300,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 4096, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31278,13 +31317,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31295,13 +31334,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31312,13 +31351,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31329,13 +31368,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31346,13 +31385,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31363,13 +31402,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31380,13 +31419,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31397,13 +31436,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31414,13 +31453,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31431,13 +31470,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31448,13 +31487,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31465,13 +31504,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31482,13 +31521,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31499,13 +31538,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31516,13 +31555,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 2048, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31533,13 +31572,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 2048, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31550,13 +31589,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 2048, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31567,13 +31606,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31584,13 +31623,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31601,13 +31640,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31618,13 +31657,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31635,13 +31674,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31652,13 +31691,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31669,13 +31708,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31686,13 +31725,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 4096, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31703,13 +31742,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 4096, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31720,13 +31759,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31737,13 +31776,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31754,13 +31793,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31771,13 +31810,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31788,13 +31827,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31805,13 +31844,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31822,13 +31861,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31839,13 +31878,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31856,13 +31895,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31873,13 +31912,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31890,13 +31929,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31907,13 +31946,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31924,13 +31963,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31941,13 +31980,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31958,13 +31997,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31975,13 +32014,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31992,13 +32031,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32009,13 +32048,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32026,13 +32065,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32043,13 +32082,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32060,13 +32099,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32077,13 +32116,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32094,13 +32133,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32111,13 +32150,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32128,13 +32167,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32145,13 +32184,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32162,13 +32201,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32179,13 +32218,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 4096, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32196,13 +32235,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 4096, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32213,13 +32252,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 4096, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32230,13 +32269,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32247,13 +32286,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32264,13 +32303,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32281,13 +32320,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32298,13 +32337,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32315,13 +32354,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32332,13 +32371,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32349,13 +32388,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32366,13 +32405,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32383,13 +32422,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32400,13 +32439,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32417,13 +32456,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32434,13 +32473,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32451,13 +32490,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 2048, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32468,13 +32507,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 2048, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32485,13 +32524,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32502,13 +32541,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32519,13 +32558,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32536,13 +32575,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32553,13 +32592,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32570,13 +32609,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32587,13 +32626,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32604,13 +32643,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32621,13 +32660,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32638,13 +32677,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32655,13 +32694,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32672,13 +32711,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32689,13 +32728,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 4096, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32706,13 +32745,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32723,13 +32762,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32740,13 +32779,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32757,13 +32796,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32774,13 +32813,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32791,13 +32830,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 2048, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32808,13 +32847,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 2048, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32825,13 +32864,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 2048, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32842,13 +32881,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 2048, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32859,13 +32898,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32876,13 +32915,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32893,13 +32932,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32910,13 +32949,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32927,13 +32966,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32944,13 +32983,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32961,13 +33000,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32978,13 +33017,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32995,13 +33034,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33012,13 +33051,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33029,13 +33068,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33046,13 +33085,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33063,13 +33102,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33080,13 +33119,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33097,13 +33136,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33114,13 +33153,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33131,13 +33170,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33148,13 +33187,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33165,13 +33204,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33182,13 +33221,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33199,13 +33238,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33216,13 +33255,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33233,13 +33272,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33250,13 +33289,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33267,13 +33306,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33284,13 +33323,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33301,13 +33340,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 2048, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33318,13 +33357,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33335,13 +33374,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33352,13 +33391,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33369,13 +33408,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33386,13 +33425,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33403,13 +33442,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33420,13 +33459,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 4096, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33437,13 +33476,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 4096, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33454,13 +33493,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 4096, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33471,13 +33510,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 4096, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33488,13 +33527,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33505,13 +33544,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33522,13 +33561,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33539,13 +33578,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33556,13 +33595,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33573,13 +33612,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33590,13 +33629,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33607,13 +33646,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33624,13 +33663,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33641,13 +33680,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33658,13 +33697,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33675,13 +33714,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33692,13 +33731,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33709,13 +33748,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33726,13 +33765,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33743,13 +33782,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33760,13 +33799,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 16, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33777,13 +33816,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33794,13 +33833,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 8, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33811,13 +33850,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 8, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33828,13 +33867,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 4, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33845,13 +33884,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_masked_implicit_gemm_splitk.sparse_submanifold_conv_bwd_weight_masked_implicit_gemm_splitk_kernel": { @@ -33864,13 +33903,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33881,13 +33920,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33898,13 +33937,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 64, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33915,13 +33954,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 64, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33932,13 +33971,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 64, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33949,13 +33988,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33966,13 +34005,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33983,13 +34022,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34000,13 +34039,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34017,13 +34056,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34034,13 +34073,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34051,13 +34090,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34068,13 +34107,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34085,13 +34124,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34102,13 +34141,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34119,13 +34158,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34136,13 +34175,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34153,13 +34192,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34170,13 +34209,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34187,13 +34226,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34204,13 +34243,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34221,13 +34260,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34238,13 +34277,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34255,13 +34294,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34272,13 +34311,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34289,13 +34328,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34306,13 +34345,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34323,13 +34362,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34340,13 +34379,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34357,13 +34396,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34374,13 +34413,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34391,13 +34430,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34408,13 +34447,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34425,13 +34464,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34442,13 +34481,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34459,13 +34498,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34476,13 +34515,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34493,13 +34532,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34510,13 +34549,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34527,13 +34566,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34544,13 +34583,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34561,13 +34600,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34578,13 +34617,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 32, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34595,13 +34634,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 32, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34612,13 +34651,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 32, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34629,13 +34668,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 32, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34646,13 +34685,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34663,13 +34702,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34680,13 +34719,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34697,13 +34736,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 64, 16, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34714,13 +34753,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 64, 16, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34731,13 +34770,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 64, 16, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34748,13 +34787,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34765,13 +34804,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34782,13 +34821,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34799,13 +34838,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34816,13 +34855,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34833,13 +34872,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34850,13 +34889,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34867,13 +34906,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34884,13 +34923,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34901,13 +34940,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34918,13 +34957,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34935,13 +34974,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34952,13 +34991,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34969,13 +35008,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34986,13 +35025,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35003,13 +35042,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35020,13 +35059,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35037,13 +35076,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35054,13 +35093,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35071,13 +35110,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35088,13 +35127,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35105,13 +35144,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35122,13 +35161,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35139,13 +35178,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35156,13 +35195,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35173,13 +35212,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35190,13 +35229,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35207,13 +35246,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35224,13 +35263,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35241,13 +35280,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35258,13 +35297,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35275,13 +35314,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35292,13 +35331,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35309,13 +35348,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35326,13 +35365,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35343,13 +35382,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35360,13 +35399,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35377,13 +35416,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35394,13 +35433,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35411,13 +35450,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35428,13 +35467,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35445,13 +35484,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35462,13 +35501,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35479,13 +35518,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 32, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35496,13 +35535,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 32, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35513,13 +35552,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 32, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35530,13 +35569,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 32, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35547,13 +35586,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35564,13 +35603,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35581,13 +35620,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35598,13 +35637,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 16, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35615,13 +35654,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 16, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35632,13 +35671,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 16, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35649,13 +35688,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35666,13 +35705,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35683,13 +35722,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35700,13 +35739,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35717,13 +35756,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35734,13 +35773,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35751,13 +35790,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35768,13 +35807,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35785,13 +35824,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35802,13 +35841,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35819,13 +35858,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35836,13 +35875,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35853,13 +35892,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35870,13 +35909,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35887,13 +35926,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35904,13 +35943,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35921,13 +35960,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35938,13 +35977,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35955,13 +35994,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35972,13 +36011,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35989,13 +36028,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36006,13 +36045,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36023,13 +36062,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36040,13 +36079,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36057,13 +36096,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36074,13 +36113,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36091,13 +36130,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36108,13 +36147,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36125,13 +36164,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36142,13 +36181,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36159,13 +36198,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36176,13 +36215,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36193,13 +36232,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36210,13 +36249,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36227,13 +36266,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36244,13 +36283,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36261,13 +36300,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36278,13 +36317,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36295,13 +36334,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36312,13 +36351,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36329,13 +36368,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36346,13 +36385,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36363,13 +36402,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36380,13 +36419,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 32, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36397,13 +36436,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 32, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36414,13 +36453,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 32, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36431,13 +36470,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 32, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36448,13 +36487,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36465,13 +36504,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36482,13 +36521,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36499,13 +36538,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 16, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36516,13 +36555,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 16, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36533,13 +36572,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 16, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36550,13 +36589,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36567,13 +36606,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36584,13 +36623,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36601,13 +36640,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36618,13 +36657,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36635,13 +36674,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36652,13 +36691,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36669,13 +36708,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36686,13 +36725,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36703,13 +36742,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36720,13 +36759,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36737,13 +36776,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36754,13 +36793,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36771,13 +36810,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36788,13 +36827,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36805,13 +36844,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36822,13 +36861,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36839,13 +36878,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36856,13 +36895,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36873,13 +36912,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 16, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36890,13 +36929,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 16, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36907,13 +36946,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 16, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36924,13 +36963,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36941,13 +36980,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36958,13 +36997,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36975,13 +37014,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36992,13 +37031,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37009,13 +37048,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37026,13 +37065,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37043,13 +37082,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37060,13 +37099,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37077,13 +37116,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37094,13 +37133,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37111,13 +37150,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37128,13 +37167,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37145,13 +37184,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 32, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37162,13 +37201,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 32, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37179,13 +37218,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 32, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37196,13 +37235,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 32, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37213,13 +37252,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37230,13 +37269,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37247,13 +37286,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37264,13 +37303,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37281,13 +37320,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37298,13 +37337,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37315,13 +37354,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37332,13 +37371,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37349,13 +37388,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37366,13 +37405,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37383,13 +37422,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37400,13 +37439,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37417,13 +37456,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37434,13 +37473,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37451,13 +37490,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37468,13 +37507,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37485,13 +37524,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37502,13 +37541,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37519,13 +37558,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37536,13 +37575,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37553,13 +37592,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37570,13 +37609,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37587,13 +37626,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37604,13 +37643,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37621,13 +37660,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37638,13 +37677,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37655,13 +37694,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37672,13 +37711,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37689,13 +37728,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37706,13 +37745,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37723,13 +37762,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37740,13 +37779,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37757,13 +37796,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37774,13 +37813,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37791,13 +37830,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37808,13 +37847,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37825,13 +37864,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37842,13 +37881,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 16, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37859,13 +37898,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 16, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37876,13 +37915,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 16, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37893,13 +37932,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37910,13 +37949,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37927,13 +37966,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37944,13 +37983,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37961,13 +38000,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37978,13 +38017,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37995,13 +38034,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38012,13 +38051,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38029,13 +38068,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38046,13 +38085,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38063,13 +38102,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38080,13 +38119,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38097,13 +38136,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38114,13 +38153,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38131,13 +38170,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38148,13 +38187,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38165,13 +38204,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38182,13 +38221,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38199,13 +38238,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38216,13 +38255,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38233,13 +38272,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38250,13 +38289,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38267,13 +38306,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38284,13 +38323,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38301,13 +38340,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38318,13 +38357,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38335,13 +38374,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38352,13 +38391,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38369,13 +38408,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38386,13 +38425,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38403,13 +38442,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38420,13 +38459,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38437,13 +38476,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38454,13 +38493,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38471,13 +38510,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38488,13 +38527,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38505,13 +38544,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 32, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38522,13 +38561,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 32, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38539,13 +38578,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 32, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38556,13 +38595,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 32, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38573,13 +38612,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38590,13 +38629,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38607,13 +38646,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38624,13 +38663,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38641,13 +38680,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38658,13 +38697,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38675,13 +38714,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38692,13 +38731,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38709,13 +38748,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38726,13 +38765,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38743,13 +38782,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38760,13 +38799,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38777,13 +38816,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38794,13 +38833,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38811,13 +38850,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 32, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38828,13 +38867,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 32, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38845,13 +38884,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 32, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38862,13 +38901,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 32, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38879,13 +38918,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38896,13 +38935,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38913,13 +38952,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38930,13 +38969,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38947,13 +38986,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38964,13 +39003,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38981,13 +39020,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38998,13 +39037,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39015,13 +39054,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39032,13 +39071,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 16, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39049,13 +39088,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 16, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39066,13 +39105,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 16, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39083,13 +39122,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39100,13 +39139,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39117,13 +39156,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39134,13 +39173,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39151,13 +39190,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39168,13 +39207,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39185,13 +39224,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39202,13 +39241,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39219,13 +39258,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39236,13 +39275,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 16, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39253,13 +39292,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 16, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39270,13 +39309,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 16, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39287,13 +39326,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39304,13 +39343,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39321,13 +39360,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39338,13 +39377,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39355,13 +39394,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, 8, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39372,13 +39411,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_masked_implicit_gemm_splitk.sparse_submanifold_conv_bwd_input_masked_implicit_gemm_splitk": { @@ -39857,13 +39896,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39874,13 +39913,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39891,13 +39930,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39908,13 +39947,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39925,13 +39964,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39942,13 +39981,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39959,13 +39998,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39976,13 +40015,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39993,13 +40032,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -40010,13 +40049,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -40027,13 +40066,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -40044,13 +40083,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -40061,13 +40100,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40078,13 +40117,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40095,13 +40134,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40112,13 +40151,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40129,13 +40168,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40146,13 +40185,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40163,13 +40202,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_fwd_implicit_gemm_splitk.sparse_submanifold_conv_fwd_implicit_gemm_kernel": { @@ -40182,13 +40221,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -40199,13 +40238,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -40216,13 +40255,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -40233,13 +40272,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -40250,13 +40289,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -40267,13 +40306,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -40284,13 +40323,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -40301,13 +40340,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -40318,13 +40357,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -40335,13 +40374,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -40352,13 +40391,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -40369,13 +40408,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -40386,13 +40425,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40403,13 +40442,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40420,13 +40459,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40437,13 +40476,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40454,13 +40493,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40471,13 +40510,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40488,13 +40527,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_fwd_implicit_gemm_splitk.sparse_submanifold_conv_fwd_implicit_gemm_splitk_kernel": { @@ -40507,13 +40546,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40524,13 +40563,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40541,13 +40580,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40558,13 +40597,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40575,13 +40614,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40592,13 +40631,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40609,13 +40648,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40626,13 +40665,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40643,13 +40682,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40660,13 +40699,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40677,13 +40716,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40694,13 +40733,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40711,13 +40750,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40728,13 +40767,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40745,13 +40784,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40762,13 +40801,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40779,13 +40818,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40796,13 +40835,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40813,13 +40852,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40830,13 +40869,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40847,13 +40886,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40864,13 +40903,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40881,13 +40920,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40898,13 +40937,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40915,13 +40954,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40932,13 +40971,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40949,13 +40988,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40966,13 +41005,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 16, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40983,13 +41022,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 8, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -41000,13 +41039,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 4, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -41017,13 +41056,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_fwd_implicit_gemm_splitk.sparse_submanifold_conv_fwd_implicit_gemm_splitk": { @@ -41080,13 +41119,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41097,13 +41136,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41114,13 +41153,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41131,13 +41170,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41148,13 +41187,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41165,13 +41204,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41182,13 +41221,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41199,13 +41238,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41216,13 +41255,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41233,13 +41272,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41250,13 +41289,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41267,13 +41306,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41284,13 +41323,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41301,13 +41340,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41318,13 +41357,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41335,13 +41374,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41352,13 +41391,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41369,13 +41408,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41386,13 +41425,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41403,13 +41442,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41420,13 +41459,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41437,13 +41476,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41454,13 +41493,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41471,13 +41510,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41488,13 +41527,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41505,13 +41544,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41522,13 +41561,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41539,13 +41578,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41556,13 +41595,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41573,13 +41612,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41590,13 +41629,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41607,13 +41646,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41624,13 +41663,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41641,13 +41680,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41658,13 +41697,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41675,13 +41714,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41692,13 +41731,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41709,13 +41748,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41726,13 +41765,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41743,13 +41782,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41760,13 +41799,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41777,13 +41816,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41794,13 +41833,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41811,13 +41850,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41828,13 +41867,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41845,13 +41884,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41862,13 +41901,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41879,13 +41918,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41896,13 +41935,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41913,13 +41952,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41930,13 +41969,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41947,13 +41986,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41964,13 +42003,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41981,13 +42020,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41998,13 +42037,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42015,13 +42054,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42032,13 +42071,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42049,13 +42088,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42066,13 +42105,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42083,13 +42122,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42100,13 +42139,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42117,13 +42156,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42134,13 +42173,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42151,13 +42190,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42168,13 +42207,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42185,13 +42224,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42202,13 +42241,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42219,13 +42258,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42236,13 +42275,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42253,13 +42292,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42270,13 +42309,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42287,13 +42326,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42304,13 +42343,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42321,13 +42360,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42338,13 +42377,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42355,13 +42394,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42372,13 +42411,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42389,13 +42428,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42406,13 +42445,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42423,13 +42462,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42440,13 +42479,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42457,13 +42496,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42474,13 +42513,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42491,13 +42530,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42508,13 +42547,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42525,13 +42564,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42542,13 +42581,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42559,13 +42598,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42576,13 +42615,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42593,13 +42632,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42610,13 +42649,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42627,13 +42666,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42644,13 +42683,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42661,13 +42700,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42678,13 +42717,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42695,13 +42734,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42712,13 +42751,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42729,13 +42768,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42746,13 +42785,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42763,13 +42802,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42780,13 +42819,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42797,13 +42836,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42814,13 +42853,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42831,13 +42870,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42848,13 +42887,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42865,13 +42904,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42882,13 +42921,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42899,13 +42938,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42916,13 +42955,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42933,13 +42972,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42950,13 +42989,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -42967,13 +43006,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -42984,13 +43023,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -43001,13 +43040,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -43018,13 +43057,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -43035,13 +43074,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -43052,13 +43091,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 128, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -43069,13 +43108,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43086,13 +43125,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43103,13 +43142,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43120,13 +43159,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43137,13 +43176,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43154,13 +43193,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43171,13 +43210,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(24, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43188,13 +43227,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_fwd_masked_implicit_gemm_splitk.sparse_submanifold_conv_fwd_masked_implicit_gemm_kernel": { @@ -43207,13 +43246,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43224,13 +43263,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43241,13 +43280,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43258,13 +43297,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43275,13 +43314,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43292,13 +43331,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43309,13 +43348,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43326,13 +43365,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43343,13 +43382,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43360,13 +43399,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43377,13 +43416,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43394,13 +43433,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43411,13 +43450,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43428,13 +43467,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43445,13 +43484,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43462,13 +43501,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43479,13 +43518,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43496,13 +43535,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43513,13 +43552,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43530,13 +43569,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43547,13 +43586,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43564,13 +43603,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43581,13 +43620,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43598,13 +43637,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43615,13 +43654,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43632,13 +43671,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43649,13 +43688,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43666,13 +43705,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43683,13 +43722,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43700,13 +43739,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43717,13 +43756,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43734,13 +43773,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43751,13 +43790,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43768,13 +43807,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43785,13 +43824,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43802,13 +43841,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43819,13 +43858,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43836,13 +43875,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43853,13 +43892,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43870,13 +43909,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43887,13 +43926,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43904,13 +43943,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43921,13 +43960,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43938,13 +43977,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43955,13 +43994,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43972,13 +44011,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43989,13 +44028,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44006,13 +44045,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44023,13 +44062,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44040,13 +44079,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44057,13 +44096,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44074,13 +44113,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44091,13 +44130,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44108,13 +44147,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44125,13 +44164,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44142,13 +44181,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44159,13 +44198,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44176,13 +44215,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44193,13 +44232,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44210,13 +44249,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44227,13 +44266,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44244,13 +44283,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44261,13 +44300,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44278,13 +44317,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44295,13 +44334,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44312,13 +44351,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44329,13 +44368,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44346,13 +44385,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44363,13 +44402,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44380,13 +44419,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44397,13 +44436,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44414,13 +44453,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44431,13 +44470,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44448,13 +44487,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44465,13 +44504,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44482,13 +44521,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44499,13 +44538,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44516,13 +44555,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44533,13 +44572,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44550,13 +44589,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44567,13 +44606,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44584,13 +44623,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44601,13 +44640,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44618,13 +44657,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44635,13 +44674,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44652,13 +44691,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44669,13 +44708,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44686,13 +44725,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44703,13 +44742,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(23, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44720,13 +44759,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44737,13 +44776,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44754,13 +44793,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44771,13 +44810,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44788,13 +44827,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44805,13 +44844,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44822,13 +44861,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44839,13 +44878,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44856,13 +44895,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44873,13 +44912,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44890,13 +44929,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(21, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44907,13 +44946,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44924,13 +44963,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44941,13 +44980,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44958,13 +44997,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44975,13 +45014,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44992,13 +45031,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -45009,13 +45048,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -45026,13 +45065,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -45043,13 +45082,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -45060,13 +45099,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -45077,13 +45116,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45094,13 +45133,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45111,13 +45150,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45128,13 +45167,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45145,13 +45184,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45162,13 +45201,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45179,13 +45218,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(19, 128, 128, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45196,13 +45235,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -45213,13 +45252,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(18, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -45230,13 +45269,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -45247,13 +45286,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(20, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -45264,13 +45303,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -45281,13 +45320,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(22, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -45298,13 +45337,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(24, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -45315,13 +45354,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_fwd_masked_implicit_gemm_splitk.sparse_submanifold_conv_fwd_masked_implicit_gemm_splitk_kernel": { @@ -45334,13 +45373,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45351,13 +45390,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45368,13 +45407,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45385,13 +45424,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45402,13 +45441,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45419,13 +45458,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45436,13 +45475,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45453,13 +45492,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45470,13 +45509,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45487,13 +45526,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45504,13 +45543,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45521,13 +45560,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45538,13 +45577,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45555,13 +45594,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45572,13 +45611,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45589,13 +45628,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45606,13 +45645,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45623,13 +45662,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45640,13 +45679,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45657,13 +45696,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45674,13 +45713,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45691,13 +45730,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45708,13 +45747,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45725,13 +45764,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45742,13 +45781,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45759,13 +45798,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45776,13 +45815,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45793,13 +45832,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 512, 2048, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45810,13 +45849,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45827,13 +45866,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45844,13 +45883,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45861,13 +45900,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45878,13 +45917,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45895,13 +45934,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45912,13 +45951,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45929,13 +45968,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45946,13 +45985,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45963,13 +46002,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45980,13 +46019,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45997,13 +46036,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46014,13 +46053,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46031,13 +46070,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46048,13 +46087,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46065,13 +46104,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46082,13 +46121,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46099,13 +46138,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46116,13 +46155,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46133,13 +46172,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46150,13 +46189,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46167,13 +46206,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46184,13 +46223,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46201,13 +46240,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46218,13 +46257,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46235,13 +46274,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46252,13 +46291,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46269,13 +46308,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46286,13 +46325,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46303,13 +46342,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46320,13 +46359,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46337,13 +46376,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46354,13 +46393,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46371,13 +46410,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46388,13 +46427,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46405,13 +46444,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46422,13 +46461,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46439,13 +46478,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46456,13 +46495,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46473,13 +46512,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46490,13 +46529,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46507,13 +46546,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46524,13 +46563,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 512, 2048, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46541,13 +46580,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46558,13 +46597,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46575,13 +46614,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46592,13 +46631,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46609,13 +46648,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46626,13 +46665,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46643,13 +46682,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46660,13 +46699,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46677,13 +46716,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46694,13 +46733,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46711,13 +46750,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46728,13 +46767,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46745,13 +46784,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46762,13 +46801,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46779,13 +46818,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46796,13 +46835,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46813,13 +46852,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46830,13 +46869,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 1024, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46847,13 +46886,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46864,13 +46903,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46881,13 +46920,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46898,13 +46937,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 1024, 4096, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46915,13 +46954,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46932,13 +46971,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46949,13 +46988,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46966,13 +47005,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46983,13 +47022,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47000,13 +47039,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47017,13 +47056,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47034,13 +47073,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47051,13 +47090,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47068,13 +47107,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47085,13 +47124,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47102,13 +47141,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47119,13 +47158,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 2048, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47136,13 +47175,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 512, 2048, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47153,13 +47192,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47170,13 +47209,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47187,13 +47226,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47204,13 +47243,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47221,13 +47260,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47238,13 +47277,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47255,13 +47294,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47272,13 +47311,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47289,13 +47328,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47306,13 +47345,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47323,13 +47362,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47340,13 +47379,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47357,13 +47396,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47374,13 +47413,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47391,13 +47430,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47408,13 +47447,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47425,13 +47464,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47442,13 +47481,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47459,13 +47498,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47476,13 +47515,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47493,13 +47532,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47510,13 +47549,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47527,13 +47566,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47544,13 +47583,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47561,13 +47600,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47578,13 +47617,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47595,13 +47634,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47612,13 +47651,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47629,13 +47668,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47646,13 +47685,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47663,13 +47702,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 4096, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47680,13 +47719,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 4096, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47697,13 +47736,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47714,13 +47753,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47731,13 +47770,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47748,13 +47787,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47765,13 +47804,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47782,13 +47821,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47799,13 +47838,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47816,13 +47855,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47833,13 +47872,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 64, 16, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47850,13 +47889,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47867,13 +47906,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47884,13 +47923,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47901,13 +47940,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47918,13 +47957,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47935,13 +47974,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47952,13 +47991,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47969,13 +48008,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 256, 256, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47986,13 +48025,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48003,13 +48042,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48020,13 +48059,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48037,13 +48076,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48054,13 +48093,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48071,13 +48110,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48088,13 +48127,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48105,13 +48144,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(16, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48122,13 +48161,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(17, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48139,13 +48178,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48156,13 +48195,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48173,13 +48212,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48190,13 +48229,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48207,13 +48246,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48224,13 +48263,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48241,13 +48280,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48258,13 +48297,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48275,13 +48314,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48292,13 +48331,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48309,13 +48348,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48326,13 +48365,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 64, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48343,13 +48382,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48360,13 +48399,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48377,13 +48416,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48394,13 +48433,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 64, 16, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48411,13 +48450,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48428,13 +48467,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48445,13 +48484,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(14, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48462,13 +48501,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48479,13 +48518,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48496,13 +48535,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48513,13 +48552,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 128, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48530,13 +48569,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 128, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48547,13 +48586,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 128, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48564,13 +48603,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48581,13 +48620,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48598,13 +48637,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48615,13 +48654,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 128, 32, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48632,13 +48671,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 128, 32, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48649,13 +48688,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(12, 128, 32, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48666,13 +48705,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48683,13 +48722,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48700,13 +48739,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48717,13 +48756,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48734,13 +48773,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 256, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48751,13 +48790,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(10, 256, 64, 27, 256, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48768,13 +48807,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48785,13 +48824,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 512, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48802,13 +48841,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 512, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48819,13 +48858,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 512, 512, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48836,13 +48875,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 512, 512, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48853,13 +48892,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 512, 512, 27, 256, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48870,13 +48909,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48887,13 +48926,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 512, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48904,13 +48943,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 512, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48921,13 +48960,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 512, 128, 27, 256, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48938,13 +48977,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 512, 128, 27, 512, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48955,13 +48994,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(8, 512, 128, 27, 1024, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48972,13 +49011,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 64, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48989,13 +49028,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49006,13 +49045,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 64, 16, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49023,13 +49062,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(15, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49040,13 +49079,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49057,13 +49096,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49074,13 +49113,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49091,13 +49130,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 128, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49108,13 +49147,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 128, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49125,13 +49164,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49142,13 +49181,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49159,13 +49198,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49176,13 +49215,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 128, 32, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49193,13 +49232,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 128, 32, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49210,13 +49249,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49227,13 +49266,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49244,13 +49283,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 512, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49261,13 +49300,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 512, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49278,13 +49317,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 512, 512, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49295,13 +49334,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 512, 512, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49312,13 +49351,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49329,13 +49368,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49346,13 +49385,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 512, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49363,13 +49402,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 512, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49380,13 +49419,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 512, 128, 27, 256, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49397,13 +49436,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 512, 128, 27, 512, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49414,13 +49453,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 64, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49431,13 +49470,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49448,13 +49487,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49465,13 +49504,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49482,13 +49521,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 64, 16, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49499,13 +49538,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49516,13 +49555,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49533,13 +49572,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(13, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49550,13 +49589,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49567,13 +49606,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49584,13 +49623,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49601,13 +49640,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49618,13 +49657,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 256, 256, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49635,13 +49674,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 256, 256, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49652,13 +49691,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49669,13 +49708,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49686,13 +49725,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49703,13 +49742,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49720,13 +49759,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 256, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49737,13 +49776,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 256, 64, 27, 256, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49754,13 +49793,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(6, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49771,13 +49810,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(6, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49788,13 +49827,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(6, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49805,13 +49844,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(6, 1024, 1024, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49822,13 +49861,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(6, 1024, 1024, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49839,13 +49878,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(6, 1024, 1024, 27, 256, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49856,13 +49895,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(7, 1024, 1024, 27, 4, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49873,13 +49912,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(9, 1024, 1024, 27, 8, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49890,13 +49929,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null }, "(11, 1024, 1024, 27, 4, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -49907,13 +49946,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, - "maxnreg": null, - "pre_hook": null, - "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0 + "reg_inc_consumer": 0, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_fwd_masked_implicit_gemm_splitk.sparse_submanifold_conv_fwd_masked_implicit_gemm_splitk": {