diff --git "a/autotune_cache.json" "b/autotune_cache.json" --- "a/autotune_cache.json" +++ "b/autotune_cache.json" @@ -24368,12 +24368,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 4194304, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24383,12 +24384,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 1941851, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24398,12 +24400,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 7877533, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24413,12 +24416,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 2226123, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24428,12 +24432,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 12748156, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24443,12 +24448,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 1911611, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24458,12 +24464,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 5017088, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24473,12 +24480,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(24, 4194304, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24488,12 +24496,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(24, 1112821, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24503,12 +24512,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(24, 33122502, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24518,12 +24528,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 1284070, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24533,12 +24544,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 10949861, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24548,12 +24560,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 1571524, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24563,12 +24576,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 12857151, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24578,12 +24592,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 1641261, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24593,12 +24608,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 15495770, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24608,12 +24624,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 1808517, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24623,12 +24640,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 9814710, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24638,12 +24656,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 1780790, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24653,12 +24672,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 14310670, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24668,12 +24688,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(24, 2113485, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24683,12 +24704,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(24, 21145957, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24698,12 +24720,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 1782768, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24713,12 +24736,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 14828658, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24728,12 +24752,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(24, 1577958, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24743,12 +24768,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(24, 17012742, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24758,12 +24784,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 2345014, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24773,12 +24800,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 12887107, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24788,12 +24816,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 2168973, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24803,12 +24832,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 12754203, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24818,12 +24848,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 1873811, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24833,12 +24864,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 12506390, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24848,12 +24880,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 2223717, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24863,12 +24896,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 14387524, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24878,12 +24912,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 7898751, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24893,12 +24928,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 8018926, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24908,12 +24944,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 4194304, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24923,12 +24960,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 7889449, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24938,12 +24976,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 7552645, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24953,12 +24992,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 7813095, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { "kwargs": { @@ -24968,14 +25008,57 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 2, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, + "reg_inc_consumer": 0 + }, + "(23, 8355681, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { + "kwargs": { + "BM": 16, + "BK": 8 + }, + "num_warps": 2, + "num_ctas": 1, + "num_stages": 3, "maxnreg": null, - "pre_hook": null + "pre_hook": null, + "ir_override": null + }, + "(23, 7714807, 6, 8, 'torch.float32', 'torch.uint32', 'torch.float32', 'torch.float32')": { + "kwargs": { + "BM": 16, + "BK": 8 + }, + "num_warps": 2, + "num_ctas": 1, + "num_stages": 3, + "maxnreg": null, + "pre_hook": null, + "ir_override": null } }, + "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm.sparse_conv_fwd_implicit_gemm_kernel": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm.sparse_conv_bwd_weight_implicit_gemm_kernel": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm_splitk.sparse_conv_fwd_implicit_gemm_kernel": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm_splitk.sparse_conv_bwd_weight_implicit_gemm_kernel": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm_splitk.sparse_conv_fwd_implicit_gemm_splitk_kernel": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm_splitk.sparse_conv_bwd_weight_implicit_gemm_splitk_kernel": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm_splitk.sparse_conv_fwd_implicit_gemm_splitk": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_implicit_gemm_splitk.sparse_conv_bwd_weight_implicit_gemm_splitk": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm.sparse_conv_fwd_implicit_gemm_kernel": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm.sparse_conv_fwd_masked_implicit_gemm_kernel": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm.sparse_conv_bwd_weight_masked_implicit_gemm_kernel": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm_splitk.sparse_conv_fwd_implicit_gemm_splitk": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm_splitk.sparse_conv_fwd_masked_implicit_gemm_kernel": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm_splitk.sparse_conv_bwd_weight_masked_implicit_gemm_kernel": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm_splitk.sparse_conv_fwd_masked_implicit_gemm_splitk_kernel": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm_splitk.sparse_conv_bwd_weight_masked_implicit_gemm_splitk_kernel": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm_splitk.sparse_conv_fwd_masked_implicit_gemm_splitk": {}, + "flex_gemm.kernels.triton.spconv.sparse_conv_masked_implicit_gemm_splitk.sparse_conv_bwd_weight_masked_implicit_gemm_splitk": {}, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_implicit_gemm.sparse_submanifold_conv_bwd_input_implicit_gemm_kernel": { "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -24986,12 +25069,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25002,12 +25086,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25018,12 +25103,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25034,12 +25120,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25050,12 +25137,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25066,12 +25154,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25082,12 +25171,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25098,12 +25188,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25114,12 +25205,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25130,12 +25222,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25146,12 +25239,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25162,12 +25256,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_implicit_gemm.sparse_submanifold_conv_bwd_weight_implicit_gemm_kernel": { @@ -25180,12 +25275,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25196,12 +25292,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25212,12 +25309,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25228,12 +25326,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25244,12 +25343,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25260,12 +25360,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25276,12 +25377,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25292,12 +25394,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25308,12 +25411,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25324,12 +25428,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25340,12 +25445,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25356,12 +25462,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_implicit_gemm_splitk.sparse_submanifold_conv_bwd_input_implicit_gemm_kernel": { @@ -25374,12 +25481,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25390,12 +25498,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25406,12 +25515,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25422,12 +25532,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25438,12 +25549,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25454,12 +25566,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25470,12 +25583,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25486,12 +25600,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25502,12 +25617,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25518,12 +25634,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25534,12 +25651,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25550,12 +25668,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_implicit_gemm_splitk.sparse_submanifold_conv_bwd_weight_implicit_gemm_kernel": { @@ -25568,12 +25687,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25584,12 +25704,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25600,12 +25721,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25616,12 +25738,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25632,12 +25755,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -25648,12 +25772,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25664,12 +25789,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25680,12 +25806,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25696,12 +25823,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25712,12 +25840,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25728,12 +25857,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25744,12 +25874,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_implicit_gemm_splitk.sparse_submanifold_conv_bwd_input_implicit_gemm_splitk_kernel": { @@ -25762,12 +25893,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25778,12 +25910,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25794,12 +25927,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25810,12 +25944,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25826,12 +25961,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25842,12 +25978,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25858,12 +25995,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25874,12 +26012,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25890,12 +26029,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25906,12 +26046,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25922,12 +26063,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25938,12 +26080,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25954,12 +26097,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25970,12 +26114,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -25986,12 +26131,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 8, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26002,12 +26148,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 8, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26018,12 +26165,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 4, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26034,12 +26182,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_implicit_gemm_splitk.sparse_submanifold_conv_bwd_weight_implicit_gemm_splitk_kernel": { @@ -26052,12 +26201,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26068,12 +26218,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26084,12 +26235,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26100,12 +26252,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26116,12 +26269,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26132,12 +26286,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26148,12 +26303,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26164,12 +26320,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26180,12 +26337,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26196,12 +26354,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26212,12 +26371,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26228,12 +26388,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, 8, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -26244,12 +26405,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_implicit_gemm_splitk.sparse_submanifold_conv_bwd_input_implicit_gemm_splitk": { @@ -26302,12 +26464,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26318,12 +26481,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26334,12 +26498,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26350,12 +26515,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26366,12 +26532,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26382,12 +26549,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26398,12 +26566,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26414,12 +26583,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26430,12 +26600,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26446,12 +26617,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26462,12 +26634,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26478,12 +26651,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26494,12 +26668,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26510,12 +26685,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26526,12 +26702,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26542,12 +26719,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26558,12 +26736,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26574,12 +26753,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26590,12 +26770,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26606,12 +26787,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26622,12 +26804,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26638,12 +26821,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26654,12 +26838,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26670,12 +26855,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26686,12 +26872,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26702,12 +26889,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26718,12 +26906,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26734,12 +26923,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26750,12 +26940,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26766,12 +26957,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26782,12 +26974,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26798,12 +26991,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26814,12 +27008,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26830,12 +27025,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26846,12 +27042,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26862,12 +27059,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26878,12 +27076,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26894,12 +27093,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26910,12 +27110,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26926,12 +27127,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26942,12 +27144,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26958,12 +27161,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26974,12 +27178,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -26990,12 +27195,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27006,12 +27212,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27022,12 +27229,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27038,12 +27246,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27054,12 +27263,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27070,12 +27280,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27086,12 +27297,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27102,12 +27314,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27118,12 +27331,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27134,12 +27348,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27150,12 +27365,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27166,12 +27382,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27182,12 +27399,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27198,12 +27416,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27214,12 +27433,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27230,12 +27450,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27246,12 +27467,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27262,12 +27484,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27278,12 +27501,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27294,12 +27518,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27310,12 +27535,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27326,12 +27552,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27342,12 +27569,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27358,12 +27586,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27374,12 +27603,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27390,12 +27620,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27406,12 +27637,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27422,12 +27654,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27438,12 +27671,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27454,12 +27688,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -27470,12 +27705,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -27486,12 +27722,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -27502,12 +27739,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -27518,12 +27756,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -27534,12 +27773,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -27550,12 +27790,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -27566,12 +27807,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_masked_implicit_gemm.sparse_submanifold_conv_bwd_weight_masked_implicit_gemm_kernel": { @@ -27584,12 +27826,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27600,12 +27843,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27616,12 +27860,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27632,12 +27877,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27648,12 +27894,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27664,12 +27911,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27680,12 +27928,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27696,12 +27945,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27712,12 +27962,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27728,12 +27979,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27744,12 +27996,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27760,12 +28013,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27776,12 +28030,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27792,12 +28047,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27808,12 +28064,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27824,12 +28081,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27840,12 +28098,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27856,12 +28115,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27872,12 +28132,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27888,12 +28149,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27904,12 +28166,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27920,12 +28183,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27936,12 +28200,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27952,12 +28217,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27968,12 +28234,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -27984,12 +28251,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28000,12 +28268,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28016,12 +28285,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28032,12 +28302,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28048,12 +28319,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28064,12 +28336,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28080,12 +28353,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28096,12 +28370,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28112,12 +28387,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28128,12 +28404,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28144,12 +28421,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28160,12 +28438,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28176,12 +28455,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28192,12 +28472,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28208,12 +28489,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28224,12 +28506,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28240,12 +28523,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28256,12 +28540,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28272,12 +28557,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -28288,12 +28574,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -28304,12 +28591,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -28320,12 +28608,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -28336,12 +28625,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -28352,12 +28642,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -28368,12 +28659,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -28384,12 +28676,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_masked_implicit_gemm_splitk.sparse_submanifold_conv_bwd_input_masked_implicit_gemm_kernel": { @@ -28402,12 +28695,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28418,12 +28712,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28434,12 +28729,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28450,12 +28746,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28466,12 +28763,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28482,12 +28780,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28498,12 +28797,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28514,12 +28814,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28530,12 +28831,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28546,12 +28848,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28562,12 +28865,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28578,12 +28882,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28594,12 +28899,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28610,12 +28916,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28626,12 +28933,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28642,12 +28950,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28658,12 +28967,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28674,12 +28984,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28690,12 +29001,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28706,12 +29018,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28722,12 +29035,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28738,12 +29052,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28754,12 +29069,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28770,12 +29086,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28786,12 +29103,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28802,12 +29120,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28818,12 +29137,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28834,12 +29154,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28850,12 +29171,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28866,12 +29188,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28882,12 +29205,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28898,12 +29222,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28914,12 +29239,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28930,12 +29256,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28946,12 +29273,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28962,12 +29290,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28978,12 +29307,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -28994,12 +29324,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29010,12 +29341,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29026,12 +29358,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29042,12 +29375,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29058,12 +29392,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29074,12 +29409,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29090,12 +29426,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29106,12 +29443,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29122,12 +29460,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29138,12 +29477,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29154,12 +29494,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29170,12 +29511,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29186,12 +29528,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29202,12 +29545,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29218,12 +29562,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29234,12 +29579,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29250,12 +29596,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29266,12 +29613,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29282,12 +29630,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29298,12 +29647,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29314,12 +29664,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29330,12 +29681,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29346,12 +29698,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29362,12 +29715,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29378,12 +29732,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29394,12 +29749,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29410,12 +29766,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29426,12 +29783,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29442,12 +29800,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29458,12 +29817,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29474,12 +29834,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29490,12 +29851,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29506,12 +29868,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29522,12 +29885,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29538,12 +29902,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29554,12 +29919,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -29570,12 +29936,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -29586,12 +29953,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -29602,12 +29970,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -29618,12 +29987,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -29634,12 +30004,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -29650,12 +30021,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -29666,12 +30038,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_masked_implicit_gemm_splitk.sparse_submanifold_conv_bwd_weight_masked_implicit_gemm_kernel": { @@ -29684,12 +30057,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29700,12 +30074,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29716,12 +30091,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29732,12 +30108,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29748,12 +30125,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29764,12 +30142,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29780,12 +30159,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29796,12 +30176,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29812,12 +30193,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29828,12 +30210,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29844,12 +30227,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29860,12 +30244,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29876,12 +30261,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29892,12 +30278,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29908,12 +30295,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29924,12 +30312,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29940,12 +30329,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29956,12 +30346,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29972,12 +30363,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -29988,12 +30380,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30004,12 +30397,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30020,12 +30414,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30036,12 +30431,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30052,12 +30448,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30068,12 +30465,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30084,12 +30482,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30100,12 +30499,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30116,12 +30516,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30132,12 +30533,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30148,12 +30550,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30164,12 +30567,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30180,12 +30584,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30196,12 +30601,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30212,12 +30618,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30228,12 +30635,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30244,12 +30652,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30260,12 +30669,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30276,12 +30686,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30292,12 +30703,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30308,12 +30720,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30324,12 +30737,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30340,12 +30754,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30356,12 +30771,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30372,12 +30788,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -30388,12 +30805,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -30404,12 +30822,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -30420,12 +30839,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -30436,12 +30856,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -30452,12 +30873,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -30468,12 +30890,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -30484,12 +30907,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_masked_implicit_gemm_splitk.sparse_submanifold_conv_bwd_input_masked_implicit_gemm_splitk_kernel": { @@ -30502,12 +30926,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30518,12 +30943,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30534,12 +30960,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30550,12 +30977,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30566,12 +30994,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30582,12 +31011,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30598,12 +31028,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30614,12 +31045,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30630,12 +31062,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 2048, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30646,12 +31079,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 2048, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30662,12 +31096,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 2048, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30678,12 +31113,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30694,12 +31130,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30710,12 +31147,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30726,12 +31164,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30742,12 +31181,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30758,12 +31198,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30774,12 +31215,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30790,12 +31232,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 4096, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30806,12 +31249,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 4096, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30822,12 +31266,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30838,12 +31283,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30854,12 +31300,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30870,12 +31317,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30886,12 +31334,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30902,12 +31351,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30918,12 +31368,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30934,12 +31385,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30950,12 +31402,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30966,12 +31419,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30982,12 +31436,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -30998,12 +31453,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31014,12 +31470,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31030,12 +31487,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31046,12 +31504,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 2048, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31062,12 +31521,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 2048, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31078,12 +31538,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 2048, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31094,12 +31555,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31110,12 +31572,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31126,12 +31589,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31142,12 +31606,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31158,12 +31623,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31174,12 +31640,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31190,12 +31657,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31206,12 +31674,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 4096, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31222,12 +31691,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 4096, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31238,12 +31708,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31254,12 +31725,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31270,12 +31742,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31286,12 +31759,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31302,12 +31776,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31318,12 +31793,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31334,12 +31810,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31350,12 +31827,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31366,12 +31844,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31382,12 +31861,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31398,12 +31878,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31414,12 +31895,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31430,12 +31912,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31446,12 +31929,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31462,12 +31946,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31478,12 +31963,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31494,12 +31980,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31510,12 +31997,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31526,12 +32014,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31542,12 +32031,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31558,12 +32048,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31574,12 +32065,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31590,12 +32082,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31606,12 +32099,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31622,12 +32116,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31638,12 +32133,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31654,12 +32150,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31670,12 +32167,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 4096, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31686,12 +32184,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 4096, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31702,12 +32201,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 4096, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31718,12 +32218,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31734,12 +32235,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31750,12 +32252,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31766,12 +32269,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31782,12 +32286,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31798,12 +32303,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31814,12 +32320,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31830,12 +32337,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31846,12 +32354,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31862,12 +32371,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31878,12 +32388,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31894,12 +32405,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31910,12 +32422,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31926,12 +32439,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 2048, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31942,12 +32456,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 2048, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31958,12 +32473,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31974,12 +32490,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -31990,12 +32507,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32006,12 +32524,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32022,12 +32541,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32038,12 +32558,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32054,12 +32575,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32070,12 +32592,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32086,12 +32609,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32102,12 +32626,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32118,12 +32643,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32134,12 +32660,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32150,12 +32677,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 4096, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32166,12 +32694,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32182,12 +32711,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32198,12 +32728,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32214,12 +32745,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32230,12 +32762,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32246,12 +32779,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 2048, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32262,12 +32796,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 2048, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32278,12 +32813,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 2048, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32294,12 +32830,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 2048, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32310,12 +32847,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32326,12 +32864,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32342,12 +32881,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32358,12 +32898,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32374,12 +32915,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32390,12 +32932,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32406,12 +32949,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32422,12 +32966,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32438,12 +32983,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32454,12 +33000,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32470,12 +33017,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32486,12 +33034,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32502,12 +33051,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32518,12 +33068,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32534,12 +33085,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32550,12 +33102,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32566,12 +33119,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32582,12 +33136,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32598,12 +33153,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32614,12 +33170,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32630,12 +33187,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32646,12 +33204,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32662,12 +33221,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32678,12 +33238,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32694,12 +33255,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32710,12 +33272,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32726,12 +33289,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 2048, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32742,12 +33306,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32758,12 +33323,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32774,12 +33340,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32790,12 +33357,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32806,12 +33374,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32822,12 +33391,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32838,12 +33408,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 4096, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32854,12 +33425,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 4096, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32870,12 +33442,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 4096, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32886,12 +33459,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 4096, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32902,12 +33476,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32918,12 +33493,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32934,12 +33510,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32950,12 +33527,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32966,12 +33544,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32982,12 +33561,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -32998,12 +33578,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33014,12 +33595,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33030,12 +33612,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33046,12 +33629,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33062,12 +33646,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33078,12 +33663,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33094,12 +33680,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33110,12 +33697,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33126,12 +33714,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33142,12 +33731,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33158,12 +33748,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 16, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33174,12 +33765,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33190,12 +33782,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 8, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33206,12 +33799,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 8, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33222,12 +33816,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 4, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -33238,12 +33833,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_masked_implicit_gemm_splitk.sparse_submanifold_conv_bwd_weight_masked_implicit_gemm_splitk_kernel": { @@ -33256,12 +33852,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33272,12 +33869,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33288,12 +33886,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 64, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33304,12 +33903,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 64, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33320,12 +33920,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 64, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33336,12 +33937,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33352,12 +33954,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33368,12 +33971,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33384,12 +33988,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33400,12 +34005,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33416,12 +34022,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33432,12 +34039,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33448,12 +34056,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33464,12 +34073,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33480,12 +34090,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33496,12 +34107,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33512,12 +34124,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33528,12 +34141,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33544,12 +34158,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33560,12 +34175,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33576,12 +34192,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33592,12 +34209,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33608,12 +34226,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33624,12 +34243,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33640,12 +34260,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33656,12 +34277,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33672,12 +34294,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33688,12 +34311,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33704,12 +34328,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33720,12 +34345,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33736,12 +34362,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33752,12 +34379,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33768,12 +34396,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33784,12 +34413,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33800,12 +34430,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33816,12 +34447,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33832,12 +34464,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33848,12 +34481,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33864,12 +34498,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33880,12 +34515,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33896,12 +34532,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33912,12 +34549,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33928,12 +34566,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 32, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33944,12 +34583,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 32, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33960,12 +34600,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 32, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33976,12 +34617,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 32, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -33992,12 +34634,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34008,12 +34651,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34024,12 +34668,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34040,12 +34685,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 64, 16, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34056,12 +34702,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 64, 16, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34072,12 +34719,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 64, 16, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34088,12 +34736,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34104,12 +34753,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34120,12 +34770,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34136,12 +34787,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34152,12 +34804,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34168,12 +34821,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34184,12 +34838,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34200,12 +34855,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34216,12 +34872,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34232,12 +34889,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34248,12 +34906,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34264,12 +34923,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34280,12 +34940,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34296,12 +34957,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34312,12 +34974,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34328,12 +34991,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34344,12 +35008,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34360,12 +35025,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34376,12 +35042,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34392,12 +35059,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34408,12 +35076,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34424,12 +35093,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34440,12 +35110,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34456,12 +35127,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34472,12 +35144,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34488,12 +35161,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34504,12 +35178,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34520,12 +35195,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34536,12 +35212,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34552,12 +35229,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34568,12 +35246,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34584,12 +35263,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34600,12 +35280,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34616,12 +35297,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34632,12 +35314,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34648,12 +35331,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34664,12 +35348,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34680,12 +35365,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34696,12 +35382,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34712,12 +35399,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34728,12 +35416,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34744,12 +35433,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34760,12 +35450,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34776,12 +35467,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 32, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34792,12 +35484,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 32, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34808,12 +35501,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 32, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34824,12 +35518,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 32, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34840,12 +35535,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34856,12 +35552,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34872,12 +35569,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34888,12 +35586,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 16, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34904,12 +35603,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 16, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34920,12 +35620,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 16, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34936,12 +35637,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34952,12 +35654,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34968,12 +35671,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -34984,12 +35688,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35000,12 +35705,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35016,12 +35722,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35032,12 +35739,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35048,12 +35756,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35064,12 +35773,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35080,12 +35790,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35096,12 +35807,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35112,12 +35824,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35128,12 +35841,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35144,12 +35858,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35160,12 +35875,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35176,12 +35892,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35192,12 +35909,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35208,12 +35926,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35224,12 +35943,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35240,12 +35960,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35256,12 +35977,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35272,12 +35994,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35288,12 +36011,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35304,12 +36028,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35320,12 +36045,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35336,12 +36062,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35352,12 +36079,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35368,12 +36096,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35384,12 +36113,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35400,12 +36130,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35416,12 +36147,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35432,12 +36164,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35448,12 +36181,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35464,12 +36198,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35480,12 +36215,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35496,12 +36232,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35512,12 +36249,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35528,12 +36266,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35544,12 +36283,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35560,12 +36300,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35576,12 +36317,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35592,12 +36334,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35608,12 +36351,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35624,12 +36368,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 32, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35640,12 +36385,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 32, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35656,12 +36402,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 32, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35672,12 +36419,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 32, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35688,12 +36436,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35704,12 +36453,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35720,12 +36470,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35736,12 +36487,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 16, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35752,12 +36504,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 16, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35768,12 +36521,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 16, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35784,12 +36538,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35800,12 +36555,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35816,12 +36572,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35832,12 +36589,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35848,12 +36606,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35864,12 +36623,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35880,12 +36640,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35896,12 +36657,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35912,12 +36674,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35928,12 +36691,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35944,12 +36708,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35960,12 +36725,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35976,12 +36742,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -35992,12 +36759,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36008,12 +36776,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36024,12 +36793,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36040,12 +36810,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36056,12 +36827,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36072,12 +36844,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36088,12 +36861,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 16, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36104,12 +36878,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 16, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36120,12 +36895,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 16, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36136,12 +36912,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36152,12 +36929,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36168,12 +36946,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36184,12 +36963,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36200,12 +36980,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36216,12 +36997,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36232,12 +37014,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36248,12 +37031,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36264,12 +37048,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36280,12 +37065,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36296,12 +37082,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36312,12 +37099,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36328,12 +37116,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36344,12 +37133,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 32, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36360,12 +37150,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 32, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36376,12 +37167,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 32, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36392,12 +37184,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 32, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36408,12 +37201,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36424,12 +37218,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36440,12 +37235,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36456,12 +37252,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36472,12 +37269,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36488,12 +37286,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36504,12 +37303,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36520,12 +37320,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36536,12 +37337,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36552,12 +37354,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36568,12 +37371,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36584,12 +37388,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36600,12 +37405,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36616,12 +37422,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36632,12 +37439,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36648,12 +37456,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36664,12 +37473,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36680,12 +37490,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36696,12 +37507,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36712,12 +37524,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36728,12 +37541,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36744,12 +37558,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36760,12 +37575,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36776,12 +37592,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36792,12 +37609,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36808,12 +37626,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36824,12 +37643,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36840,12 +37660,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36856,12 +37677,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36872,12 +37694,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36888,12 +37711,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36904,12 +37728,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36920,12 +37745,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36936,12 +37762,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36952,12 +37779,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36968,12 +37796,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -36984,12 +37813,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37000,12 +37830,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 16, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37016,12 +37847,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 16, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37032,12 +37864,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 16, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37048,12 +37881,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37064,12 +37898,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37080,12 +37915,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37096,12 +37932,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37112,12 +37949,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37128,12 +37966,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37144,12 +37983,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37160,12 +38000,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37176,12 +38017,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37192,12 +38034,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37208,12 +38051,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37224,12 +38068,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37240,12 +38085,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37256,12 +38102,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37272,12 +38119,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37288,12 +38136,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37304,12 +38153,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37320,12 +38170,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37336,12 +38187,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37352,12 +38204,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37368,12 +38221,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37384,12 +38238,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37400,12 +38255,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37416,12 +38272,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37432,12 +38289,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37448,12 +38306,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37464,12 +38323,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37480,12 +38340,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37496,12 +38357,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37512,12 +38374,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37528,12 +38391,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37544,12 +38408,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37560,12 +38425,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37576,12 +38442,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37592,12 +38459,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37608,12 +38476,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37624,12 +38493,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 32, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37640,12 +38510,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 32, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37656,12 +38527,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 32, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37672,12 +38544,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 32, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37688,12 +38561,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37704,12 +38578,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37720,12 +38595,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37736,12 +38612,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37752,12 +38629,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37768,12 +38646,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37784,12 +38663,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37800,12 +38680,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37816,12 +38697,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37832,12 +38714,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37848,12 +38731,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37864,12 +38748,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37880,12 +38765,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37896,12 +38782,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37912,12 +38799,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 32, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37928,12 +38816,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 32, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37944,12 +38833,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 32, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37960,12 +38850,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 32, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37976,12 +38867,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -37992,12 +38884,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38008,12 +38901,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38024,12 +38918,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38040,12 +38935,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38056,12 +38952,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38072,12 +38969,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38088,12 +38986,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38104,12 +39003,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38120,12 +39020,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 16, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38136,12 +39037,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 16, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38152,12 +39054,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 16, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38168,12 +39071,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38184,12 +39088,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38200,12 +39105,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38216,12 +39122,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38232,12 +39139,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38248,12 +39156,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38264,12 +39173,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38280,12 +39190,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38296,12 +39207,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38312,12 +39224,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 16, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38328,12 +39241,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 16, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38344,12 +39258,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 16, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38360,12 +39275,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38376,12 +39292,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38392,12 +39309,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38408,12 +39326,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38424,12 +39343,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, 8, True, 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.uint32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -38440,12 +39360,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_bwd_masked_implicit_gemm_splitk.sparse_submanifold_conv_bwd_input_masked_implicit_gemm_splitk": { @@ -38924,12 +39845,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -38940,12 +39862,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -38956,12 +39879,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -38972,12 +39896,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -38988,12 +39913,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39004,12 +39930,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39020,12 +39947,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39036,12 +39964,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39052,12 +39981,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39068,12 +39998,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39084,12 +40015,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39100,12 +40032,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39116,12 +40049,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39132,12 +40066,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39148,12 +40083,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39164,12 +40100,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39180,12 +40117,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39196,12 +40134,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39212,12 +40151,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_fwd_implicit_gemm_splitk.sparse_submanifold_conv_fwd_implicit_gemm_kernel": { @@ -39230,12 +40170,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39246,12 +40187,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39262,12 +40204,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39278,12 +40221,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39294,12 +40238,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39310,12 +40255,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39326,12 +40272,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39342,12 +40289,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39358,12 +40306,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39374,12 +40323,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39390,12 +40340,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39406,12 +40357,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float16')": { "kwargs": { @@ -39422,12 +40374,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39438,12 +40391,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39454,12 +40408,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39470,12 +40425,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39486,12 +40442,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39502,12 +40459,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39518,12 +40476,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_fwd_implicit_gemm_splitk.sparse_submanifold_conv_fwd_implicit_gemm_splitk_kernel": { @@ -39536,12 +40495,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39552,12 +40512,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39568,12 +40529,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39584,12 +40546,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39600,12 +40563,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39616,12 +40580,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39632,12 +40597,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39648,12 +40614,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39664,12 +40631,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39680,12 +40648,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39696,12 +40665,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39712,12 +40682,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39728,12 +40699,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39744,12 +40716,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39760,12 +40733,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39776,12 +40750,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39792,12 +40767,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39808,12 +40784,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39824,12 +40801,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39840,12 +40818,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39856,12 +40835,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39872,12 +40852,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39888,12 +40869,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39904,12 +40886,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39920,12 +40903,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39936,12 +40920,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39952,12 +40937,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39968,12 +40954,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 16, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -39984,12 +40971,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 8, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40000,12 +40988,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 4, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.float32')": { "kwargs": { @@ -40016,12 +41005,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_fwd_implicit_gemm_splitk.sparse_submanifold_conv_fwd_implicit_gemm_splitk": { @@ -40078,12 +41068,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40094,12 +41085,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40110,12 +41102,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40126,12 +41119,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40142,12 +41136,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40158,12 +41153,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40174,12 +41170,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40190,12 +41187,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40206,12 +41204,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40222,12 +41221,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40238,12 +41238,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40254,12 +41255,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40270,12 +41272,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40286,12 +41289,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40302,12 +41306,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40318,12 +41323,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40334,12 +41340,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40350,12 +41357,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40366,12 +41374,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40382,12 +41391,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40398,12 +41408,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40414,12 +41425,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40430,12 +41442,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40446,12 +41459,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40462,12 +41476,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40478,12 +41493,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40494,12 +41510,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40510,12 +41527,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40526,12 +41544,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40542,12 +41561,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40558,12 +41578,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40574,12 +41595,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40590,12 +41612,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40606,12 +41629,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40622,12 +41646,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40638,12 +41663,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40654,12 +41680,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40670,12 +41697,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40686,12 +41714,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40702,12 +41731,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40718,12 +41748,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40734,12 +41765,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40750,12 +41782,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40766,12 +41799,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40782,12 +41816,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40798,12 +41833,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40814,12 +41850,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40830,12 +41867,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40846,12 +41884,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40862,12 +41901,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40878,12 +41918,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40894,12 +41935,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40910,12 +41952,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40926,12 +41969,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40942,12 +41986,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40958,12 +42003,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40974,12 +42020,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -40990,12 +42037,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41006,12 +42054,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41022,12 +42071,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41038,12 +42088,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41054,12 +42105,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41070,12 +42122,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41086,12 +42139,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41102,12 +42156,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41118,12 +42173,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41134,12 +42190,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41150,12 +42207,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41166,12 +42224,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41182,12 +42241,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41198,12 +42258,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41214,12 +42275,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41230,12 +42292,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41246,12 +42309,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41262,12 +42326,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41278,12 +42343,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41294,12 +42360,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41310,12 +42377,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41326,12 +42394,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41342,12 +42411,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41358,12 +42428,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41374,12 +42445,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41390,12 +42462,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41406,12 +42479,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41422,12 +42496,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41438,12 +42513,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41454,12 +42530,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41470,12 +42547,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41486,12 +42564,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41502,12 +42581,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41518,12 +42598,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41534,12 +42615,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41550,12 +42632,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41566,12 +42649,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41582,12 +42666,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41598,12 +42683,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41614,12 +42700,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41630,12 +42717,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41646,12 +42734,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41662,12 +42751,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41678,12 +42768,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41694,12 +42785,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41710,12 +42802,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41726,12 +42819,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41742,12 +42836,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41758,12 +42853,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41774,12 +42870,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41790,12 +42887,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41806,12 +42904,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41822,12 +42921,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41838,12 +42938,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -41854,12 +42955,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -41870,12 +42972,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -41886,12 +42989,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -41902,12 +43006,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -41918,12 +43023,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -41934,12 +43040,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 128, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -41950,12 +43057,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41966,12 +43074,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41982,12 +43091,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -41998,12 +43108,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42014,12 +43125,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42030,12 +43142,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42046,12 +43159,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(24, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42062,12 +43176,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_fwd_masked_implicit_gemm_splitk.sparse_submanifold_conv_fwd_masked_implicit_gemm_kernel": { @@ -42080,12 +43195,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42096,12 +43212,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42112,12 +43229,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42128,12 +43246,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42144,12 +43263,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42160,12 +43280,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42176,12 +43297,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42192,12 +43314,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42208,12 +43331,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42224,12 +43348,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42240,12 +43365,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42256,12 +43382,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42272,12 +43399,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42288,12 +43416,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42304,12 +43433,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42320,12 +43450,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42336,12 +43467,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42352,12 +43484,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42368,12 +43501,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42384,12 +43518,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42400,12 +43535,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42416,12 +43552,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42432,12 +43569,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42448,12 +43586,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42464,12 +43603,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42480,12 +43620,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42496,12 +43637,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42512,12 +43654,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42528,12 +43671,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42544,12 +43688,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42560,12 +43705,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42576,12 +43722,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42592,12 +43739,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42608,12 +43756,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42624,12 +43773,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42640,12 +43790,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42656,12 +43807,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42672,12 +43824,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42688,12 +43841,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42704,12 +43858,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42720,12 +43875,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42736,12 +43892,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42752,12 +43909,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42768,12 +43926,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42784,12 +43943,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42800,12 +43960,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42816,12 +43977,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42832,12 +43994,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42848,12 +44011,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42864,12 +44028,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42880,12 +44045,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42896,12 +44062,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42912,12 +44079,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42928,12 +44096,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42944,12 +44113,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42960,12 +44130,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42976,12 +44147,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -42992,12 +44164,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43008,12 +44181,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43024,12 +44198,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43040,12 +44215,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43056,12 +44232,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43072,12 +44249,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43088,12 +44266,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43104,12 +44283,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43120,12 +44300,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43136,12 +44317,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43152,12 +44334,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43168,12 +44351,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43184,12 +44368,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43200,12 +44385,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43216,12 +44402,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43232,12 +44419,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43248,12 +44436,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43264,12 +44453,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43280,12 +44470,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43296,12 +44487,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43312,12 +44504,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43328,12 +44521,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43344,12 +44538,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43360,12 +44555,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43376,12 +44572,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43392,12 +44589,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43408,12 +44606,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43424,12 +44623,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43440,12 +44640,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43456,12 +44657,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43472,12 +44674,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43488,12 +44691,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(23, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43504,12 +44708,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43520,12 +44725,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43536,12 +44742,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43552,12 +44759,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 256, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43568,12 +44776,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43584,12 +44793,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 512, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43600,12 +44810,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43616,12 +44827,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 1024, 4096, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43632,12 +44844,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43648,12 +44861,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43664,12 +44878,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(21, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43680,12 +44895,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43696,12 +44912,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43712,12 +44929,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43728,12 +44946,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43744,12 +44963,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43760,12 +44980,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43776,12 +44997,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 128, 32, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43792,12 +45014,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43808,12 +45031,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 64, 16, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43824,12 +45048,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43840,12 +45065,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -43856,12 +45082,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -43872,12 +45099,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -43888,12 +45116,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -43904,12 +45133,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -43920,12 +45150,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 256, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -43936,12 +45167,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(19, 128, 128, 27, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -43952,12 +45184,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 512, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43968,12 +45201,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(18, 512, 2048, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -43984,12 +45218,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 256, 256, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44000,12 +45235,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(20, 256, 1024, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44016,12 +45252,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 128, 128, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44032,12 +45269,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(22, 128, 512, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44048,12 +45286,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(24, 64, 64, 27, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float16')": { "kwargs": { @@ -44064,12 +45303,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_fwd_masked_implicit_gemm_splitk.sparse_submanifold_conv_fwd_masked_implicit_gemm_splitk_kernel": { @@ -44082,12 +45322,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44098,12 +45339,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44114,12 +45356,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44130,12 +45373,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44146,12 +45390,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44162,12 +45407,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44178,12 +45424,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44194,12 +45441,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44210,12 +45458,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44226,12 +45475,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44242,12 +45492,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44258,12 +45509,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44274,12 +45526,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44290,12 +45543,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44306,12 +45560,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44322,12 +45577,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44338,12 +45594,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44354,12 +45611,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44370,12 +45628,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44386,12 +45645,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44402,12 +45662,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44418,12 +45679,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44434,12 +45696,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44450,12 +45713,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44466,12 +45730,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44482,12 +45747,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44498,12 +45764,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44514,12 +45781,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 512, 2048, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44530,12 +45798,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44546,12 +45815,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44562,12 +45832,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44578,12 +45849,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44594,12 +45866,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44610,12 +45883,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44626,12 +45900,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44642,12 +45917,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44658,12 +45934,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44674,12 +45951,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44690,12 +45968,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44706,12 +45985,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44722,12 +46002,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44738,12 +46019,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44754,12 +46036,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44770,12 +46053,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44786,12 +46070,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44802,12 +46087,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44818,12 +46104,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44834,12 +46121,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44850,12 +46138,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44866,12 +46155,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44882,12 +46172,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44898,12 +46189,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44914,12 +46206,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44930,12 +46223,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44946,12 +46240,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44962,12 +46257,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44978,12 +46274,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -44994,12 +46291,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45010,12 +46308,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45026,12 +46325,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45042,12 +46342,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45058,12 +46359,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45074,12 +46376,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45090,12 +46393,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45106,12 +46410,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45122,12 +46427,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45138,12 +46444,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45154,12 +46461,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45170,12 +46478,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45186,12 +46495,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45202,12 +46512,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 512, 2048, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45218,12 +46529,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45234,12 +46546,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45250,12 +46563,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45266,12 +46580,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45282,12 +46597,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45298,12 +46614,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45314,12 +46631,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45330,12 +46648,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45346,12 +46665,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45362,12 +46682,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45378,12 +46699,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45394,12 +46716,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45410,12 +46733,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45426,12 +46750,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45442,12 +46767,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45458,12 +46784,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45474,12 +46801,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45490,12 +46818,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 1024, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45506,12 +46835,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45522,12 +46852,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45538,12 +46869,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45554,12 +46886,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 1024, 4096, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45570,12 +46903,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45586,12 +46920,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45602,12 +46937,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45618,12 +46954,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45634,12 +46971,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45650,12 +46988,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45666,12 +47005,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45682,12 +47022,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45698,12 +47039,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45714,12 +47056,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45730,12 +47073,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45746,12 +47090,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 2048, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45762,12 +47107,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 2048, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45778,12 +47124,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 512, 2048, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45794,12 +47141,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45810,12 +47158,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45826,12 +47175,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45842,12 +47192,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45858,12 +47209,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45874,12 +47226,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45890,12 +47243,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45906,12 +47260,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45922,12 +47277,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45938,12 +47294,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45954,12 +47311,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45970,12 +47328,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -45986,12 +47345,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46002,12 +47362,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 256, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46018,12 +47379,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46034,12 +47396,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46050,12 +47413,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46066,12 +47430,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46082,12 +47447,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46098,12 +47464,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46114,12 +47481,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46130,12 +47498,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46146,12 +47515,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46162,12 +47532,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46178,12 +47549,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46194,12 +47566,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46210,12 +47583,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46226,12 +47600,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46242,12 +47617,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 4096, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46258,12 +47634,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 4096, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46274,12 +47651,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 4096, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46290,12 +47668,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 4096, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46306,12 +47685,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46322,12 +47702,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46338,12 +47719,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46354,12 +47736,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46370,12 +47753,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46386,12 +47770,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46402,12 +47787,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 128, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46418,12 +47804,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46434,12 +47821,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 64, 16, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46450,12 +47838,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46466,12 +47855,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46482,12 +47872,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46498,12 +47889,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46514,12 +47906,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46530,12 +47923,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46546,12 +47940,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46562,12 +47957,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 256, 256, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46578,12 +47974,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 1024, 4096, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46594,12 +47991,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 512, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46610,12 +48008,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46626,12 +48025,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46642,12 +48042,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 512, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46658,12 +48059,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46674,12 +48076,13 @@ "num_warps": 8, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 1024, 1024, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46690,12 +48093,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(16, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46706,12 +48110,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(17, 512, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46722,12 +48127,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 256, 256, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46738,12 +48144,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46754,12 +48161,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46770,12 +48178,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46786,12 +48195,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46802,12 +48212,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 256, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46818,12 +48229,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 256, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46834,12 +48246,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46850,12 +48263,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46866,12 +48280,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46882,12 +48297,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46898,12 +48314,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 64, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46914,12 +48331,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46930,12 +48348,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46946,12 +48365,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46962,12 +48382,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 64, 16, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46978,12 +48399,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -46994,12 +48416,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47010,12 +48433,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(14, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47026,12 +48450,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47042,12 +48467,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47058,12 +48484,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47074,12 +48501,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 128, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47090,12 +48518,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 128, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47106,12 +48535,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 128, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47122,12 +48552,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47138,12 +48569,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47154,12 +48586,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47170,12 +48603,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 128, 32, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47186,12 +48620,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 128, 32, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47202,12 +48637,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(12, 128, 32, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47218,12 +48654,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47234,12 +48671,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47250,12 +48688,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47266,12 +48705,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47282,12 +48722,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 256, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47298,12 +48739,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(10, 256, 64, 27, 256, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47314,12 +48756,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47330,12 +48773,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 512, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47346,12 +48790,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 512, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47362,12 +48807,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 512, 512, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47378,12 +48824,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 512, 512, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47394,12 +48841,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 512, 512, 27, 256, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47410,12 +48858,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47426,12 +48875,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 512, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47442,12 +48892,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 512, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47458,12 +48909,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 512, 128, 27, 256, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47474,12 +48926,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 512, 128, 27, 512, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47490,12 +48943,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(8, 512, 128, 27, 1024, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47506,12 +48960,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 64, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47522,12 +48977,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47538,12 +48994,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 64, 16, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47554,12 +49011,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(15, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47570,12 +49028,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 128, 128, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47586,12 +49045,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 128, 128, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47602,12 +49062,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 128, 128, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47618,12 +49079,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 128, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47634,12 +49096,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 128, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47650,12 +49113,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 128, 32, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47666,12 +49130,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 128, 32, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47682,12 +49147,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 128, 32, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47698,12 +49164,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 128, 32, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47714,12 +49181,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 128, 32, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47730,12 +49198,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 512, 512, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47746,12 +49215,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 512, 512, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47762,12 +49232,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 512, 512, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47778,12 +49249,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 512, 512, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47794,12 +49266,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 512, 512, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47810,12 +49283,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 512, 512, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47826,12 +49300,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 512, 128, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47842,12 +49317,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 512, 128, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47858,12 +49334,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 512, 128, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47874,12 +49351,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 512, 128, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47890,12 +49368,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 512, 128, 27, 256, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47906,12 +49385,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 512, 128, 27, 512, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47922,12 +49402,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 64, 64, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47938,12 +49419,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 64, 64, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47954,12 +49436,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 64, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47970,12 +49453,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 64, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -47986,12 +49470,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 64, 16, 27, 2, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48002,12 +49487,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 64, 16, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48018,12 +49504,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 64, 16, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48034,12 +49521,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(13, 64, 16, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48050,12 +49538,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 256, 256, 27, 4, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48066,12 +49555,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 256, 256, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48082,12 +49572,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 256, 256, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48098,12 +49589,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 256, 256, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48114,12 +49606,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 256, 256, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48130,12 +49623,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 256, 256, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48146,12 +49640,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 256, 64, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48162,12 +49657,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 256, 64, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48178,12 +49674,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 256, 64, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48194,12 +49691,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 256, 64, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48210,12 +49708,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 256, 64, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48226,12 +49725,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 256, 64, 27, 256, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48242,12 +49742,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(6, 1024, 1024, 27, 8, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48258,12 +49759,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(6, 1024, 1024, 27, 16, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48274,12 +49776,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(6, 1024, 1024, 27, 32, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48290,12 +49793,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(6, 1024, 1024, 27, 64, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48306,12 +49810,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(6, 1024, 1024, 27, 128, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48322,12 +49827,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(6, 1024, 1024, 27, 256, True, 'torch.float16', 'torch.float16', 'torch.float16', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48338,12 +49844,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(7, 1024, 1024, 27, 4, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48354,12 +49861,13 @@ "num_warps": 2, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(9, 1024, 1024, 27, 8, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48370,12 +49878,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 }, "(11, 1024, 1024, 27, 4, True, 'torch.float32', 'torch.float32', 'torch.float32', 'torch.uint32', 'torch.int64', 'torch.float32')": { "kwargs": { @@ -48386,12 +49895,13 @@ "num_warps": 4, "num_ctas": 1, "num_stages": 4, + "maxnreg": null, + "pre_hook": null, + "ir_override": null, "num_buffers_warp_spec": 0, "num_consumer_groups": 0, "reg_dec_producer": 0, - "reg_inc_consumer": 0, - "maxnreg": null, - "pre_hook": null + "reg_inc_consumer": 0 } }, "flex_gemm.kernels.triton.spconv.sparse_submanifold_conv_fwd_masked_implicit_gemm_splitk.sparse_submanifold_conv_fwd_masked_implicit_gemm_splitk": {