{ "device": "NVIDIA RTX PRO 6000 Blackwell Server Edition", "procrustes_quality": [ { "N": 32, "k": 8, "cos_full": 0.43593162298202515, "cos_pinv": 0.21416151523590088, "cos_lerp": 0.42476722598075867, "lerp_alpha": 0.3, "cos_slerp": 0.21416151523590088, "slerp_alpha": -1.0, "cos_subspace": 0.42993202805519104, "cos_stay_k": 0.42145586013793945, "nn_pinv": 0.1765306144952774, "nn_lerp": 0.6806122660636902, "nn_slerp": 0.1765306144952774, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.42476722598075867, "0.5": 0.40883490443229675, "0.7": 0.3679943382740021, "0.9": 0.27967971563339233, "1.0": 0.21416151523590088 } }, { "N": 32, "k": 16, "cos_full": 0.43704530596733093, "cos_pinv": 0.2966524362564087, "cos_lerp": 0.4258710741996765, "lerp_alpha": 0.3, "cos_slerp": 0.2966524362564087, "slerp_alpha": -1.0, "cos_subspace": 0.43163612484931946, "cos_stay_k": 0.4251972436904907, "nn_pinv": 0.30000001192092896, "nn_lerp": 0.6777551174163818, "nn_slerp": 0.30000001192092896, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.4258710741996765, "0.5": 0.4110897481441498, "0.7": 0.38054221868515015, "0.9": 0.329505056142807, "1.0": 0.2966524362564087 } }, { "N": 32, "k": 24, "cos_full": 0.44052907824516296, "cos_pinv": 0.38639554381370544, "cos_lerp": 0.436450332403183, "lerp_alpha": 0.3, "cos_slerp": 0.38639554381370544, "slerp_alpha": -1.0, "cos_subspace": 0.4368778169155121, "cos_stay_k": 0.4383596181869507, "nn_pinv": 0.5551020503044128, "nn_lerp": 0.7716326713562012, "nn_slerp": 0.5551020503044128, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.436450332403183, "0.5": 0.4308852553367615, "0.7": 0.4184540808200836, "0.9": 0.3987543284893036, "1.0": 0.38639554381370544 } }, { "N": 48, "k": 8, "cos_full": 0.4421226978302002, "cos_pinv": 0.17636379599571228, "cos_lerp": 0.43055838346481323, "lerp_alpha": 0.3, "cos_slerp": 0.17636379599571228, "slerp_alpha": -1.0, "cos_subspace": 0.4350314438343048, "cos_stay_k": 0.4192100465297699, "nn_pinv": 0.10224489867687225, "nn_lerp": 0.7016326785087585, "nn_slerp": 0.10224489867687225, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.43055838346481323, "0.5": 0.4157794713973999, "0.7": 0.3731876015663147, "0.9": 0.2649416923522949, "1.0": 0.17636379599571228 } }, { "N": 48, "k": 16, "cos_full": 0.44219619035720825, "cos_pinv": 0.24936461448669434, "cos_lerp": 0.42898446321487427, "lerp_alpha": 0.3, "cos_slerp": 0.24936461448669434, "slerp_alpha": -1.0, "cos_subspace": 0.43540576100349426, "cos_stay_k": 0.4292367100715637, "nn_pinv": 0.22959183156490326, "nn_lerp": 0.6665306091308594, "nn_slerp": 0.22959183156490326, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.42898446321487427, "0.5": 0.4116378426551819, "0.7": 0.37226247787475586, "0.9": 0.2990289330482483, "1.0": 0.24936461448669434 } }, { "N": 48, "k": 24, "cos_full": 0.44319379329681396, "cos_pinv": 0.30473363399505615, "cos_lerp": 0.4294096529483795, "lerp_alpha": 0.3, "cos_slerp": 0.30473363399505615, "slerp_alpha": -1.0, "cos_subspace": 0.4366039037704468, "cos_stay_k": 0.43148913979530334, "nn_pinv": 0.3257142901420593, "nn_lerp": 0.6755102276802063, "nn_slerp": 0.3257142901420593, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.4294096529483795, "0.5": 0.4138883054256439, "0.7": 0.3837534487247467, "0.9": 0.33541104197502136, "1.0": 0.30473363399505615 } }, { "N": 48, "k": 32, "cos_full": 0.44762489199638367, "cos_pinv": 0.3621036410331726, "cos_lerp": 0.4396836459636688, "lerp_alpha": 0.3, "cos_slerp": 0.3621036410331726, "slerp_alpha": -1.0, "cos_subspace": 0.44293174147605896, "cos_stay_k": 0.4425356388092041, "nn_pinv": 0.45367348194122314, "nn_lerp": 0.7283673286437988, "nn_slerp": 0.45367348194122314, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.4396836459636688, "0.5": 0.4302048087120056, "0.7": 0.41093045473098755, "0.9": 0.38085830211639404, "1.0": 0.3621036410331726 } }, { "N": 64, "k": 8, "cos_full": 0.44745853543281555, "cos_pinv": 0.16023682057857513, "cos_lerp": 0.4355566203594208, "lerp_alpha": 0.3, "cos_slerp": 0.16023682057857513, "slerp_alpha": -1.0, "cos_subspace": 0.43897074460983276, "cos_stay_k": 0.43231040239334106, "nn_pinv": 0.1024489775300026, "nn_lerp": 0.7077550888061523, "nn_slerp": 0.1024489775300026, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.4355566203594208, "0.5": 0.422730952501297, "0.7": 0.38195523619651794, "0.9": 0.26452571153640747, "1.0": 0.16023682057857513 } }, { "N": 64, "k": 16, "cos_full": 0.44443005323410034, "cos_pinv": 0.21778510510921478, "cos_lerp": 0.42995432019233704, "lerp_alpha": 0.3, "cos_slerp": 0.21778510510921478, "slerp_alpha": -1.0, "cos_subspace": 0.4355003833770752, "cos_stay_k": 0.4299428462982178, "nn_pinv": 0.16448979079723358, "nn_lerp": 0.6577550768852234, "nn_slerp": 0.16448979079723358, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.42995432019233704, "0.5": 0.41322508454322815, "0.7": 0.3706657588481903, "0.9": 0.2816699743270874, "1.0": 0.21778510510921478 } }, { "N": 64, "k": 24, "cos_full": 0.445288747549057, "cos_pinv": 0.2677716314792633, "cos_lerp": 0.4294731318950653, "lerp_alpha": 0.3, "cos_slerp": 0.2677716314792633, "slerp_alpha": -1.0, "cos_subspace": 0.4362858235836029, "cos_stay_k": 0.4332144558429718, "nn_pinv": 0.24102041125297546, "nn_lerp": 0.6646938920021057, "nn_slerp": 0.24102041125297546, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.4294731318950653, "0.5": 0.41239339113235474, "0.7": 0.37548330426216125, "0.9": 0.3105800151824951, "1.0": 0.2677716314792633 } }, { "N": 64, "k": 32, "cos_full": 0.4467809498310089, "cos_pinv": 0.3090856969356537, "cos_lerp": 0.4323803186416626, "lerp_alpha": 0.3, "cos_slerp": 0.3090856969356537, "slerp_alpha": -1.0, "cos_subspace": 0.4390243887901306, "cos_stay_k": 0.43744954466819763, "nn_pinv": 0.31224489212036133, "nn_lerp": 0.6795918345451355, "nn_slerp": 0.31224489212036133, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.4323803186416626, "0.5": 0.4174533486366272, "0.7": 0.3877297341823578, "0.9": 0.339660108089447, "1.0": 0.3090856969356537 } }, { "N": 96, "k": 16, "cos_full": 0.4266941249370575, "cos_pinv": 0.16444618999958038, "cos_lerp": 0.40349096059799194, "lerp_alpha": 0.3, "cos_slerp": 0.16444618999958038, "slerp_alpha": -1.0, "cos_subspace": 0.4077111780643463, "cos_stay_k": 0.4019833505153656, "nn_pinv": 0.1320408135652542, "nn_lerp": 0.7206122279167175, "nn_slerp": 0.1320408135652542, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.40349096059799194, "0.5": 0.38955193758010864, "0.7": 0.3486329913139343, "0.9": 0.2459246963262558, "1.0": 0.16444618999958038 } }, { "N": 96, "k": 24, "cos_full": 0.4259049594402313, "cos_pinv": 0.2023046761751175, "cos_lerp": 0.401383638381958, "lerp_alpha": 0.3, "cos_slerp": 0.2023046761751175, "slerp_alpha": -1.0, "cos_subspace": 0.4069177210330963, "cos_stay_k": 0.403384268283844, "nn_pinv": 0.1997959166765213, "nn_lerp": 0.7085714340209961, "nn_slerp": 0.1997959166765213, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.401383638381958, "0.5": 0.38544148206710815, "0.7": 0.3450133800506592, "0.9": 0.26154762506484985, "1.0": 0.2023046761751175 } }, { "N": 96, "k": 32, "cos_full": 0.4240572154521942, "cos_pinv": 0.23629310727119446, "cos_lerp": 0.39960119128227234, "lerp_alpha": 0.3, "cos_slerp": 0.23629310727119446, "slerp_alpha": -1.0, "cos_subspace": 0.4056648313999176, "cos_stay_k": 0.40559738874435425, "nn_pinv": 0.2412244826555252, "nn_lerp": 0.6877551078796387, "nn_slerp": 0.2412244826555252, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.39960119128227234, "0.5": 0.38373640179634094, "0.7": 0.34743860363960266, "0.9": 0.28089070320129395, "1.0": 0.23629310727119446 } }, { "N": 96, "k": 48, "cos_full": 0.4237736165523529, "cos_pinv": 0.29778167605400085, "cos_lerp": 0.40501973032951355, "lerp_alpha": 0.3, "cos_slerp": 0.29778167605400085, "slerp_alpha": -1.0, "cos_subspace": 0.40799397230148315, "cos_stay_k": 0.41385555267333984, "nn_pinv": 0.3938775360584259, "nn_lerp": 0.7165306210517883, "nn_slerp": 0.3938775360584259, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.40501973032951355, "0.5": 0.39429953694343567, "0.7": 0.36880189180374146, "0.9": 0.32548192143440247, "1.0": 0.29778167605400085 } }, { "N": 128, "k": 16, "cos_full": 0.40680068731307983, "cos_pinv": 0.13796983659267426, "cos_lerp": 0.37397128343582153, "lerp_alpha": 0.3, "cos_slerp": 0.13796983659267426, "slerp_alpha": -1.0, "cos_subspace": 0.37697634100914, "cos_stay_k": 0.376295268535614, "nn_pinv": 0.12918367981910706, "nn_lerp": 0.7569387555122375, "nn_slerp": 0.12918367981910706, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.37397128343582153, "0.5": 0.3628230392932892, "0.7": 0.3268125057220459, "0.9": 0.2248772829771042, "1.0": 0.13796983659267426 } }, { "N": 128, "k": 24, "cos_full": 0.4072082042694092, "cos_pinv": 0.1678781509399414, "cos_lerp": 0.3732585906982422, "lerp_alpha": 0.3, "cos_slerp": 0.1678781509399414, "slerp_alpha": -1.0, "cos_subspace": 0.37738507986068726, "cos_stay_k": 0.377827912569046, "nn_pinv": 0.16938775777816772, "nn_lerp": 0.7387754917144775, "nn_slerp": 0.16938775777816772, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.3732585906982422, "0.5": 0.3597899377346039, "0.7": 0.32184135913848877, "0.9": 0.23397228121757507, "1.0": 0.1678781509399414 } }, { "N": 128, "k": 32, "cos_full": 0.4063572287559509, "cos_pinv": 0.1860375851392746, "cos_lerp": 0.3729659616947174, "lerp_alpha": 0.3, "cos_slerp": 0.1860375851392746, "slerp_alpha": -1.0, "cos_subspace": 0.37776076793670654, "cos_stay_k": 0.3736386001110077, "nn_pinv": 0.2173469364643097, "nn_lerp": 0.7228571176528931, "nn_slerp": 0.2173469364643097, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.3729659616947174, "0.5": 0.35855358839035034, "0.7": 0.3207029104232788, "0.9": 0.24178646504878998, "1.0": 0.1860375851392746 } }, { "N": 128, "k": 48, "cos_full": 0.407288521528244, "cos_pinv": 0.23970888555049896, "cos_lerp": 0.3782598674297333, "lerp_alpha": 0.3, "cos_slerp": 0.23970888555049896, "slerp_alpha": -1.0, "cos_subspace": 0.38115182518959045, "cos_stay_k": 0.3868360221385956, "nn_pinv": 0.3100000023841858, "nn_lerp": 0.7326530814170837, "nn_slerp": 0.3100000023841858, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.3782598674297333, "0.5": 0.36661890149116516, "0.7": 0.3355896472930908, "0.9": 0.2777838706970215, "1.0": 0.23970888555049896 } }, { "N": 128, "k": 64, "cos_full": 0.4102143943309784, "cos_pinv": 0.2780764698982239, "cos_lerp": 0.38525161147117615, "lerp_alpha": 0.3, "cos_slerp": 0.2780764698982239, "slerp_alpha": -1.0, "cos_subspace": 0.3880009055137634, "cos_stay_k": 0.3936968147754669, "nn_pinv": 0.3942857086658478, "nn_lerp": 0.7287755012512207, "nn_slerp": 0.3942857086658478, "nn_subspace": 1.0, "lerp_all": { "0.3": 0.38525161147117615, "0.5": 0.3766993582248688, "0.7": 0.35177841782569885, "0.9": 0.3069446086883545, "1.0": 0.2780764698982239 } } ], "projection_quality": [ { "N": 32, "k": 8, "B": 256, "M": 1024, "energy_ratio": 0.309897, "recon_proj": 0.86495, "recon_trunc": 0.8308152, "s_rel_err": 0.562196, "subspace_cos": 0.443195, "proj_ms": 7.8495, "full_ms": 0.5081 }, { "N": 32, "k": 12, "B": 256, "M": 1024, "energy_ratio": 0.44739, "recon_proj": 0.78914398, "recon_trunc": 0.74345922, "s_rel_err": 0.460551, "subspace_cos": 0.550833, "proj_ms": 10.5558, "full_ms": 0.5081 }, { "N": 32, "k": 16, "B": 256, "M": 1024, "energy_ratio": 0.575644, "recon_proj": 0.70542347, "recon_trunc": 0.65149707, "s_rel_err": 0.33788, "subspace_cos": 0.643179, "proj_ms": 11.2219, "full_ms": 0.5081 }, { "N": 32, "k": 24, "B": 256, "M": 1024, "energy_ratio": 0.805888, "recon_proj": 0.44062796, "recon_trunc": 0.44062796, "s_rel_err": 1e-06, "subspace_cos": 1.000002, "proj_ms": 0.5103, "full_ms": 0.5081 }, { "N": 48, "k": 8, "B": 256, "M": 1024, "energy_ratio": 0.22334, "recon_proj": 0.91145521, "recon_trunc": 0.88116837, "s_rel_err": 0.788001, "subspace_cos": 0.364162, "proj_ms": 7.9006, "full_ms": 172.1359 }, { "N": 48, "k": 12, "B": 256, "M": 1024, "energy_ratio": 0.323881, "recon_proj": 0.86479664, "recon_trunc": 0.82215744, "s_rel_err": 0.657488, "subspace_cos": 0.445424, "proj_ms": 10.6681, "full_ms": 172.1359 }, { "N": 48, "k": 16, "B": 256, "M": 1024, "energy_ratio": 0.418652, "recon_proj": 0.8147006, "recon_trunc": 0.76236212, "s_rel_err": 0.412456, "subspace_cos": 0.519302, "proj_ms": 11.4898, "full_ms": 172.1359 }, { "N": 48, "k": 24, "B": 256, "M": 1024, "energy_ratio": 0.592351, "recon_proj": 0.70517737, "recon_trunc": 0.6383903, "s_rel_err": 0.317782, "subspace_cos": 0.643277, "proj_ms": 11.4974, "full_ms": 172.1359 }, { "N": 48, "k": 32, "B": 256, "M": 1024, "energy_ratio": 0.74707, "recon_proj": 0.57555926, "recon_trunc": 0.5028553, "s_rel_err": 0.30761, "subspace_cos": 0.757457, "proj_ms": 180.6153, "full_ms": 172.1359 }, { "N": 64, "k": 8, "B": 256, "M": 1024, "energy_ratio": 0.178277, "recon_proj": 0.93415421, "recon_trunc": 0.90635061, "s_rel_err": 0.963514, "subspace_cos": 0.315245, "proj_ms": 7.9174, "full_ms": 182.058 }, { "N": 64, "k": 12, "B": 256, "M": 1024, "energy_ratio": 0.259141, "recon_proj": 0.89969951, "recon_trunc": 0.86059946, "s_rel_err": 0.693676, "subspace_cos": 0.389778, "proj_ms": 10.6934, "full_ms": 182.058 }, { "N": 64, "k": 16, "B": 256, "M": 1024, "energy_ratio": 0.335778, "recon_proj": 0.86406291, "recon_trunc": 0.81487316, "s_rel_err": 0.602513, "subspace_cos": 0.448424, "proj_ms": 11.3111, "full_ms": 182.058 }, { "N": 64, "k": 24, "B": 256, "M": 1024, "energy_ratio": 0.477809, "recon_proj": 0.78865892, "recon_trunc": 0.72251719, "s_rel_err": 0.349523, "subspace_cos": 0.550496, "proj_ms": 11.2071, "full_ms": 182.058 }, { "N": 64, "k": 32, "B": 256, "M": 1024, "energy_ratio": 0.606424, "recon_proj": 0.70486772, "recon_trunc": 0.62725997, "s_rel_err": 0.31157, "subspace_cos": 0.643771, "proj_ms": 176.4529, "full_ms": 182.058 }, { "N": 64, "k": 48, "B": 256, "M": 1024, "energy_ratio": 0.827353, "recon_proj": 0.49853975, "recon_trunc": 0.41544521, "s_rel_err": 0.308973, "subspace_cos": 0.81383, "proj_ms": 204.6248, "full_ms": 182.058 }, { "N": 96, "k": 8, "B": 256, "M": 1024, "energy_ratio": 0.130945, "recon_proj": 0.95620376, "recon_trunc": 0.93204349, "s_rel_err": 1.203286, "subspace_cos": 0.25833, "proj_ms": 8.035, "full_ms": 295.4513 }, { "N": 96, "k": 16, "B": 256, "M": 1024, "energy_ratio": 0.248348, "recon_proj": 0.91137886, "recon_trunc": 0.86680406, "s_rel_err": 0.872127, "subspace_cos": 0.363736, "proj_ms": 11.4258, "full_ms": 295.4513 }, { "N": 96, "k": 24, "B": 256, "M": 1024, "energy_ratio": 0.355739, "recon_proj": 0.86390597, "recon_trunc": 0.80249721, "s_rel_err": 0.558701, "subspace_cos": 0.447504, "proj_ms": 11.2383, "full_ms": 295.4513 }, { "N": 96, "k": 32, "B": 256, "M": 1024, "energy_ratio": 0.454543, "recon_proj": 0.81463653, "recon_trunc": 0.73840213, "s_rel_err": 0.471019, "subspace_cos": 0.516298, "proj_ms": 175.186, "full_ms": 295.4513 }, { "N": 96, "k": 48, "B": 256, "M": 1024, "energy_ratio": 0.629703, "recon_proj": 0.70540565, "recon_trunc": 0.60839772, "s_rel_err": 0.324269, "subspace_cos": 0.640656, "proj_ms": 200.525, "full_ms": 295.4513 }, { "N": 96, "k": 64, "B": 256, "M": 1024, "energy_ratio": 0.778265, "recon_proj": 0.57526517, "recon_trunc": 0.47079238, "s_rel_err": 0.307349, "subspace_cos": 0.75777, "proj_ms": 306.5314, "full_ms": 295.4513 }, { "N": 128, "k": 8, "B": 256, "M": 1024, "energy_ratio": 0.106015, "recon_proj": 0.96762085, "recon_trunc": 0.94562519, "s_rel_err": 1.467844, "subspace_cos": 0.225075, "proj_ms": 8.0847, "full_ms": 436.5506 }, { "N": 128, "k": 16, "B": 256, "M": 1024, "energy_ratio": 0.201916, "recon_proj": 0.93442476, "recon_trunc": 0.89346564, "s_rel_err": 1.002462, "subspace_cos": 0.314518, "proj_ms": 11.5088, "full_ms": 436.5506 }, { "N": 128, "k": 24, "B": 256, "M": 1024, "energy_ratio": 0.290409, "recon_proj": 0.899674, "recon_trunc": 0.84247589, "s_rel_err": 0.715509, "subspace_cos": 0.386668, "proj_ms": 11.4321, "full_ms": 436.5506 }, { "N": 128, "k": 32, "B": 256, "M": 1024, "energy_ratio": 0.372587, "recon_proj": 0.86432445, "recon_trunc": 0.7921918, "s_rel_err": 0.537362, "subspace_cos": 0.44471, "proj_ms": 174.9935, "full_ms": 436.5506 }, { "N": 128, "k": 48, "B": 256, "M": 1024, "energy_ratio": 0.520469, "recon_proj": 0.78865445, "recon_trunc": 0.69256634, "s_rel_err": 0.359839, "subspace_cos": 0.549803, "proj_ms": 198.2863, "full_ms": 436.5506 }, { "N": 128, "k": 64, "B": 256, "M": 1024, "energy_ratio": 0.649114, "recon_proj": 0.70549536, "recon_trunc": 0.59242892, "s_rel_err": 0.312149, "subspace_cos": 0.640748, "proj_ms": 305.3637, "full_ms": 436.5506 }, { "N": 128, "k": 96, "B": 256, "M": 1024, "energy_ratio": 0.856093, "recon_proj": 0.49853343, "recon_trunc": 0.37939623, "s_rel_err": 0.30107, "subspace_cos": 0.813627, "proj_ms": 452.6234, "full_ms": 436.5506 } ], "n_sweep": [ { "N": 2, "B": 512, "M": 1024, "torch_ms": 79.0398, "gram_ms": 0.2265, "best": "triton", "speedup_vs_torch": 3859.065, "triton_ms": 0.0205 }, { "N": 3, "B": 512, "M": 1024, "torch_ms": 118.394, "gram_ms": 0.2419, "best": "triton", "speedup_vs_torch": 5394.195, "triton_ms": 0.0219 }, { "N": 4, "B": 512, "M": 1024, "torch_ms": 125.2633, "gram_ms": 0.2553, "best": "gram", "speedup_vs_torch": 490.648 }, { "N": 5, "B": 512, "M": 1024, "torch_ms": 144.426, "gram_ms": 0.2576, "best": "gram", "speedup_vs_torch": 560.756 }, { "N": 6, "B": 512, "M": 1024, "torch_ms": 155.0421, "gram_ms": 0.2688, "best": "gram", "speedup_vs_torch": 576.89 }, { "N": 7, "B": 512, "M": 1024, "torch_ms": 163.7714, "gram_ms": 0.2803, "best": "gram", "speedup_vs_torch": 584.225 }, { "N": 8, "B": 512, "M": 1024, "torch_ms": 168.9336, "gram_ms": 0.2906, "best": "newton", "speedup_vs_torch": 582.14, "newton_ms": 0.2902 }, { "N": 10, "B": 512, "M": 1024, "torch_ms": 190.2917, "gram_ms": 0.3798, "best": "newton", "speedup_vs_torch": 502.216, "newton_ms": 0.3789 }, { "N": 12, "B": 512, "M": 1024, "torch_ms": 213.3941, "gram_ms": 0.3995, "best": "gram", "speedup_vs_torch": 534.112, "newton_ms": 0.4 }, { "N": 16, "B": 512, "M": 1024, "torch_ms": 230.67, "gram_ms": 0.4285, "best": "newton", "speedup_vs_torch": 538.632, "newton_ms": 0.4283 }, { "N": 20, "B": 512, "M": 1024, "torch_ms": 253.6572, "gram_ms": 0.5967, "best": "newton", "speedup_vs_torch": 425.616, "newton_ms": 0.596 }, { "N": 24, "B": 512, "M": 1024, "torch_ms": 272.2926, "gram_ms": 0.651, "best": "newton", "speedup_vs_torch": 418.55, "newton_ms": 0.6506, "proj16_ms": 0.6523 }, { "N": 32, "B": 512, "M": 1024, "torch_ms": 303.023, "gram_ms": 0.7953, "best": "newton", "speedup_vs_torch": 381.773, "newton_ms": 0.7937, "proj24_ms": 0.8001, "proj16_ms": 22.0252 }, { "N": 48, "B": 512, "M": 1024, "torch_ms": 550.7456, "gram_ms": 344.0493, "best": "proj24", "speedup_vs_torch": 24.544, "newton_ms": 344.2018, "proj24_ms": 22.4388, "proj16_ms": 22.4814 }, { "N": 64, "B": 512, "M": 1024, "torch_ms": 609.3521, "gram_ms": 365.2057, "best": "proj24", "speedup_vs_torch": 28.017, "newton_ms": 365.1476, "proj24_ms": 21.7492, "proj16_ms": 22.1726 }, { "N": 96, "B": 512, "M": 1024, "torch_ms": 973.8192, "gram_ms": 590.6363, "best": "proj24", "speedup_vs_torch": 44.545, "newton_ms": 590.6639, "proj24_ms": 21.8615, "proj16_ms": 22.3533 }, { "N": 128, "B": 512, "M": 1024, "torch_ms": 1421.9242, "gram_ms": 868.1439, "best": "proj24", "speedup_vs_torch": 64.385, "newton_ms": 868.2619, "proj24_ms": 22.0849, "proj16_ms": 22.4692 } ], "batch_sweeps": {}, "spatial_sweeps": {} }