diff --git "a/scripts/spectral_output/prisma/results.json" "b/scripts/spectral_output/prisma/results.json" new file mode 100644--- /dev/null +++ "b/scripts/spectral_output/prisma/results.json" @@ -0,0 +1,4932 @@ +{ + "embed.weight": { + "shape": [ + 32000, + 1024 + ], + "effective_rank": 955.3326416015625, + "stable_rank": 3.3479604721069336, + "spectral_norm": 114.62545013427734, + "frobenius_norm": 209.7351531982422, + "mp_bound": 34.9162720998438, + "n_above_mp": 1, + "n_total": 1024, + "signal_ratio": 0.0009765625, + "alpha": 0.2020451708629658, + "alpha_r2": 0.8814259770817735, + "condition_number": 51.396602630615234, + "top_10_sv": [ + 114.62545013427734, + 18.541969299316406, + 12.835403442382812, + 12.421660423278809, + 11.050704002380371, + 10.73924446105957, + 10.254462242126465, + 10.188952445983887, + 9.904229164123535, + 9.61025333404541 + ] + }, + "mirror_blocks.0.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 611.4822998046875, + "stable_rank": 8.072141647338867, + "spectral_norm": 19.211088180541992, + "frobenius_norm": 54.58161163330078, + "mp_bound": 1.1272666454315186, + "n_above_mp": 324, + "n_total": 1024, + "signal_ratio": 0.31640625, + "alpha": 0.7650145236690363, + "alpha_r2": 0.8793390357932465, + "condition_number": 32722.75, + "top_10_sv": [ + 19.211088180541992, + 11.109518051147461, + 9.988239288330078, + 9.15766716003418, + 8.11031436920166, + 7.618267059326172, + 7.295526504516602, + 6.896101951599121, + 6.339911460876465, + 6.210816383361816 + ] + }, + "mirror_blocks.0.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 188.94708251953125, + "stable_rank": 7.253108501434326, + "spectral_norm": 12.379569053649902, + "frobenius_norm": 33.34015655517578, + "mp_bound": 2.9505887031555176, + "n_above_mp": 29, + "n_total": 256, + "signal_ratio": 0.11328125, + "alpha": 0.6261064311306485, + "alpha_r2": 0.9396853755505568, + "condition_number": 42.76155090332031, + "top_10_sv": [ + 12.379569053649902, + 8.123998641967773, + 7.572089672088623, + 6.361351013183594, + 6.048568248748779, + 5.31868839263916, + 5.242038726806641, + 5.040452003479004, + 4.775374412536621, + 4.44889497756958 + ] + }, + "mirror_blocks.0.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 242.99041748046875, + "stable_rank": 44.0363883972168, + "spectral_norm": 2.2209346294403076, + "frobenius_norm": 14.738103866577148, + "mp_bound": 2.972583085298538, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.23697130212252424, + "alpha_r2": 0.8904951192277636, + "condition_number": 6.352622032165527, + "top_10_sv": [ + 2.2209346294403076, + 2.1390044689178467, + 1.711495280265808, + 1.5301918983459473, + 1.5054175853729248, + 1.4651620388031006, + 1.433361291885376, + 1.4142837524414062, + 1.4004415273666382, + 1.3914343118667603 + ] + }, + "mirror_blocks.0.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 724.9580688476562, + "stable_rank": 14.895401000976562, + "spectral_norm": 8.279654502868652, + "frobenius_norm": 31.95496368408203, + "mp_bound": 1.0426251888275146, + "n_above_mp": 263, + "n_total": 1024, + "signal_ratio": 0.2568359375, + "alpha": 0.5872495495678323, + "alpha_r2": 0.8157636162899323, + "condition_number": 69645.859375, + "top_10_sv": [ + 8.279654502868652, + 6.268556594848633, + 5.0668230056762695, + 4.177846908569336, + 3.899959087371826, + 3.5115175247192383, + 3.3141400814056396, + 3.1447339057922363, + 3.0798532962799072, + 2.940197467803955 + ] + }, + "mirror_blocks.0.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 933.0056762695312, + "stable_rank": 36.88116455078125, + "spectral_norm": 12.403251647949219, + "frobenius_norm": 75.32477569580078, + "mp_bound": 6.050300983566083, + "n_above_mp": 4, + "n_total": 1024, + "signal_ratio": 0.00390625, + "alpha": 0.31718163487457196, + "alpha_r2": 0.9028710628332313, + "condition_number": 18.202478408813477, + "top_10_sv": [ + 12.403251647949219, + 10.094819068908691, + 6.713208198547363, + 6.145500183105469, + 5.956831455230713, + 5.782416343688965, + 5.589985370635986, + 5.335856914520264, + 5.182785511016846, + 5.1482696533203125 + ] + }, + "mirror_blocks.0.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 946.919677734375, + "stable_rank": 88.96258544921875, + "spectral_norm": 7.2340216636657715, + "frobenius_norm": 68.23127746582031, + "mp_bound": 5.717814153260884, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.2869582651608591, + "alpha_r2": 0.862859931535132, + "condition_number": 9.720098495483398, + "top_10_sv": [ + 7.2340216636657715, + 6.495790958404541, + 5.4111647605896, + 5.085533142089844, + 4.712813377380371, + 4.433526039123535, + 4.3603901863098145, + 4.254585266113281, + 4.218906402587891, + 4.1409454345703125 + ] + }, + "mirror_blocks.0.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 933.1751708984375, + "stable_rank": 24.127830505371094, + "spectral_norm": 14.52130126953125, + "frobenius_norm": 71.3287582397461, + "mp_bound": 5.748939883673818, + "n_above_mp": 3, + "n_total": 1024, + "signal_ratio": 0.0029296875, + "alpha": 0.31219905283903315, + "alpha_r2": 0.9020512451514214, + "condition_number": 20.667524337768555, + "top_10_sv": [ + 14.52130126953125, + 10.301453590393066, + 6.080916404724121, + 5.617895603179932, + 5.604605197906494, + 5.414775371551514, + 5.197986125946045, + 5.1155171394348145, + 5.0440144538879395, + 4.861530303955078 + ] + }, + "mirror_blocks.0.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 936.1046142578125, + "stable_rank": 26.820751190185547, + "spectral_norm": 13.629375457763672, + "frobenius_norm": 70.5848388671875, + "mp_bound": 5.736858368980213, + "n_above_mp": 3, + "n_total": 1024, + "signal_ratio": 0.0029296875, + "alpha": 0.30706744572021016, + "alpha_r2": 0.8951185015859411, + "condition_number": 19.506986618041992, + "top_10_sv": [ + 13.629375457763672, + 8.47607421875, + 7.105331897735596, + 5.499249458312988, + 5.243304252624512, + 5.15825080871582, + 5.046846389770508, + 4.911843776702881, + 4.76882266998291, + 4.646317481994629 + ] + }, + "mirror_blocks.1.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 696.01953125, + "stable_rank": 21.77424430847168, + "spectral_norm": 10.800535202026367, + "frobenius_norm": 50.398406982421875, + "mp_bound": 1.4876697063446045, + "n_above_mp": 273, + "n_total": 1024, + "signal_ratio": 0.2666015625, + "alpha": 0.6495930360098764, + "alpha_r2": 0.8481333896952814, + "condition_number": 57968.41015625, + "top_10_sv": [ + 10.800535202026367, + 8.983768463134766, + 7.663996696472168, + 7.533313751220703, + 6.774697780609131, + 6.49566125869751, + 6.07313871383667, + 5.596704483032227, + 5.422885417938232, + 4.976520538330078 + ] + }, + "mirror_blocks.1.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 211.1052703857422, + "stable_rank": 16.426054000854492, + "spectral_norm": 7.75229024887085, + "frobenius_norm": 31.419309616088867, + "mp_bound": 3.5909392833709717, + "n_above_mp": 12, + "n_total": 256, + "signal_ratio": 0.046875, + "alpha": 0.4882152336188668, + "alpha_r2": 0.8642598956450152, + "condition_number": 24.761934280395508, + "top_10_sv": [ + 7.75229024887085, + 6.1633453369140625, + 5.139535903930664, + 5.061746120452881, + 4.618518352508545, + 4.374049186706543, + 4.027493476867676, + 3.9339475631713867, + 3.7572121620178223, + 3.703809976577759 + ] + }, + "mirror_blocks.1.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 242.44528198242188, + "stable_rank": 83.64839935302734, + "spectral_norm": 1.7805677652359009, + "frobenius_norm": 16.284982681274414, + "mp_bound": 3.1760946214199066, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.23865599385774094, + "alpha_r2": 0.8298790456797973, + "condition_number": 4.862325668334961, + "top_10_sv": [ + 1.7805677652359009, + 1.752925992012024, + 1.7314479351043701, + 1.7006714344024658, + 1.6700440645217896, + 1.638745903968811, + 1.6352427005767822, + 1.603857159614563, + 1.5929089784622192, + 1.584483027458191 + ] + }, + "mirror_blocks.1.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 720.89599609375, + "stable_rank": 35.030216217041016, + "spectral_norm": 5.732750415802002, + "frobenius_norm": 33.93004608154297, + "mp_bound": 1.1078050136566162, + "n_above_mp": 250, + "n_total": 1024, + "signal_ratio": 0.244140625, + "alpha": 0.610132476371035, + "alpha_r2": 0.8395604376360137, + "condition_number": 12390.01171875, + "top_10_sv": [ + 5.732750415802002, + 4.9483232498168945, + 4.747094631195068, + 4.673557758331299, + 4.467545509338379, + 4.270684719085693, + 3.998795747756958, + 3.633718252182007, + 3.5153892040252686, + 3.435293436050415 + ] + }, + "mirror_blocks.1.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 938.849365234375, + "stable_rank": 27.859130859375, + "spectral_norm": 14.204141616821289, + "frobenius_norm": 74.9719467163086, + "mp_bound": 6.154051054317711, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.30267642691099717, + "alpha_r2": 0.892406927489538, + "condition_number": 19.80817985534668, + "top_10_sv": [ + 14.204141616821289, + 6.568716049194336, + 5.939168930053711, + 5.673320770263672, + 5.6025471687316895, + 5.482428550720215, + 5.401841640472412, + 5.3253021240234375, + 5.149769306182861, + 5.103696823120117 + ] + }, + "mirror_blocks.1.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 957.2509765625, + "stable_rank": 114.13573455810547, + "spectral_norm": 6.5968146324157715, + "frobenius_norm": 70.47662353515625, + "mp_bound": 6.167452670784545, + "n_above_mp": 1, + "n_total": 1024, + "signal_ratio": 0.0009765625, + "alpha": 0.25835184607037087, + "alpha_r2": 0.8334390323858647, + "condition_number": 8.301725387573242, + "top_10_sv": [ + 6.5968146324157715, + 5.153751850128174, + 4.772619724273682, + 4.663176536560059, + 4.659829616546631, + 4.528884410858154, + 4.3318705558776855, + 4.253047466278076, + 4.17236328125, + 4.010013103485107 + ] + }, + "mirror_blocks.1.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 933.5264282226562, + "stable_rank": 37.143211364746094, + "spectral_norm": 11.808619499206543, + "frobenius_norm": 71.96790313720703, + "mp_bound": 5.814992589206637, + "n_above_mp": 4, + "n_total": 1024, + "signal_ratio": 0.00390625, + "alpha": 0.31530753380867077, + "alpha_r2": 0.9081595227531217, + "condition_number": 18.524978637695312, + "top_10_sv": [ + 11.808619499206543, + 10.443424224853516, + 6.187366962432861, + 5.897439002990723, + 5.739169597625732, + 5.6669840812683105, + 5.549097537994385, + 5.300599098205566, + 5.234247207641602, + 5.178868770599365 + ] + }, + "mirror_blocks.1.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 933.479248046875, + "stable_rank": 19.268285751342773, + "spectral_norm": 16.102224349975586, + "frobenius_norm": 70.6817626953125, + "mp_bound": 5.7072781606356315, + "n_above_mp": 4, + "n_total": 1024, + "signal_ratio": 0.00390625, + "alpha": 0.31001767008960507, + "alpha_r2": 0.9027703817029678, + "condition_number": 24.795528411865234, + "top_10_sv": [ + 16.102224349975586, + 8.242128372192383, + 6.112788677215576, + 5.809852123260498, + 5.620641708374023, + 5.445211410522461, + 5.326540946960449, + 5.169220924377441, + 4.855921745300293, + 4.798176288604736 + ] + }, + "mirror_blocks.2.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 675.5261840820312, + "stable_rank": 28.839494705200195, + "spectral_norm": 8.647916793823242, + "frobenius_norm": 46.441402435302734, + "mp_bound": 1.300352692604065, + "n_above_mp": 266, + "n_total": 1024, + "signal_ratio": 0.259765625, + "alpha": 0.6788251956238536, + "alpha_r2": 0.8725935692128823, + "condition_number": 191794.671875, + "top_10_sv": [ + 8.647916793823242, + 7.902726173400879, + 7.435888290405273, + 6.550820350646973, + 6.413237571716309, + 6.369700908660889, + 6.1822075843811035, + 5.9313507080078125, + 5.48207426071167, + 5.30302619934082 + ] + }, + "mirror_blocks.2.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 207.01856994628906, + "stable_rank": 24.513458251953125, + "spectral_norm": 5.46562385559082, + "frobenius_norm": 27.060888290405273, + "mp_bound": 2.928851544857025, + "n_above_mp": 20, + "n_total": 256, + "signal_ratio": 0.078125, + "alpha": 0.5183744505975887, + "alpha_r2": 0.8418348199501015, + "condition_number": 29.534969329833984, + "top_10_sv": [ + 5.46562385559082, + 4.67855167388916, + 4.028471946716309, + 3.9443235397338867, + 3.9183263778686523, + 3.72249698638916, + 3.5915238857269287, + 3.542348623275757, + 3.3783679008483887, + 3.287583112716675 + ] + }, + "mirror_blocks.2.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 234.90504455566406, + "stable_rank": 90.89004516601562, + "spectral_norm": 2.018869638442993, + "frobenius_norm": 19.247150421142578, + "mp_bound": 3.18257537484169, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.29526949427044086, + "alpha_r2": 0.7413306402049287, + "condition_number": 5.2117414474487305, + "top_10_sv": [ + 2.018869638442993, + 1.995165228843689, + 1.9847723245620728, + 1.9739112854003906, + 1.9674067497253418, + 1.9547622203826904, + 1.950602412223816, + 1.9379189014434814, + 1.9341363906860352, + 1.9190590381622314 + ] + }, + "mirror_blocks.2.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 736.9257202148438, + "stable_rank": 65.72710418701172, + "spectral_norm": 4.509586334228516, + "frobenius_norm": 36.56023406982422, + "mp_bound": 1.229435920715332, + "n_above_mp": 276, + "n_total": 1024, + "signal_ratio": 0.26953125, + "alpha": 0.5903513401699614, + "alpha_r2": 0.7863009661878397, + "condition_number": 4715.7734375, + "top_10_sv": [ + 4.509586334228516, + 3.9957809448242188, + 3.8506786823272705, + 3.5242111682891846, + 3.228558301925659, + 3.2150588035583496, + 3.0832107067108154, + 3.039414405822754, + 2.9777920246124268, + 2.9356257915496826 + ] + }, + "mirror_blocks.2.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 940.0686645507812, + "stable_rank": 26.609861373901367, + "spectral_norm": 14.436392784118652, + "frobenius_norm": 74.46976470947266, + "mp_bound": 6.138091297423093, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.2981227747467673, + "alpha_r2": 0.883451489947269, + "condition_number": 21.50666618347168, + "top_10_sv": [ + 14.436392784118652, + 6.408453941345215, + 5.646579742431641, + 5.466150760650635, + 5.317550182342529, + 5.237943649291992, + 4.957888126373291, + 4.907560348510742, + 4.853755474090576, + 4.746955394744873 + ] + }, + "mirror_blocks.2.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 960.3915405273438, + "stable_rank": 146.8135986328125, + "spectral_norm": 5.805617332458496, + "frobenius_norm": 70.3447265625, + "mp_bound": 6.216685476364057, + "n_above_mp": 0, + "n_total": 1024, + "signal_ratio": 0.0, + "alpha": 0.24527062964950513, + "alpha_r2": 0.7999041631154247, + "condition_number": 11.226006507873535, + "top_10_sv": [ + 5.805617332458496, + 4.337494373321533, + 4.216872692108154, + 3.8949217796325684, + 3.843276023864746, + 3.7987165451049805, + 3.7046706676483154, + 3.6928791999816895, + 3.684807538986206, + 3.6697158813476562 + ] + }, + "mirror_blocks.2.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 935.3277587890625, + "stable_rank": 42.07974624633789, + "spectral_norm": 10.988144874572754, + "frobenius_norm": 71.27889251708984, + "mp_bound": 5.783765546301514, + "n_above_mp": 3, + "n_total": 1024, + "signal_ratio": 0.0029296875, + "alpha": 0.3135829720412232, + "alpha_r2": 0.9036264157598348, + "condition_number": 17.606632232666016, + "top_10_sv": [ + 10.988144874572754, + 8.51516342163086, + 5.989171028137207, + 5.643622875213623, + 5.568560600280762, + 5.4336676597595215, + 5.272459030151367, + 5.175837993621826, + 5.057547569274902, + 5.012577533721924 + ] + }, + "mirror_blocks.2.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 932.9176635742188, + "stable_rank": 17.355676651000977, + "spectral_norm": 17.057790756225586, + "frobenius_norm": 71.06300354003906, + "mp_bound": 5.716098426205654, + "n_above_mp": 3, + "n_total": 1024, + "signal_ratio": 0.0029296875, + "alpha": 0.3096054851822462, + "alpha_r2": 0.8985904581003552, + "condition_number": 27.722566604614258, + "top_10_sv": [ + 17.057790756225586, + 8.130094528198242, + 5.897670745849609, + 5.571244716644287, + 5.35498046875, + 5.1931071281433105, + 5.030824661254883, + 4.9875054359436035, + 4.8272528648376465, + 4.735513210296631 + ] + }, + "mirror_blocks.3.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 668.78369140625, + "stable_rank": 23.825225830078125, + "spectral_norm": 9.025834083557129, + "frobenius_norm": 44.05607986450195, + "mp_bound": 1.2180376052856445, + "n_above_mp": 252, + "n_total": 1024, + "signal_ratio": 0.24609375, + "alpha": 0.6714705408796237, + "alpha_r2": 0.8894113940930285, + "condition_number": 9159.8994140625, + "top_10_sv": [ + 9.025834083557129, + 8.449153900146484, + 7.761612415313721, + 6.909491062164307, + 6.815699577331543, + 6.587741374969482, + 6.336292743682861, + 6.108401775360107, + 5.909632205963135, + 5.797994136810303 + ] + }, + "mirror_blocks.3.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 202.90269470214844, + "stable_rank": 16.88824462890625, + "spectral_norm": 5.720531463623047, + "frobenius_norm": 23.50870132446289, + "mp_bound": 2.597637891769409, + "n_above_mp": 22, + "n_total": 256, + "signal_ratio": 0.0859375, + "alpha": 0.560918184646865, + "alpha_r2": 0.9454252339465418, + "condition_number": 17.736957550048828, + "top_10_sv": [ + 5.720531463623047, + 4.991501331329346, + 4.488971710205078, + 4.229294300079346, + 4.1405839920043945, + 4.011020660400391, + 3.7293944358825684, + 3.6631901264190674, + 3.4800527095794678, + 3.3853368759155273 + ] + }, + "mirror_blocks.3.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 235.09652709960938, + "stable_rank": 83.52083587646484, + "spectral_norm": 2.71895170211792, + "frobenius_norm": 24.848426818847656, + "mp_bound": 4.155252456665039, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.3012730456697759, + "alpha_r2": 0.7738300630737882, + "condition_number": 5.078914642333984, + "top_10_sv": [ + 2.71895170211792, + 2.6971206665039062, + 2.6846930980682373, + 2.6274614334106445, + 2.6093835830688477, + 2.595674514770508, + 2.577486991882324, + 2.5672426223754883, + 2.558316230773926, + 2.5405144691467285 + ] + }, + "mirror_blocks.3.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 753.1178588867188, + "stable_rank": 89.79225158691406, + "spectral_norm": 4.5304741859436035, + "frobenius_norm": 42.93021774291992, + "mp_bound": 1.4889023303985596, + "n_above_mp": 294, + "n_total": 1024, + "signal_ratio": 0.287109375, + "alpha": 0.5563091203015265, + "alpha_r2": 0.7360928181080963, + "condition_number": 242209.875, + "top_10_sv": [ + 4.5304741859436035, + 4.124688625335693, + 3.5388174057006836, + 3.2116539478302, + 3.1639211177825928, + 3.1510696411132812, + 3.096708059310913, + 3.071613311767578, + 3.0455527305603027, + 3.0422189235687256 + ] + }, + "mirror_blocks.3.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 940.68798828125, + "stable_rank": 28.452688217163086, + "spectral_norm": 13.86345386505127, + "frobenius_norm": 73.94913482666016, + "mp_bound": 6.111544385094665, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.2968641976341947, + "alpha_r2": 0.8803475686827303, + "condition_number": 23.159500122070312, + "top_10_sv": [ + 13.86345386505127, + 6.375607490539551, + 5.392757415771484, + 5.209571838378906, + 5.160251140594482, + 4.990292072296143, + 4.942629337310791, + 4.851228713989258, + 4.816100597381592, + 4.773686408996582 + ] + }, + "mirror_blocks.3.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 961.6058349609375, + "stable_rank": 205.84022521972656, + "spectral_norm": 4.900354862213135, + "frobenius_norm": 70.30603790283203, + "mp_bound": 6.248393247044619, + "n_above_mp": 0, + "n_total": 1024, + "signal_ratio": 0.0, + "alpha": 0.24032078047185002, + "alpha_r2": 0.7889632178244936, + "condition_number": 10.522656440734863, + "top_10_sv": [ + 4.900354862213135, + 4.166226387023926, + 3.9043843746185303, + 3.8041279315948486, + 3.758477210998535, + 3.6743099689483643, + 3.652327537536621, + 3.6425416469573975, + 3.6395983695983887, + 3.616722822189331 + ] + }, + "mirror_blocks.3.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 936.0010986328125, + "stable_rank": 39.38264846801758, + "spectral_norm": 11.31739330291748, + "frobenius_norm": 71.02297973632812, + "mp_bound": 5.770463722639487, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.31180313149770134, + "alpha_r2": 0.9017039245785317, + "condition_number": 19.92220115661621, + "top_10_sv": [ + 11.31739330291748, + 7.555158615112305, + 5.742516040802002, + 5.567230701446533, + 5.41655158996582, + 5.353912353515625, + 5.188925743103027, + 4.998415470123291, + 4.943516731262207, + 4.905675411224365 + ] + }, + "mirror_blocks.3.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 933.6157836914062, + "stable_rank": 15.875147819519043, + "spectral_norm": 17.873126983642578, + "frobenius_norm": 71.21302795410156, + "mp_bound": 5.750105990458919, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.30663938343953634, + "alpha_r2": 0.895230441041454, + "condition_number": 29.761030197143555, + "top_10_sv": [ + 17.873126983642578, + 7.781844139099121, + 5.405266761779785, + 5.315219402313232, + 5.125383377075195, + 4.982079029083252, + 4.951698303222656, + 4.816822528839111, + 4.754363536834717, + 4.7095770835876465 + ] + }, + "mirror_blocks.4.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 659.3209838867188, + "stable_rank": 25.979053497314453, + "spectral_norm": 8.93734073638916, + "frobenius_norm": 45.553314208984375, + "mp_bound": 1.2160660028457642, + "n_above_mp": 264, + "n_total": 1024, + "signal_ratio": 0.2578125, + "alpha": 0.6902247363978936, + "alpha_r2": 0.8874139081937663, + "condition_number": 65475.60546875, + "top_10_sv": [ + 8.93734073638916, + 8.294153213500977, + 7.478105068206787, + 6.971124649047852, + 6.827032566070557, + 6.5097336769104, + 6.360535144805908, + 6.191610336303711, + 5.969921112060547, + 5.941476345062256 + ] + }, + "mirror_blocks.4.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 205.96295166015625, + "stable_rank": 15.29781436920166, + "spectral_norm": 6.348496913909912, + "frobenius_norm": 24.830507278442383, + "mp_bound": 2.733478367328644, + "n_above_mp": 20, + "n_total": 256, + "signal_ratio": 0.078125, + "alpha": 0.5387611133099715, + "alpha_r2": 0.9134764112743838, + "condition_number": 19.85637664794922, + "top_10_sv": [ + 6.348496913909912, + 4.729928493499756, + 4.280251502990723, + 4.090523719787598, + 3.962505578994751, + 3.8929145336151123, + 3.6975111961364746, + 3.560894727706909, + 3.4651498794555664, + 3.3689146041870117 + ] + }, + "mirror_blocks.4.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 233.59268188476562, + "stable_rank": 78.71143341064453, + "spectral_norm": 2.7420403957366943, + "frobenius_norm": 24.32723617553711, + "mp_bound": 3.916230082511902, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.32123134972886624, + "alpha_r2": 0.758066683475472, + "condition_number": 5.189210414886475, + "top_10_sv": [ + 2.7420403957366943, + 2.681504249572754, + 2.612483263015747, + 2.607248306274414, + 2.5621135234832764, + 2.5517048835754395, + 2.5153214931488037, + 2.509310007095337, + 2.491889715194702, + 2.474046230316162 + ] + }, + "mirror_blocks.4.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 757.7962036132812, + "stable_rank": 133.30650329589844, + "spectral_norm": 3.6752734184265137, + "frobenius_norm": 42.43413162231445, + "mp_bound": 1.5383262634277344, + "n_above_mp": 263, + "n_total": 1024, + "signal_ratio": 0.2568359375, + "alpha": 0.5484115406509759, + "alpha_r2": 0.754146508512419, + "condition_number": 3930.852294921875, + "top_10_sv": [ + 3.6752734184265137, + 3.559894561767578, + 3.494556188583374, + 3.3160152435302734, + 3.2314062118530273, + 3.2022900581359863, + 3.188082218170166, + 3.1670987606048584, + 3.13281512260437, + 3.121824264526367 + ] + }, + "mirror_blocks.4.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 940.1866455078125, + "stable_rank": 27.31290626525879, + "spectral_norm": 14.135549545288086, + "frobenius_norm": 73.8748550415039, + "mp_bound": 6.090530147964709, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.29774696514032334, + "alpha_r2": 0.8819129543914068, + "condition_number": 24.85938835144043, + "top_10_sv": [ + 14.135549545288086, + 6.220456600189209, + 5.565029621124268, + 5.410929203033447, + 5.311360836029053, + 5.172118186950684, + 4.925539016723633, + 4.822296619415283, + 4.759129524230957, + 4.697517395019531 + ] + }, + "mirror_blocks.4.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 961.8186645507812, + "stable_rank": 223.22413635253906, + "spectral_norm": 4.696602821350098, + "frobenius_norm": 70.17047119140625, + "mp_bound": 6.245199877290805, + "n_above_mp": 0, + "n_total": 1024, + "signal_ratio": 0.0, + "alpha": 0.23814388518502785, + "alpha_r2": 0.7801186762757512, + "condition_number": 9.319127082824707, + "top_10_sv": [ + 4.696602821350098, + 3.8831467628479004, + 3.7380733489990234, + 3.6682209968566895, + 3.653320789337158, + 3.64449405670166, + 3.6254005432128906, + 3.6136178970336914, + 3.600184440612793, + 3.5930025577545166 + ] + }, + "mirror_blocks.4.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 935.709716796875, + "stable_rank": 41.43883514404297, + "spectral_norm": 11.013794898986816, + "frobenius_norm": 70.89910888671875, + "mp_bound": 5.757246494908439, + "n_above_mp": 3, + "n_total": 1024, + "signal_ratio": 0.0029296875, + "alpha": 0.31256865519274685, + "alpha_r2": 0.9018310196185085, + "condition_number": 20.188791275024414, + "top_10_sv": [ + 11.013794898986816, + 7.111091136932373, + 5.797022819519043, + 5.584803104400635, + 5.569212436676025, + 5.372109413146973, + 5.156131744384766, + 5.0494232177734375, + 5.016009330749512, + 4.94374942779541 + ] + }, + "mirror_blocks.4.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 932.884765625, + "stable_rank": 15.5200777053833, + "spectral_norm": 17.99614715576172, + "frobenius_norm": 70.89677429199219, + "mp_bound": 5.695126740322417, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.30722776532562235, + "alpha_r2": 0.8919362240971842, + "condition_number": 31.941059112548828, + "top_10_sv": [ + 17.99614715576172, + 7.60745906829834, + 5.339727401733398, + 5.13498067855835, + 5.05626106262207, + 4.953657627105713, + 4.860082149505615, + 4.75862455368042, + 4.624716281890869, + 4.573176383972168 + ] + }, + "mirror_blocks.5.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 695.643310546875, + "stable_rank": 22.929719924926758, + "spectral_norm": 9.618481636047363, + "frobenius_norm": 46.05808639526367, + "mp_bound": 1.3775806427001953, + "n_above_mp": 262, + "n_total": 1024, + "signal_ratio": 0.255859375, + "alpha": 0.6498366965533406, + "alpha_r2": 0.859700373997673, + "condition_number": 24184.84765625, + "top_10_sv": [ + 9.618481636047363, + 6.7257184982299805, + 6.150356769561768, + 5.825774192810059, + 5.74788761138916, + 5.4049153327941895, + 5.323736667633057, + 5.2327046394348145, + 5.12689733505249, + 5.103203773498535 + ] + }, + "mirror_blocks.5.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 220.0661163330078, + "stable_rank": 15.914340019226074, + "spectral_norm": 6.57172155380249, + "frobenius_norm": 26.2164249420166, + "mp_bound": 3.523664653301239, + "n_above_mp": 5, + "n_total": 256, + "signal_ratio": 0.01953125, + "alpha": 0.43238379072811556, + "alpha_r2": 0.8920653461687482, + "condition_number": 16.296154022216797, + "top_10_sv": [ + 6.57172155380249, + 4.317174434661865, + 4.024784564971924, + 3.720271587371826, + 3.6963441371917725, + 3.436851978302002, + 3.4051930904388428, + 3.2744929790496826, + 3.129648447036743, + 3.101102113723755 + ] + }, + "mirror_blocks.5.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 232.851806640625, + "stable_rank": 66.37837219238281, + "spectral_norm": 2.84710693359375, + "frobenius_norm": 23.196212768554688, + "mp_bound": 3.906809628009796, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.3451138203758199, + "alpha_r2": 0.8572176283838381, + "condition_number": 5.4493513107299805, + "top_10_sv": [ + 2.84710693359375, + 2.8348937034606934, + 2.765777111053467, + 2.7372217178344727, + 2.725008964538574, + 2.6839492321014404, + 2.663705825805664, + 2.6535849571228027, + 2.642749071121216, + 2.603186845779419 + ] + }, + "mirror_blocks.5.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 725.841552734375, + "stable_rank": 66.45878601074219, + "spectral_norm": 5.074488639831543, + "frobenius_norm": 41.368377685546875, + "mp_bound": 1.345879316329956, + "n_above_mp": 272, + "n_total": 1024, + "signal_ratio": 0.265625, + "alpha": 0.6105563187102377, + "alpha_r2": 0.8106867983982253, + "condition_number": 116176.3984375, + "top_10_sv": [ + 5.074488639831543, + 4.992178916931152, + 4.793430805206299, + 4.162780284881592, + 4.0929999351501465, + 3.9281368255615234, + 3.893636465072632, + 3.7693252563476562, + 3.6722195148468018, + 3.591952085494995 + ] + }, + "mirror_blocks.5.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 939.879150390625, + "stable_rank": 28.433774948120117, + "spectral_norm": 13.783834457397461, + "frobenius_norm": 73.5, + "mp_bound": 6.060812661193209, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.2988348536661662, + "alpha_r2": 0.8840784313158607, + "condition_number": 24.808433532714844, + "top_10_sv": [ + 13.783834457397461, + 6.067254543304443, + 5.563858985900879, + 5.493241310119629, + 5.434630393981934, + 5.171338081359863, + 5.0092878341674805, + 4.894378662109375, + 4.781822204589844, + 4.764616012573242 + ] + }, + "mirror_blocks.5.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 962.5578002929688, + "stable_rank": 257.51593017578125, + "spectral_norm": 4.353452205657959, + "frobenius_norm": 69.86116027832031, + "mp_bound": 6.220005486733108, + "n_above_mp": 0, + "n_total": 1024, + "signal_ratio": 0.0, + "alpha": 0.23684387615978084, + "alpha_r2": 0.7821212071843551, + "condition_number": 7.595459938049316, + "top_10_sv": [ + 4.353452205657959, + 4.138293266296387, + 3.7534923553466797, + 3.747767925262451, + 3.697023630142212, + 3.6714394092559814, + 3.6464784145355225, + 3.5992918014526367, + 3.5736517906188965, + 3.572458505630493 + ] + }, + "mirror_blocks.5.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 935.311279296875, + "stable_rank": 37.35165023803711, + "spectral_norm": 11.596614837646484, + "frobenius_norm": 70.87386322021484, + "mp_bound": 5.7540277970315765, + "n_above_mp": 4, + "n_total": 1024, + "signal_ratio": 0.00390625, + "alpha": 0.31333360946727273, + "alpha_r2": 0.9045186545701961, + "condition_number": 21.73819923400879, + "top_10_sv": [ + 11.596614837646484, + 6.921240329742432, + 5.9483962059021, + 5.860919952392578, + 5.70407247543335, + 5.528343677520752, + 5.192611217498779, + 5.174591541290283, + 5.031411647796631, + 4.999974727630615 + ] + }, + "mirror_blocks.5.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 933.3309936523438, + "stable_rank": 15.628568649291992, + "spectral_norm": 17.900846481323242, + "frobenius_norm": 70.76738739013672, + "mp_bound": 5.694886629715928, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.30668365638724626, + "alpha_r2": 0.89256335390433, + "condition_number": 31.84065055847168, + "top_10_sv": [ + 17.900846481323242, + 7.109786510467529, + 5.478981971740723, + 5.264373779296875, + 5.042883396148682, + 4.9086594581604, + 4.80668830871582, + 4.770280361175537, + 4.753232955932617, + 4.599517822265625 + ] + }, + "mirror_blocks.6.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 690.5945434570312, + "stable_rank": 25.66936492919922, + "spectral_norm": 9.128558158874512, + "frobenius_norm": 46.24979019165039, + "mp_bound": 1.3391921520233154, + "n_above_mp": 277, + "n_total": 1024, + "signal_ratio": 0.2705078125, + "alpha": 0.6617243996327002, + "alpha_r2": 0.8531991999428439, + "condition_number": 14061.2861328125, + "top_10_sv": [ + 9.128558158874512, + 6.674101829528809, + 5.996070861816406, + 5.833440780639648, + 5.6288957595825195, + 5.524519443511963, + 5.329051971435547, + 5.284647464752197, + 4.979404449462891, + 4.877297878265381 + ] + }, + "mirror_blocks.6.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 215.31143188476562, + "stable_rank": 17.792217254638672, + "spectral_norm": 6.275065898895264, + "frobenius_norm": 26.4687442779541, + "mp_bound": 3.2855671048164368, + "n_above_mp": 7, + "n_total": 256, + "signal_ratio": 0.02734375, + "alpha": 0.4513467446917798, + "alpha_r2": 0.8565969495018264, + "condition_number": 25.4302978515625, + "top_10_sv": [ + 6.275065898895264, + 4.296389102935791, + 3.949702739715576, + 3.8314099311828613, + 3.630023717880249, + 3.4392220973968506, + 3.3322579860687256, + 3.23199462890625, + 3.1604878902435303, + 3.066929340362549 + ] + }, + "mirror_blocks.6.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 234.04400634765625, + "stable_rank": 75.24044036865234, + "spectral_norm": 2.5551929473876953, + "frobenius_norm": 22.1640625, + "mp_bound": 3.730217546224594, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.32921333064046754, + "alpha_r2": 0.8210774461774968, + "condition_number": 5.3407416343688965, + "top_10_sv": [ + 2.5551929473876953, + 2.5337116718292236, + 2.4994685649871826, + 2.480562448501587, + 2.4531822204589844, + 2.418194532394409, + 2.4076313972473145, + 2.391889810562134, + 2.381805181503296, + 2.3679895401000977 + ] + }, + "mirror_blocks.6.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 745.9483642578125, + "stable_rank": 85.4196548461914, + "spectral_norm": 4.365376949310303, + "frobenius_norm": 40.34601593017578, + "mp_bound": 1.428241491317749, + "n_above_mp": 252, + "n_total": 1024, + "signal_ratio": 0.24609375, + "alpha": 0.574228909771744, + "alpha_r2": 0.792693311782334, + "condition_number": 42105.1015625, + "top_10_sv": [ + 4.365376949310303, + 4.1989970207214355, + 3.907623529434204, + 3.6155974864959717, + 3.485995054244995, + 3.4423394203186035, + 3.378070116043091, + 3.340437412261963, + 3.335007667541504, + 3.290475845336914 + ] + }, + "mirror_blocks.6.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 939.1995239257812, + "stable_rank": 24.376602172851562, + "spectral_norm": 14.879915237426758, + "frobenius_norm": 73.46611022949219, + "mp_bound": 6.034717602579947, + "n_above_mp": 1, + "n_total": 1024, + "signal_ratio": 0.0009765625, + "alpha": 0.2993599517386563, + "alpha_r2": 0.8860561649361656, + "condition_number": 26.614994049072266, + "top_10_sv": [ + 14.879915237426758, + 6.025299072265625, + 5.574345111846924, + 5.515914440155029, + 5.3650641441345215, + 5.144935131072998, + 5.044266700744629, + 4.939062118530273, + 4.838558197021484, + 4.763415336608887 + ] + }, + "mirror_blocks.6.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 962.30029296875, + "stable_rank": 220.38674926757812, + "spectral_norm": 4.692926406860352, + "frobenius_norm": 69.66850280761719, + "mp_bound": 6.215004702118631, + "n_above_mp": 0, + "n_total": 1024, + "signal_ratio": 0.0, + "alpha": 0.23822240537583556, + "alpha_r2": 0.7885570607575301, + "condition_number": 7.936253070831299, + "top_10_sv": [ + 4.692926406860352, + 4.510348796844482, + 3.9502971172332764, + 3.69740629196167, + 3.678804874420166, + 3.666692018508911, + 3.6417453289031982, + 3.6243226528167725, + 3.5855419635772705, + 3.571301221847534 + ] + }, + "mirror_blocks.6.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 934.6430053710938, + "stable_rank": 38.63431930541992, + "spectral_norm": 11.377074241638184, + "frobenius_norm": 70.71592712402344, + "mp_bound": 5.723519566258538, + "n_above_mp": 4, + "n_total": 1024, + "signal_ratio": 0.00390625, + "alpha": 0.3151998851018473, + "alpha_r2": 0.9058100743530724, + "condition_number": 21.40561866760254, + "top_10_sv": [ + 11.377074241638184, + 6.558457374572754, + 6.283778667449951, + 5.846485614776611, + 5.7009453773498535, + 5.586912155151367, + 5.324425220489502, + 5.209959030151367, + 5.179622173309326, + 5.027733325958252 + ] + }, + "mirror_blocks.6.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 933.10400390625, + "stable_rank": 15.90274715423584, + "spectral_norm": 17.684099197387695, + "frobenius_norm": 70.52108764648438, + "mp_bound": 5.679857934624531, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.30788674741320626, + "alpha_r2": 0.8961915588900076, + "condition_number": 32.12616729736328, + "top_10_sv": [ + 17.684099197387695, + 6.935358047485352, + 5.611484050750732, + 5.450901031494141, + 5.159399509429932, + 5.026580810546875, + 4.989626407623291, + 4.859060764312744, + 4.810643196105957, + 4.6494903564453125 + ] + }, + "mirror_blocks.7.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 713.4393920898438, + "stable_rank": 22.43491554260254, + "spectral_norm": 10.032463073730469, + "frobenius_norm": 47.51927185058594, + "mp_bound": 1.5024712085723877, + "n_above_mp": 262, + "n_total": 1024, + "signal_ratio": 0.255859375, + "alpha": 0.6233435722065556, + "alpha_r2": 0.8389591603731373, + "condition_number": 15434.236328125, + "top_10_sv": [ + 10.032463073730469, + 7.3711724281311035, + 5.998594284057617, + 5.6278181076049805, + 5.46798849105835, + 5.363643646240234, + 5.033144474029541, + 4.887186050415039, + 4.855867385864258, + 4.75378942489624 + ] + }, + "mirror_blocks.7.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 222.8997344970703, + "stable_rank": 14.80882453918457, + "spectral_norm": 7.2740302085876465, + "frobenius_norm": 27.992094039916992, + "mp_bound": 3.999657779932022, + "n_above_mp": 3, + "n_total": 256, + "signal_ratio": 0.01171875, + "alpha": 0.41331453138263385, + "alpha_r2": 0.9132484891737451, + "condition_number": 22.707794189453125, + "top_10_sv": [ + 7.2740302085876465, + 4.753385066986084, + 4.33350133895874, + 3.8954622745513916, + 3.6613080501556396, + 3.6319446563720703, + 3.5839431285858154, + 3.4977712631225586, + 3.384801149368286, + 3.3376522064208984 + ] + }, + "mirror_blocks.7.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 242.89794921875, + "stable_rank": 94.84819030761719, + "spectral_norm": 2.1185214519500732, + "frobenius_norm": 20.632287979125977, + "mp_bound": 4.075343012809753, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.23326027048043338, + "alpha_r2": 0.8158139123937141, + "condition_number": 3.984344720840454, + "top_10_sv": [ + 2.1185214519500732, + 2.096585512161255, + 2.0770840644836426, + 2.063124656677246, + 2.0428309440612793, + 2.0320327281951904, + 2.023172378540039, + 2.004295587539673, + 1.9936774969100952, + 1.9826067686080933 + ] + }, + "mirror_blocks.7.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 743.5704345703125, + "stable_rank": 91.8500747680664, + "spectral_norm": 4.115333080291748, + "frobenius_norm": 39.440711975097656, + "mp_bound": 1.3934342861175537, + "n_above_mp": 243, + "n_total": 1024, + "signal_ratio": 0.2373046875, + "alpha": 0.5783419308841269, + "alpha_r2": 0.8076087671636116, + "condition_number": 7065.724609375, + "top_10_sv": [ + 4.115333080291748, + 3.882228374481201, + 3.7874104976654053, + 3.6883599758148193, + 3.590550661087036, + 3.564465045928955, + 3.534191846847534, + 3.504263401031494, + 3.4756851196289062, + 3.4737632274627686 + ] + }, + "mirror_blocks.7.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 938.8828735351562, + "stable_rank": 26.32727813720703, + "spectral_norm": 14.27689266204834, + "frobenius_norm": 73.25489807128906, + "mp_bound": 6.021627522026597, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.30086093817674614, + "alpha_r2": 0.8896334932395706, + "condition_number": 27.449453353881836, + "top_10_sv": [ + 14.27689266204834, + 6.434393882751465, + 5.722557544708252, + 5.604750156402588, + 5.46126127243042, + 5.374871730804443, + 5.10688591003418, + 5.034847736358643, + 4.925204277038574, + 4.890027046203613 + ] + }, + "mirror_blocks.7.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 961.698974609375, + "stable_rank": 170.0816192626953, + "spectral_norm": 5.325859069824219, + "frobenius_norm": 69.4573745727539, + "mp_bound": 6.177495778135295, + "n_above_mp": 0, + "n_total": 1024, + "signal_ratio": 0.0, + "alpha": 0.24077609648892534, + "alpha_r2": 0.7955954649026982, + "condition_number": 9.363985061645508, + "top_10_sv": [ + 5.325859069824219, + 4.40978479385376, + 4.185070037841797, + 3.895057439804077, + 3.7840309143066406, + 3.746607542037964, + 3.6984970569610596, + 3.624131441116333, + 3.6105198860168457, + 3.5710272789001465 + ] + }, + "mirror_blocks.7.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 934.0111694335938, + "stable_rank": 38.740394592285156, + "spectral_norm": 11.379335403442383, + "frobenius_norm": 70.82701110839844, + "mp_bound": 5.719267480961343, + "n_above_mp": 5, + "n_total": 1024, + "signal_ratio": 0.0048828125, + "alpha": 0.31633937015208247, + "alpha_r2": 0.9071282689495428, + "condition_number": 20.50752067565918, + "top_10_sv": [ + 11.379335403442383, + 6.830854415893555, + 6.323436737060547, + 5.985747337341309, + 5.817127704620361, + 5.648834228515625, + 5.396520614624023, + 5.224006652832031, + 5.196556091308594, + 5.1718621253967285 + ] + }, + "mirror_blocks.7.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 934.5953369140625, + "stable_rank": 17.416709899902344, + "spectral_norm": 16.804960250854492, + "frobenius_norm": 70.13269805908203, + "mp_bound": 5.671777756809953, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.3061424010953889, + "alpha_r2": 0.8950960755417341, + "condition_number": 31.14119529724121, + "top_10_sv": [ + 16.804960250854492, + 6.593157768249512, + 5.608399391174316, + 5.492972373962402, + 5.184736728668213, + 5.104874134063721, + 4.958425998687744, + 4.856645107269287, + 4.73080587387085, + 4.586895942687988 + ] + }, + "mirror_blocks.8.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 708.7847290039062, + "stable_rank": 22.624868392944336, + "spectral_norm": 10.367616653442383, + "frobenius_norm": 49.3141975402832, + "mp_bound": 1.5332320928573608, + "n_above_mp": 263, + "n_total": 1024, + "signal_ratio": 0.2568359375, + "alpha": 0.6313900126588273, + "alpha_r2": 0.8431991048269009, + "condition_number": 6997.51318359375, + "top_10_sv": [ + 10.367616653442383, + 7.487847805023193, + 6.699296951293945, + 6.283844947814941, + 5.869067668914795, + 5.464127063751221, + 5.285478115081787, + 5.107686996459961, + 4.977625370025635, + 4.867602348327637 + ] + }, + "mirror_blocks.8.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 222.934814453125, + "stable_rank": 14.15692138671875, + "spectral_norm": 7.719427108764648, + "frobenius_norm": 29.044872283935547, + "mp_bound": 4.130968272686005, + "n_above_mp": 2, + "n_total": 256, + "signal_ratio": 0.0078125, + "alpha": 0.40654842500508515, + "alpha_r2": 0.8947060047094757, + "condition_number": 18.947622299194336, + "top_10_sv": [ + 7.719427108764648, + 4.777718544006348, + 4.024661540985107, + 3.929851770401001, + 3.8574132919311523, + 3.681696891784668, + 3.57102632522583, + 3.513458490371704, + 3.455169200897217, + 3.388367176055908 + ] + }, + "mirror_blocks.8.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 242.82174682617188, + "stable_rank": 57.3770866394043, + "spectral_norm": 2.455564498901367, + "frobenius_norm": 18.60032844543457, + "mp_bound": 3.702546000480652, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.2469961262981949, + "alpha_r2": 0.9054386415188996, + "condition_number": 5.1941022872924805, + "top_10_sv": [ + 2.455564498901367, + 2.3859848976135254, + 2.2942311763763428, + 2.1560420989990234, + 2.1271562576293945, + 2.0149993896484375, + 1.9784237146377563, + 1.9277706146240234, + 1.8405474424362183, + 1.8338909149169922 + ] + }, + "mirror_blocks.8.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 744.23876953125, + "stable_rank": 75.36436462402344, + "spectral_norm": 4.424859046936035, + "frobenius_norm": 38.41337585449219, + "mp_bound": 1.3768699169158936, + "n_above_mp": 233, + "n_total": 1024, + "signal_ratio": 0.2275390625, + "alpha": 0.5759859956319195, + "alpha_r2": 0.8186290266548157, + "condition_number": 25172.1875, + "top_10_sv": [ + 4.424859046936035, + 4.324452877044678, + 4.212471961975098, + 4.0055623054504395, + 3.9864373207092285, + 3.9474308490753174, + 3.881716251373291, + 3.7692489624023438, + 3.6952016353607178, + 3.5881783962249756 + ] + }, + "mirror_blocks.8.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 939.1094970703125, + "stable_rank": 29.88106918334961, + "spectral_norm": 13.30937385559082, + "frobenius_norm": 72.75379943847656, + "mp_bound": 5.979648690887857, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.3012775890485647, + "alpha_r2": 0.8918981608202192, + "condition_number": 23.843961715698242, + "top_10_sv": [ + 13.30937385559082, + 6.357924461364746, + 5.745584487915039, + 5.567939281463623, + 5.485853672027588, + 5.218508243560791, + 5.193744659423828, + 5.022536277770996, + 4.95093297958374, + 4.880833625793457 + ] + }, + "mirror_blocks.8.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 962.4315795898438, + "stable_rank": 162.45176696777344, + "spectral_norm": 5.428140640258789, + "frobenius_norm": 69.18521881103516, + "mp_bound": 6.182671833361259, + "n_above_mp": 0, + "n_total": 1024, + "signal_ratio": 0.0, + "alpha": 0.23935230299780846, + "alpha_r2": 0.7969871711009201, + "condition_number": 9.506427764892578, + "top_10_sv": [ + 5.428140640258789, + 4.33837890625, + 4.140955924987793, + 3.8865857124328613, + 3.7426953315734863, + 3.679211139678955, + 3.658932685852051, + 3.5904488563537598, + 3.5614984035491943, + 3.555729627609253 + ] + }, + "mirror_blocks.8.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 932.8895874023438, + "stable_rank": 38.66987609863281, + "spectral_norm": 11.35371208190918, + "frobenius_norm": 70.60318756103516, + "mp_bound": 5.687799820887269, + "n_above_mp": 6, + "n_total": 1024, + "signal_ratio": 0.005859375, + "alpha": 0.31881180618117017, + "alpha_r2": 0.910799649707321, + "condition_number": 19.516006469726562, + "top_10_sv": [ + 11.35371208190918, + 7.110798358917236, + 6.526391506195068, + 6.0809431076049805, + 5.928285121917725, + 5.7620110511779785, + 5.59823751449585, + 5.318066596984863, + 5.263365268707275, + 5.231112957000732 + ] + }, + "mirror_blocks.8.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 934.1748046875, + "stable_rank": 19.08730697631836, + "spectral_norm": 15.961655616760254, + "frobenius_norm": 69.73490905761719, + "mp_bound": 5.651122672464809, + "n_above_mp": 3, + "n_total": 1024, + "signal_ratio": 0.0029296875, + "alpha": 0.30832480049308747, + "alpha_r2": 0.8978174350481211, + "condition_number": 27.034299850463867, + "top_10_sv": [ + 15.961655616760254, + 6.6953325271606445, + 5.7051897048950195, + 5.583639144897461, + 5.292130470275879, + 5.1545515060424805, + 5.096606254577637, + 4.889472007751465, + 4.8003363609313965, + 4.741666316986084 + ] + }, + "mirror_blocks.9.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 711.29833984375, + "stable_rank": 17.04989242553711, + "spectral_norm": 12.446403503417969, + "frobenius_norm": 51.393089294433594, + "mp_bound": 1.6202669143676758, + "n_above_mp": 257, + "n_total": 1024, + "signal_ratio": 0.2509765625, + "alpha": 0.6195715672602753, + "alpha_r2": 0.8450078434594082, + "condition_number": 11394.068359375, + "top_10_sv": [ + 12.446403503417969, + 8.286026000976562, + 7.579743385314941, + 6.884772777557373, + 6.39437198638916, + 6.195257663726807, + 5.972519874572754, + 5.657010078430176, + 5.515270233154297, + 5.180325984954834 + ] + }, + "mirror_blocks.9.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 221.03575134277344, + "stable_rank": 11.84292221069336, + "spectral_norm": 9.067399024963379, + "frobenius_norm": 31.20413589477539, + "mp_bound": 4.4118854105472565, + "n_above_mp": 4, + "n_total": 256, + "signal_ratio": 0.015625, + "alpha": 0.41431871539108134, + "alpha_r2": 0.9181762157096446, + "condition_number": 24.452407836914062, + "top_10_sv": [ + 9.067399024963379, + 5.231292724609375, + 4.916482925415039, + 4.71070671081543, + 4.254949569702148, + 4.178931713104248, + 3.9478561878204346, + 3.825223922729492, + 3.580286741256714, + 3.5541818141937256 + ] + }, + "mirror_blocks.9.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 247.06988525390625, + "stable_rank": 118.23783874511719, + "spectral_norm": 1.7152295112609863, + "frobenius_norm": 18.650930404663086, + "mp_bound": 3.993492990732193, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.17727484939472438, + "alpha_r2": 0.7712191301123956, + "condition_number": 3.1767354011535645, + "top_10_sv": [ + 1.7152295112609863, + 1.7044662237167358, + 1.6948500871658325, + 1.6776847839355469, + 1.6665598154067993, + 1.6515754461288452, + 1.649651288986206, + 1.6434025764465332, + 1.6338359117507935, + 1.6252888441085815 + ] + }, + "mirror_blocks.9.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 762.5900268554688, + "stable_rank": 73.32815551757812, + "spectral_norm": 4.416748046875, + "frobenius_norm": 37.821434020996094, + "mp_bound": 1.4359127283096313, + "n_above_mp": 232, + "n_total": 1024, + "signal_ratio": 0.2265625, + "alpha": 0.5409222702442117, + "alpha_r2": 0.7793055554705087, + "condition_number": 16710.64453125, + "top_10_sv": [ + 4.416748046875, + 3.5991451740264893, + 3.226005792617798, + 3.175535202026367, + 3.0847887992858887, + 3.070218086242676, + 3.0571746826171875, + 3.017014980316162, + 3.001363515853882, + 2.9834465980529785 + ] + }, + "mirror_blocks.9.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 939.15869140625, + "stable_rank": 33.91809844970703, + "spectral_norm": 12.474991798400879, + "frobenius_norm": 72.65341186523438, + "mp_bound": 5.979165430300114, + "n_above_mp": 3, + "n_total": 1024, + "signal_ratio": 0.0029296875, + "alpha": 0.3023417348783871, + "alpha_r2": 0.8912821809445377, + "condition_number": 20.458080291748047, + "top_10_sv": [ + 12.474991798400879, + 6.470742702484131, + 5.993317604064941, + 5.597293853759766, + 5.551023006439209, + 5.396394729614258, + 5.178300857543945, + 4.999173641204834, + 4.912963390350342, + 4.83324670791626 + ] + }, + "mirror_blocks.9.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 960.6434326171875, + "stable_rank": 164.9027099609375, + "spectral_norm": 5.40290641784668, + "frobenius_norm": 69.3811264038086, + "mp_bound": 6.149875966514581, + "n_above_mp": 0, + "n_total": 1024, + "signal_ratio": 0.0, + "alpha": 0.2464693074630583, + "alpha_r2": 0.8122832697061805, + "condition_number": 7.622074127197266, + "top_10_sv": [ + 5.40290641784668, + 4.749518871307373, + 4.469895362854004, + 4.379708766937256, + 4.0926337242126465, + 3.8626513481140137, + 3.8501579761505127, + 3.7479913234710693, + 3.7273378372192383, + 3.668347120285034 + ] + }, + "mirror_blocks.9.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 933.2415161132812, + "stable_rank": 42.43833541870117, + "spectral_norm": 10.883054733276367, + "frobenius_norm": 70.89734649658203, + "mp_bound": 5.709281108606219, + "n_above_mp": 5, + "n_total": 1024, + "signal_ratio": 0.0048828125, + "alpha": 0.3181657510181233, + "alpha_r2": 0.9079556474814661, + "condition_number": 18.700748443603516, + "top_10_sv": [ + 10.883054733276367, + 7.358108997344971, + 6.594238758087158, + 6.183475971221924, + 5.726895809173584, + 5.7068681716918945, + 5.496458053588867, + 5.343929290771484, + 5.221693515777588, + 5.036039352416992 + ] + }, + "mirror_blocks.9.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 934.9693603515625, + "stable_rank": 24.683242797851562, + "spectral_norm": 13.965388298034668, + "frobenius_norm": 69.38317108154297, + "mp_bound": 5.628721467316778, + "n_above_mp": 4, + "n_total": 1024, + "signal_ratio": 0.00390625, + "alpha": 0.310157806664775, + "alpha_r2": 0.9004216399760955, + "condition_number": 25.719921112060547, + "top_10_sv": [ + 13.965388298034668, + 6.544376850128174, + 5.703333854675293, + 5.672152042388916, + 5.447852611541748, + 5.258004188537598, + 5.147795677185059, + 4.94088077545166, + 4.726480484008789, + 4.615532398223877 + ] + }, + "mirror_blocks.10.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 715.0506591796875, + "stable_rank": 18.026710510253906, + "spectral_norm": 12.307391166687012, + "frobenius_norm": 52.25456619262695, + "mp_bound": 1.6443891525268555, + "n_above_mp": 266, + "n_total": 1024, + "signal_ratio": 0.259765625, + "alpha": 0.6190577890980717, + "alpha_r2": 0.8331037486878023, + "condition_number": 14410.1494140625, + "top_10_sv": [ + 12.307391166687012, + 7.391971111297607, + 6.891163349151611, + 6.478761672973633, + 5.85650110244751, + 5.661088943481445, + 5.545260906219482, + 5.3275346755981445, + 5.236697673797607, + 4.959378719329834 + ] + }, + "mirror_blocks.10.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 218.35302734375, + "stable_rank": 11.719382286071777, + "spectral_norm": 9.351849555969238, + "frobenius_norm": 32.014732360839844, + "mp_bound": 4.410071432590485, + "n_above_mp": 5, + "n_total": 256, + "signal_ratio": 0.01953125, + "alpha": 0.4220288957054601, + "alpha_r2": 0.9069362942369227, + "condition_number": 31.275053024291992, + "top_10_sv": [ + 9.351849555969238, + 5.434247016906738, + 5.1260576248168945, + 4.664729118347168, + 4.590224266052246, + 4.215663909912109, + 3.9440078735351562, + 3.856670618057251, + 3.7856268882751465, + 3.7042362689971924 + ] + }, + "mirror_blocks.10.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 242.09017944335938, + "stable_rank": 96.60252380371094, + "spectral_norm": 1.7553597688674927, + "frobenius_norm": 17.252830505371094, + "mp_bound": 3.335477113723755, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.2396909564516281, + "alpha_r2": 0.7970155230904775, + "condition_number": 4.241220474243164, + "top_10_sv": [ + 1.7553597688674927, + 1.728592872619629, + 1.7165671586990356, + 1.7077291011810303, + 1.6864830255508423, + 1.6765621900558472, + 1.6719224452972412, + 1.6577045917510986, + 1.6539642810821533, + 1.6451408863067627 + ] + }, + "mirror_blocks.10.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 774.3184204101562, + "stable_rank": 70.33253479003906, + "spectral_norm": 4.335371017456055, + "frobenius_norm": 36.35837173461914, + "mp_bound": 1.45585298538208, + "n_above_mp": 212, + "n_total": 1024, + "signal_ratio": 0.20703125, + "alpha": 0.5196329580318612, + "alpha_r2": 0.7753519005094018, + "condition_number": 3017.63330078125, + "top_10_sv": [ + 4.335371017456055, + 3.578299045562744, + 3.272932291030884, + 3.040836811065674, + 2.9765424728393555, + 2.81828236579895, + 2.7957470417022705, + 2.7511024475097656, + 2.7211036682128906, + 2.688098669052124 + ] + }, + "mirror_blocks.10.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 938.5880126953125, + "stable_rank": 42.61520767211914, + "spectral_norm": 11.057217597961426, + "frobenius_norm": 72.18187713623047, + "mp_bound": 5.934581854987181, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.3045587486830561, + "alpha_r2": 0.8915950578631386, + "condition_number": 20.82532501220703, + "top_10_sv": [ + 11.057217597961426, + 6.340665817260742, + 5.904058456420898, + 5.616177558898926, + 5.516621112823486, + 5.353770732879639, + 5.197311878204346, + 5.016531467437744, + 4.952464580535889, + 4.864062786102295 + ] + }, + "mirror_blocks.10.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 955.3253173828125, + "stable_rank": 126.1723403930664, + "spectral_norm": 6.180022239685059, + "frobenius_norm": 69.41800689697266, + "mp_bound": 6.032750620544087, + "n_above_mp": 1, + "n_total": 1024, + "signal_ratio": 0.0009765625, + "alpha": 0.2620662808096614, + "alpha_r2": 0.8359871289092786, + "condition_number": 11.733057022094727, + "top_10_sv": [ + 6.180022239685059, + 5.058028697967529, + 4.588860988616943, + 4.50522518157959, + 4.353050231933594, + 4.234713077545166, + 4.066932678222656, + 4.03006649017334, + 3.988665819168091, + 3.9656946659088135 + ] + }, + "mirror_blocks.10.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 931.4880981445312, + "stable_rank": 47.028038024902344, + "spectral_norm": 10.358336448669434, + "frobenius_norm": 71.03435516357422, + "mp_bound": 5.667892929296944, + "n_above_mp": 5, + "n_total": 1024, + "signal_ratio": 0.0048828125, + "alpha": 0.3224178866496389, + "alpha_r2": 0.9074978450938898, + "condition_number": 19.201627731323242, + "top_10_sv": [ + 10.358336448669434, + 7.540536403656006, + 6.372748374938965, + 6.180090427398682, + 5.795101642608643, + 5.607732772827148, + 5.531103134155273, + 5.381441116333008, + 5.296590805053711, + 5.140068054199219 + ] + }, + "mirror_blocks.10.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 934.0856323242188, + "stable_rank": 33.16695022583008, + "spectral_norm": 12.047493934631348, + "frobenius_norm": 69.3824234008789, + "mp_bound": 5.607379990837049, + "n_above_mp": 4, + "n_total": 1024, + "signal_ratio": 0.00390625, + "alpha": 0.3140386361337802, + "alpha_r2": 0.9016595592920099, + "condition_number": 22.627227783203125, + "top_10_sv": [ + 12.047493934631348, + 6.224437713623047, + 5.9016289710998535, + 5.674208641052246, + 5.374418258666992, + 5.356305122375488, + 5.1710100173950195, + 5.080776691436768, + 4.842743396759033, + 4.83020544052124 + ] + }, + "mirror_blocks.11.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 714.9497680664062, + "stable_rank": 15.653985023498535, + "spectral_norm": 13.170188903808594, + "frobenius_norm": 52.10800552368164, + "mp_bound": 1.6412835121154785, + "n_above_mp": 264, + "n_total": 1024, + "signal_ratio": 0.2578125, + "alpha": 0.615841352588578, + "alpha_r2": 0.8346231921961134, + "condition_number": 177537.3125, + "top_10_sv": [ + 13.170188903808594, + 7.998973369598389, + 7.199948310852051, + 6.877292633056641, + 5.979377269744873, + 5.612356662750244, + 5.58411169052124, + 5.295141696929932, + 5.064724445343018, + 5.030208587646484 + ] + }, + "mirror_blocks.11.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 219.5272979736328, + "stable_rank": 10.528435707092285, + "spectral_norm": 10.02354621887207, + "frobenius_norm": 32.52395248413086, + "mp_bound": 4.4782150983810425, + "n_above_mp": 4, + "n_total": 256, + "signal_ratio": 0.015625, + "alpha": 0.41023944662310485, + "alpha_r2": 0.8922973001008994, + "condition_number": 31.906288146972656, + "top_10_sv": [ + 10.02354621887207, + 5.548998832702637, + 4.94153356552124, + 4.539906978607178, + 4.195741176605225, + 4.111823081970215, + 3.867884397506714, + 3.8063926696777344, + 3.6784873008728027, + 3.6648223400115967 + ] + }, + "mirror_blocks.11.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 244.0906219482422, + "stable_rank": 99.99604797363281, + "spectral_norm": 1.704901099205017, + "frobenius_norm": 17.048673629760742, + "mp_bound": 3.4094028174877167, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.21826692382530663, + "alpha_r2": 0.7963713951629093, + "condition_number": 3.7913296222686768, + "top_10_sv": [ + 1.704901099205017, + 1.6752073764801025, + 1.6620216369628906, + 1.646759033203125, + 1.6319626569747925, + 1.6289738416671753, + 1.6160423755645752, + 1.6025718450546265, + 1.5946871042251587, + 1.581324577331543 + ] + }, + "mirror_blocks.11.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 776.8302612304688, + "stable_rank": 93.7181625366211, + "spectral_norm": 3.7507107257843018, + "frobenius_norm": 36.309932708740234, + "mp_bound": 1.4583911895751953, + "n_above_mp": 217, + "n_total": 1024, + "signal_ratio": 0.2119140625, + "alpha": 0.5146130644873622, + "alpha_r2": 0.7636101685825356, + "condition_number": 6790.8115234375, + "top_10_sv": [ + 3.7507107257843018, + 3.2563669681549072, + 3.104218006134033, + 2.977094888687134, + 2.8612241744995117, + 2.83172869682312, + 2.803534746170044, + 2.7646703720092773, + 2.749598264694214, + 2.7241640090942383 + ] + }, + "mirror_blocks.11.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 940.2883911132812, + "stable_rank": 49.9245719909668, + "spectral_norm": 10.201071739196777, + "frobenius_norm": 72.07804107666016, + "mp_bound": 5.949699705071705, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.3011514501588628, + "alpha_r2": 0.8845860827909939, + "condition_number": 20.120023727416992, + "top_10_sv": [ + 10.201071739196777, + 6.316970348358154, + 5.588222026824951, + 5.429642677307129, + 5.215307712554932, + 5.133322238922119, + 4.9971089363098145, + 4.855438232421875, + 4.762356281280518, + 4.719285488128662 + ] + }, + "mirror_blocks.11.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 948.9266357421875, + "stable_rank": 143.64559936523438, + "spectral_norm": 5.815410137176514, + "frobenius_norm": 69.6989974975586, + "mp_bound": 5.917122673208156, + "n_above_mp": 0, + "n_total": 1024, + "signal_ratio": 0.0, + "alpha": 0.2820783327163478, + "alpha_r2": 0.8663500868147462, + "condition_number": 10.315106391906738, + "top_10_sv": [ + 5.815410137176514, + 5.35853910446167, + 5.2132368087768555, + 5.075122833251953, + 4.844822883605957, + 4.744629383087158, + 4.686278343200684, + 4.590193271636963, + 4.4816813468933105, + 4.469357490539551 + ] + }, + "mirror_blocks.11.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 933.0435180664062, + "stable_rank": 48.990779876708984, + "spectral_norm": 10.144248962402344, + "frobenius_norm": 71.00305938720703, + "mp_bound": 5.706146500097874, + "n_above_mp": 4, + "n_total": 1024, + "signal_ratio": 0.00390625, + "alpha": 0.3191192833160187, + "alpha_r2": 0.9035937743285013, + "condition_number": 18.677425384521484, + "top_10_sv": [ + 10.144248962402344, + 6.757993221282959, + 6.3780927658081055, + 5.757233142852783, + 5.644091606140137, + 5.455899238586426, + 5.409149169921875, + 5.347768306732178, + 5.145373821258545, + 4.975387096405029 + ] + }, + "mirror_blocks.11.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 935.1820678710938, + "stable_rank": 38.949188232421875, + "spectral_norm": 11.14587688446045, + "frobenius_norm": 69.56061553955078, + "mp_bound": 5.640355180794904, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.3127321699728672, + "alpha_r2": 0.8980519850682651, + "condition_number": 20.536279678344727, + "top_10_sv": [ + 11.14587688446045, + 6.159939765930176, + 5.639197826385498, + 5.542326927185059, + 5.470362663269043, + 5.136144161224365, + 5.06137228012085, + 4.95586633682251, + 4.816535949707031, + 4.730994701385498 + ] + }, + "mirror_blocks.12.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 696.465087890625, + "stable_rank": 14.873617172241211, + "spectral_norm": 13.008182525634766, + "frobenius_norm": 50.16778564453125, + "mp_bound": 1.49580979347229, + "n_above_mp": 265, + "n_total": 1024, + "signal_ratio": 0.2587890625, + "alpha": 0.645553584875853, + "alpha_r2": 0.855779503817791, + "condition_number": 106662.765625, + "top_10_sv": [ + 13.008182525634766, + 7.621262550354004, + 6.906698703765869, + 6.606993675231934, + 6.330167770385742, + 5.8978047370910645, + 5.72519063949585, + 5.60040283203125, + 5.42011833190918, + 5.280104160308838 + ] + }, + "mirror_blocks.12.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 218.51364135742188, + "stable_rank": 10.064372062683105, + "spectral_norm": 9.24000072479248, + "frobenius_norm": 29.313343048095703, + "mp_bound": 3.933238238096237, + "n_above_mp": 4, + "n_total": 256, + "signal_ratio": 0.015625, + "alpha": 0.4325500619022907, + "alpha_r2": 0.907461804991039, + "condition_number": 30.26241111755371, + "top_10_sv": [ + 9.24000072479248, + 5.1959757804870605, + 4.413267612457275, + 4.1288838386535645, + 3.8656837940216064, + 3.7386345863342285, + 3.6508541107177734, + 3.5083096027374268, + 3.4535012245178223, + 3.3948209285736084 + ] + }, + "mirror_blocks.12.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 243.09762573242188, + "stable_rank": 85.82266998291016, + "spectral_norm": 1.9153207540512085, + "frobenius_norm": 17.74363136291504, + "mp_bound": 3.485766649246216, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.226968631244226, + "alpha_r2": 0.8079097307543233, + "condition_number": 4.127514362335205, + "top_10_sv": [ + 1.9153207540512085, + 1.819595456123352, + 1.789638638496399, + 1.7835880517959595, + 1.7581334114074707, + 1.746589183807373, + 1.7226272821426392, + 1.7067776918411255, + 1.6969380378723145, + 1.6701574325561523 + ] + }, + "mirror_blocks.12.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 757.789306640625, + "stable_rank": 105.72237396240234, + "spectral_norm": 3.6483616828918457, + "frobenius_norm": 37.51295852661133, + "mp_bound": 1.3988747596740723, + "n_above_mp": 239, + "n_total": 1024, + "signal_ratio": 0.2333984375, + "alpha": 0.5506028894595268, + "alpha_r2": 0.7832822309581396, + "condition_number": 4733.16259765625, + "top_10_sv": [ + 3.6483616828918457, + 3.5227508544921875, + 3.406222343444824, + 3.3871326446533203, + 3.3031539916992188, + 3.2664084434509277, + 3.1962461471557617, + 3.158512592315674, + 3.112751007080078, + 3.096844434738159 + ] + }, + "mirror_blocks.12.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 939.894775390625, + "stable_rank": 39.798545837402344, + "spectral_norm": 11.506302833557129, + "frobenius_norm": 72.58876037597656, + "mp_bound": 5.97152596182656, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.3006161483833617, + "alpha_r2": 0.8853879834370245, + "condition_number": 22.592741012573242, + "top_10_sv": [ + 11.506302833557129, + 6.563206195831299, + 5.844398021697998, + 5.388382434844971, + 5.279862880706787, + 5.217649459838867, + 5.153339862823486, + 5.087172031402588, + 4.779506206512451, + 4.736834526062012 + ] + }, + "mirror_blocks.12.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 945.4767456054688, + "stable_rank": 126.8713607788086, + "spectral_norm": 6.201172351837158, + "frobenius_norm": 69.84825897216797, + "mp_bound": 5.842198032734297, + "n_above_mp": 1, + "n_total": 1024, + "signal_ratio": 0.0009765625, + "alpha": 0.2906509381075571, + "alpha_r2": 0.8698703260252443, + "condition_number": 11.191490173339844, + "top_10_sv": [ + 6.201172351837158, + 5.2328410148620605, + 5.119720935821533, + 4.9528489112854, + 4.932304382324219, + 4.75206995010376, + 4.660051345825195, + 4.630064487457275, + 4.498002052307129, + 4.4747819900512695 + ] + }, + "mirror_blocks.12.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 934.94482421875, + "stable_rank": 40.76757049560547, + "spectral_norm": 11.129790306091309, + "frobenius_norm": 71.06314086914062, + "mp_bound": 5.757032218987458, + "n_above_mp": 4, + "n_total": 1024, + "signal_ratio": 0.00390625, + "alpha": 0.31318490026158347, + "alpha_r2": 0.8978496623402398, + "condition_number": 21.227514266967773, + "top_10_sv": [ + 11.129790306091309, + 6.613030910491943, + 6.06736946105957, + 5.8841729164123535, + 5.57126522064209, + 5.420281410217285, + 5.3419013023376465, + 5.236693859100342, + 5.005307674407959, + 4.949530601501465 + ] + }, + "mirror_blocks.12.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 937.758544921875, + "stable_rank": 39.82889938354492, + "spectral_norm": 10.9086332321167, + "frobenius_norm": 68.84453582763672, + "mp_bound": 5.633850918796334, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.3061158456051847, + "alpha_r2": 0.8915073119144331, + "condition_number": 21.601226806640625, + "top_10_sv": [ + 10.9086332321167, + 6.348767280578613, + 5.514016151428223, + 5.43626070022583, + 5.143150329589844, + 5.108470439910889, + 4.8789262771606445, + 4.798532485961914, + 4.705221652984619, + 4.574278831481934 + ] + }, + "mirror_blocks.13.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 709.9591674804688, + "stable_rank": 15.972458839416504, + "spectral_norm": 12.381279945373535, + "frobenius_norm": 49.482479095458984, + "mp_bound": 1.5373115539550781, + "n_above_mp": 262, + "n_total": 1024, + "signal_ratio": 0.255859375, + "alpha": 0.6259816799489261, + "alpha_r2": 0.8410288225935127, + "condition_number": 14729.294921875, + "top_10_sv": [ + 12.381279945373535, + 7.700345039367676, + 6.572876453399658, + 5.888210773468018, + 5.765105724334717, + 5.456506252288818, + 5.295698165893555, + 5.250854969024658, + 5.03156042098999, + 4.749350070953369 + ] + }, + "mirror_blocks.13.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 221.9441375732422, + "stable_rank": 11.09493637084961, + "spectral_norm": 8.764705657958984, + "frobenius_norm": 29.194414138793945, + "mp_bound": 4.163373917341232, + "n_above_mp": 3, + "n_total": 256, + "signal_ratio": 0.01171875, + "alpha": 0.4055165572651397, + "alpha_r2": 0.9055753626828817, + "condition_number": 30.999008178710938, + "top_10_sv": [ + 8.764705657958984, + 4.9452433586120605, + 4.1977620124816895, + 3.978464365005493, + 3.8703646659851074, + 3.622251510620117, + 3.5399763584136963, + 3.436047315597534, + 3.375932216644287, + 3.273601531982422 + ] + }, + "mirror_blocks.13.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 241.7525177001953, + "stable_rank": 77.8870620727539, + "spectral_norm": 2.0911409854888916, + "frobenius_norm": 18.455081939697266, + "mp_bound": 3.54290708899498, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.24723364897829297, + "alpha_r2": 0.8335810366460908, + "condition_number": 4.463647842407227, + "top_10_sv": [ + 2.0911409854888916, + 2.0161099433898926, + 1.9807920455932617, + 1.9569084644317627, + 1.9315381050109863, + 1.8971012830734253, + 1.8804582357406616, + 1.8634296655654907, + 1.8366801738739014, + 1.8274919986724854 + ] + }, + "mirror_blocks.13.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 756.4520874023438, + "stable_rank": 80.18118286132812, + "spectral_norm": 4.291501998901367, + "frobenius_norm": 38.42780303955078, + "mp_bound": 1.4420945644378662, + "n_above_mp": 228, + "n_total": 1024, + "signal_ratio": 0.22265625, + "alpha": 0.5539880183352716, + "alpha_r2": 0.7989090148474791, + "condition_number": 6336.25341796875, + "top_10_sv": [ + 4.291501998901367, + 4.039663791656494, + 3.9344558715820312, + 3.813274383544922, + 3.6739773750305176, + 3.5607786178588867, + 3.5530295372009277, + 3.4411494731903076, + 3.4137277603149414, + 3.375180721282959 + ] + }, + "mirror_blocks.13.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 939.6322631835938, + "stable_rank": 39.10139083862305, + "spectral_norm": 11.563501358032227, + "frobenius_norm": 72.30785369873047, + "mp_bound": 5.962505097522003, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.3018051885911065, + "alpha_r2": 0.8890258015511643, + "condition_number": 21.195045471191406, + "top_10_sv": [ + 11.563501358032227, + 6.663970470428467, + 5.807022571563721, + 5.47370719909668, + 5.389070510864258, + 5.188050270080566, + 5.134092330932617, + 5.006446838378906, + 4.95887565612793, + 4.851858615875244 + ] + }, + "mirror_blocks.13.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 943.1398315429688, + "stable_rank": 108.81976318359375, + "spectral_norm": 6.679588794708252, + "frobenius_norm": 69.67927551269531, + "mp_bound": 5.7883332200118796, + "n_above_mp": 1, + "n_total": 1024, + "signal_ratio": 0.0009765625, + "alpha": 0.29684667183603686, + "alpha_r2": 0.8759638608328276, + "condition_number": 12.510334014892578, + "top_10_sv": [ + 6.679588794708252, + 5.237009048461914, + 5.209030628204346, + 4.974550247192383, + 4.902350902557373, + 4.769472122192383, + 4.709086894989014, + 4.656009197235107, + 4.616931915283203, + 4.503769874572754 + ] + }, + "mirror_blocks.13.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 933.9867553710938, + "stable_rank": 35.47219467163086, + "spectral_norm": 11.909363746643066, + "frobenius_norm": 70.93042755126953, + "mp_bound": 5.7343650685474294, + "n_above_mp": 5, + "n_total": 1024, + "signal_ratio": 0.0048828125, + "alpha": 0.3148642733189654, + "alpha_r2": 0.9034827148114366, + "condition_number": 21.10725212097168, + "top_10_sv": [ + 11.909363746643066, + 7.3027729988098145, + 6.282888889312744, + 6.0135111808776855, + 5.842795372009277, + 5.589134216308594, + 5.433507442474365, + 5.337557315826416, + 5.033231735229492, + 4.912911891937256 + ] + }, + "mirror_blocks.13.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 937.3253173828125, + "stable_rank": 36.31221008300781, + "spectral_norm": 11.340376853942871, + "frobenius_norm": 68.336669921875, + "mp_bound": 5.572652094764316, + "n_above_mp": 4, + "n_total": 1024, + "signal_ratio": 0.00390625, + "alpha": 0.30661654515936093, + "alpha_r2": 0.8946785381749814, + "condition_number": 21.96826934814453, + "top_10_sv": [ + 11.340376853942871, + 6.586040496826172, + 5.633627414703369, + 5.599245071411133, + 5.482135772705078, + 5.157806396484375, + 5.019090175628662, + 4.813208103179932, + 4.680241584777832, + 4.5515899658203125 + ] + }, + "mirror_blocks.14.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 697.5416259765625, + "stable_rank": 16.897974014282227, + "spectral_norm": 11.678322792053223, + "frobenius_norm": 48.00625228881836, + "mp_bound": 1.4037836790084839, + "n_above_mp": 282, + "n_total": 1024, + "signal_ratio": 0.275390625, + "alpha": 0.648964338590965, + "alpha_r2": 0.8408482426603474, + "condition_number": 20373.634765625, + "top_10_sv": [ + 11.678322792053223, + 6.956737518310547, + 6.309643268585205, + 5.977978229522705, + 5.578909397125244, + 5.417087554931641, + 5.137640476226807, + 4.996939659118652, + 4.853672027587891, + 4.7611589431762695 + ] + }, + "mirror_blocks.14.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 219.03147888183594, + "stable_rank": 11.208623886108398, + "spectral_norm": 8.529848098754883, + "frobenius_norm": 28.55731964111328, + "mp_bound": 3.89571949839592, + "n_above_mp": 3, + "n_total": 256, + "signal_ratio": 0.01171875, + "alpha": 0.41161084841106454, + "alpha_r2": 0.879826006954375, + "condition_number": 35.475135803222656, + "top_10_sv": [ + 8.529848098754883, + 4.489371299743652, + 3.9982194900512695, + 3.8955321311950684, + 3.7369461059570312, + 3.5482966899871826, + 3.413762092590332, + 3.346292734146118, + 3.171548366546631, + 3.127803325653076 + ] + }, + "mirror_blocks.14.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 242.34657287597656, + "stable_rank": 80.78938293457031, + "spectral_norm": 1.9743609428405762, + "frobenius_norm": 17.746131896972656, + "mp_bound": 3.4450705647468567, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.2307521255276575, + "alpha_r2": 0.8051731795520419, + "condition_number": 4.6434855461120605, + "top_10_sv": [ + 1.9743609428405762, + 1.8398374319076538, + 1.8117241859436035, + 1.7852483987808228, + 1.7673953771591187, + 1.7465437650680542, + 1.734883189201355, + 1.722620964050293, + 1.7051063776016235, + 1.6972216367721558 + ] + }, + "mirror_blocks.14.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 742.3014526367188, + "stable_rank": 93.94414520263672, + "spectral_norm": 3.8442819118499756, + "frobenius_norm": 37.2606201171875, + "mp_bound": 1.2882250547409058, + "n_above_mp": 263, + "n_total": 1024, + "signal_ratio": 0.2568359375, + "alpha": 0.5808964889452646, + "alpha_r2": 0.7919959364685919, + "condition_number": 2741.0703125, + "top_10_sv": [ + 3.8442819118499756, + 3.637497663497925, + 3.5710225105285645, + 3.537470817565918, + 3.4183132648468018, + 3.3697872161865234, + 3.3051464557647705, + 3.2620716094970703, + 3.1824517250061035, + 3.1255290508270264 + ] + }, + "mirror_blocks.14.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 936.3666381835938, + "stable_rank": 33.551544189453125, + "spectral_norm": 12.557463645935059, + "frobenius_norm": 72.73746490478516, + "mp_bound": 5.922065709702084, + "n_above_mp": 3, + "n_total": 1024, + "signal_ratio": 0.0029296875, + "alpha": 0.30907285736585066, + "alpha_r2": 0.8986515578235017, + "condition_number": 22.658405303955078, + "top_10_sv": [ + 12.557463645935059, + 7.118464469909668, + 6.138530731201172, + 5.792100429534912, + 5.60252571105957, + 5.410584926605225, + 5.263728141784668, + 5.142266273498535, + 5.092854976654053, + 5.040014266967773 + ] + }, + "mirror_blocks.14.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 939.3607788085938, + "stable_rank": 109.86365509033203, + "spectral_norm": 6.640163898468018, + "frobenius_norm": 69.59945678710938, + "mp_bound": 5.704613642091046, + "n_above_mp": 2, + "n_total": 1024, + "signal_ratio": 0.001953125, + "alpha": 0.30666705472929906, + "alpha_r2": 0.8875362848911785, + "condition_number": 10.808683395385742, + "top_10_sv": [ + 6.640163898468018, + 5.746023654937744, + 5.433533668518066, + 5.353193759918213, + 5.0923566818237305, + 4.9909820556640625, + 4.861114025115967, + 4.807570934295654, + 4.716190814971924, + 4.641566276550293 + ] + }, + "mirror_blocks.14.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 930.44091796875, + "stable_rank": 30.96547508239746, + "spectral_norm": 12.708294868469238, + "frobenius_norm": 70.71737670898438, + "mp_bound": 5.6479789458321665, + "n_above_mp": 5, + "n_total": 1024, + "signal_ratio": 0.0048828125, + "alpha": 0.3217209404068691, + "alpha_r2": 0.9100996227456836, + "condition_number": 23.98003387451172, + "top_10_sv": [ + 12.708294868469238, + 7.669070243835449, + 6.3701171875, + 6.188457489013672, + 5.937990188598633, + 5.633401870727539, + 5.6147966384887695, + 5.345260143280029, + 5.256329536437988, + 5.107630729675293 + ] + }, + "mirror_blocks.14.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 935.9773559570312, + "stable_rank": 36.42756652832031, + "spectral_norm": 11.159740447998047, + "frobenius_norm": 67.35489654541016, + "mp_bound": 5.478253167041783, + "n_above_mp": 4, + "n_total": 1024, + "signal_ratio": 0.00390625, + "alpha": 0.3100275990423222, + "alpha_r2": 0.8996885790150614, + "condition_number": 21.800729751586914, + "top_10_sv": [ + 11.159740447998047, + 6.735589504241943, + 5.72880744934082, + 5.517705917358398, + 5.405825614929199, + 5.173426151275635, + 4.986207008361816, + 4.890574932098389, + 4.8433074951171875, + 4.618099212646484 + ] + }, + "mirror_blocks.15.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 698.0100708007812, + "stable_rank": 14.12426471710205, + "spectral_norm": 12.92407512664795, + "frobenius_norm": 48.571598052978516, + "mp_bound": 1.4502960443496704, + "n_above_mp": 266, + "n_total": 1024, + "signal_ratio": 0.259765625, + "alpha": 0.6406851769656655, + "alpha_r2": 0.8533552838995164, + "condition_number": 28851.146484375, + "top_10_sv": [ + 12.92407512664795, + 7.524083137512207, + 7.211349010467529, + 6.819272518157959, + 6.2347187995910645, + 5.666784763336182, + 5.552811145782471, + 5.175304889678955, + 5.0952019691467285, + 4.917961597442627 + ] + }, + "mirror_blocks.15.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 215.24574279785156, + "stable_rank": 9.585249900817871, + "spectral_norm": 9.147680282592773, + "frobenius_norm": 28.32126808166504, + "mp_bound": 3.7071792483329773, + "n_above_mp": 6, + "n_total": 256, + "signal_ratio": 0.0234375, + "alpha": 0.4552706687453839, + "alpha_r2": 0.9254453078850622, + "condition_number": 41.05019760131836, + "top_10_sv": [ + 9.147680282592773, + 5.0791015625, + 4.687919616699219, + 4.476082801818848, + 4.0097808837890625, + 3.725461721420288, + 3.693479299545288, + 3.5684854984283447, + 3.4303340911865234, + 3.3677151203155518 + ] + }, + "mirror_blocks.15.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 241.88998413085938, + "stable_rank": 84.92568969726562, + "spectral_norm": 1.9672569036483765, + "frobenius_norm": 18.129281997680664, + "mp_bound": 3.5086421370506287, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.23659737087489893, + "alpha_r2": 0.7959238595778274, + "condition_number": 4.454681873321533, + "top_10_sv": [ + 1.9672569036483765, + 1.8490815162658691, + 1.8270435333251953, + 1.7996671199798584, + 1.7982233762741089, + 1.7781366109848022, + 1.7711842060089111, + 1.7458291053771973, + 1.7380094528198242, + 1.724470853805542 + ] + }, + "mirror_blocks.15.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 741.6381225585938, + "stable_rank": 104.56897735595703, + "spectral_norm": 3.7035470008850098, + "frobenius_norm": 37.87208938598633, + "mp_bound": 1.300483226776123, + "n_above_mp": 266, + "n_total": 1024, + "signal_ratio": 0.259765625, + "alpha": 0.582182188522983, + "alpha_r2": 0.7880113451024957, + "condition_number": 3740.406005859375, + "top_10_sv": [ + 3.7035470008850098, + 3.529146194458008, + 3.495568037033081, + 3.376060962677002, + 3.316063642501831, + 3.2922139167785645, + 3.2415528297424316, + 3.2158665657043457, + 3.19065523147583, + 3.1617650985717773 + ] + }, + "mirror_blocks.15.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 931.0350952148438, + "stable_rank": 28.6345157623291, + "spectral_norm": 13.649828910827637, + "frobenius_norm": 73.04190826416016, + "mp_bound": 5.847385232334402, + "n_above_mp": 5, + "n_total": 1024, + "signal_ratio": 0.0048828125, + "alpha": 0.3205310760486544, + "alpha_r2": 0.9078041656313058, + "condition_number": 26.799556732177734, + "top_10_sv": [ + 13.649828910827637, + 7.165524482727051, + 6.272665500640869, + 6.0195417404174805, + 6.002742290496826, + 5.793951988220215, + 5.732149600982666, + 5.441635608673096, + 5.2520246505737305, + 5.224968433380127 + ] + }, + "mirror_blocks.15.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 933.7098388671875, + "stable_rank": 99.88690185546875, + "spectral_norm": 6.9578857421875, + "frobenius_norm": 69.53949737548828, + "mp_bound": 5.573329368774603, + "n_above_mp": 3, + "n_total": 1024, + "signal_ratio": 0.0029296875, + "alpha": 0.3200205739054121, + "alpha_r2": 0.8954614231597993, + "condition_number": 12.476043701171875, + "top_10_sv": [ + 6.9578857421875, + 5.995143413543701, + 5.769176483154297, + 5.482438087463379, + 5.307949066162109, + 5.160090446472168, + 5.035862922668457, + 4.931918621063232, + 4.836297035217285, + 4.83005952835083 + ] + }, + "mirror_blocks.15.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 925.3292846679688, + "stable_rank": 25.051773071289062, + "spectral_norm": 14.136820793151855, + "frobenius_norm": 70.75725555419922, + "mp_bound": 5.558833070829661, + "n_above_mp": 8, + "n_total": 1024, + "signal_ratio": 0.0078125, + "alpha": 0.3318318420182958, + "alpha_r2": 0.9182918876226351, + "condition_number": 28.154159545898438, + "top_10_sv": [ + 14.136820793151855, + 7.495446681976318, + 6.4098663330078125, + 6.388991832733154, + 5.94157075881958, + 5.910908222198486, + 5.860252380371094, + 5.597021579742432, + 5.299474239349365, + 5.239851474761963 + ] + }, + "mirror_blocks.15.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 932.7788696289062, + "stable_rank": 37.20389938354492, + "spectral_norm": 10.911831855773926, + "frobenius_norm": 66.55671691894531, + "mp_bound": 5.3525740073557255, + "n_above_mp": 5, + "n_total": 1024, + "signal_ratio": 0.0048828125, + "alpha": 0.3183270785605616, + "alpha_r2": 0.9076167022721382, + "condition_number": 22.13448143005371, + "top_10_sv": [ + 10.911831855773926, + 6.749984264373779, + 5.802445888519287, + 5.571719646453857, + 5.42951774597168, + 5.282364845275879, + 5.161341667175293, + 4.951720714569092, + 4.882364749908447, + 4.775622367858887 + ] + }, + "mirror_blocks.16.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 692.9827880859375, + "stable_rank": 15.077933311462402, + "spectral_norm": 12.52715015411377, + "frobenius_norm": 48.64331817626953, + "mp_bound": 1.4175679683685303, + "n_above_mp": 273, + "n_total": 1024, + "signal_ratio": 0.2666015625, + "alpha": 0.6525427305370284, + "alpha_r2": 0.8515104026937118, + "condition_number": 19868.990234375, + "top_10_sv": [ + 12.52715015411377, + 7.1949920654296875, + 6.767822265625, + 6.466320037841797, + 5.953493595123291, + 5.440323829650879, + 5.31725549697876, + 5.174378871917725, + 5.06400203704834, + 4.801645278930664 + ] + }, + "mirror_blocks.16.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 216.2462921142578, + "stable_rank": 11.177544593811035, + "spectral_norm": 8.485119819641113, + "frobenius_norm": 28.368160247802734, + "mp_bound": 3.662528246641159, + "n_above_mp": 5, + "n_total": 256, + "signal_ratio": 0.01953125, + "alpha": 0.44581144393074357, + "alpha_r2": 0.8877184928418376, + "condition_number": 38.95733642578125, + "top_10_sv": [ + 8.485119819641113, + 4.7601141929626465, + 4.366655349731445, + 4.12785005569458, + 3.781933069229126, + 3.58194637298584, + 3.50522780418396, + 3.4163918495178223, + 3.2992920875549316, + 3.278211832046509 + ] + }, + "mirror_blocks.16.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 240.64251708984375, + "stable_rank": 87.38280487060547, + "spectral_norm": 1.8977673053741455, + "frobenius_norm": 17.740095138549805, + "mp_bound": 3.34202116727829, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.24387520371591054, + "alpha_r2": 0.779672008344288, + "condition_number": 4.518321514129639, + "top_10_sv": [ + 1.8977673053741455, + 1.837438702583313, + 1.7873655557632446, + 1.7776175737380981, + 1.7442907094955444, + 1.7382516860961914, + 1.7225285768508911, + 1.7079226970672607, + 1.6990540027618408, + 1.678932785987854 + ] + }, + "mirror_blocks.16.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 753.7825927734375, + "stable_rank": 90.87615203857422, + "spectral_norm": 3.929586887359619, + "frobenius_norm": 37.46035385131836, + "mp_bound": 1.3569040298461914, + "n_above_mp": 253, + "n_total": 1024, + "signal_ratio": 0.2470703125, + "alpha": 0.5590062314189737, + "alpha_r2": 0.7823464544802642, + "condition_number": 153183.078125, + "top_10_sv": [ + 3.929586887359619, + 3.5750386714935303, + 3.465914249420166, + 3.408383369445801, + 3.366081953048706, + 3.2667429447174072, + 3.2237396240234375, + 3.1776010990142822, + 3.1573116779327393, + 3.106376886367798 + ] + }, + "mirror_blocks.16.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 931.0621948242188, + "stable_rank": 29.082687377929688, + "spectral_norm": 13.489387512207031, + "frobenius_norm": 72.74606323242188, + "mp_bound": 5.802524060792869, + "n_above_mp": 4, + "n_total": 1024, + "signal_ratio": 0.00390625, + "alpha": 0.32183683620300413, + "alpha_r2": 0.9072804457079097, + "condition_number": 25.116384506225586, + "top_10_sv": [ + 13.489387512207031, + 7.060013294219971, + 6.279201507568359, + 5.835643768310547, + 5.657328128814697, + 5.571991443634033, + 5.419640064239502, + 5.288918495178223, + 5.209487438201904, + 5.096517086029053 + ] + }, + "mirror_blocks.16.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 930.1187744140625, + "stable_rank": 96.4075698852539, + "spectral_norm": 7.073318958282471, + "frobenius_norm": 69.4510498046875, + "mp_bound": 5.513409108394016, + "n_above_mp": 5, + "n_total": 1024, + "signal_ratio": 0.0048828125, + "alpha": 0.3293891515983345, + "alpha_r2": 0.9062945762866597, + "condition_number": 11.414506912231445, + "top_10_sv": [ + 7.073318958282471, + 5.871675491333008, + 5.750760555267334, + 5.609018325805664, + 5.564408302307129, + 5.420380115509033, + 5.2543768882751465, + 5.1836018562316895, + 5.113907814025879, + 5.022817611694336 + ] + }, + "mirror_blocks.16.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 923.9924926757812, + "stable_rank": 21.757003784179688, + "spectral_norm": 15.209756851196289, + "frobenius_norm": 70.94499969482422, + "mp_bound": 5.530330827401976, + "n_above_mp": 7, + "n_total": 1024, + "signal_ratio": 0.0068359375, + "alpha": 0.3340264472819412, + "alpha_r2": 0.9171434048106775, + "condition_number": 29.452625274658203, + "top_10_sv": [ + 15.209756851196289, + 7.534195899963379, + 6.4528326988220215, + 6.258402347564697, + 5.919894695281982, + 5.75828742980957, + 5.555954933166504, + 5.496253967285156, + 5.260440349578857, + 5.126454830169678 + ] + }, + "mirror_blocks.16.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 931.5618896484375, + "stable_rank": 39.143348693847656, + "spectral_norm": 10.560962677001953, + "frobenius_norm": 66.07428741455078, + "mp_bound": 5.280941009753105, + "n_above_mp": 5, + "n_total": 1024, + "signal_ratio": 0.0048828125, + "alpha": 0.3218265480975189, + "alpha_r2": 0.9078077258431958, + "condition_number": 21.682981491088867, + "top_10_sv": [ + 10.560962677001953, + 6.787275314331055, + 5.599087238311768, + 5.4718217849731445, + 5.439906120300293, + 5.116901874542236, + 5.043114185333252, + 4.947134971618652, + 4.79224157333374, + 4.730659484863281 + ] + }, + "mirror_blocks.17.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 690.6998901367188, + "stable_rank": 11.876253128051758, + "spectral_norm": 14.420310974121094, + "frobenius_norm": 49.6951904296875, + "mp_bound": 1.4158523082733154, + "n_above_mp": 282, + "n_total": 1024, + "signal_ratio": 0.275390625, + "alpha": 0.6526672240437468, + "alpha_r2": 0.8490326028078233, + "condition_number": 62346.59765625, + "top_10_sv": [ + 14.420310974121094, + 7.878678798675537, + 7.345128536224365, + 6.418420791625977, + 6.004045486450195, + 5.770692825317383, + 5.57452917098999, + 5.31997537612915, + 5.130623817443848, + 5.004641532897949 + ] + }, + "mirror_blocks.17.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 218.60794067382812, + "stable_rank": 9.616716384887695, + "spectral_norm": 9.3808012008667, + "frobenius_norm": 29.09064292907715, + "mp_bound": 4.008194446563721, + "n_above_mp": 4, + "n_total": 256, + "signal_ratio": 0.015625, + "alpha": 0.4195642849775986, + "alpha_r2": 0.9099392727615295, + "condition_number": 56.53859329223633, + "top_10_sv": [ + 9.3808012008667, + 5.217532157897949, + 4.502448558807373, + 4.072835445404053, + 3.905411958694458, + 3.616271734237671, + 3.521489381790161, + 3.468380928039551, + 3.3824944496154785, + 3.309173345565796 + ] + }, + "mirror_blocks.17.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 241.30357360839844, + "stable_rank": 81.93017578125, + "spectral_norm": 1.9407001733779907, + "frobenius_norm": 17.566303253173828, + "mp_bound": 3.3378825187683105, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.24377608758072422, + "alpha_r2": 0.8112452037389976, + "condition_number": 4.8845906257629395, + "top_10_sv": [ + 1.9407001733779907, + 1.8930538892745972, + 1.8775633573532104, + 1.848752498626709, + 1.7958686351776123, + 1.7760910987854004, + 1.7691055536270142, + 1.737191915512085, + 1.715916395187378, + 1.696707010269165 + ] + }, + "mirror_blocks.17.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 718.1282958984375, + "stable_rank": 83.16262817382812, + "spectral_norm": 4.038750648498535, + "frobenius_norm": 36.8307991027832, + "mp_bound": 1.1324189901351929, + "n_above_mp": 297, + "n_total": 1024, + "signal_ratio": 0.2900390625, + "alpha": 0.626198486090398, + "alpha_r2": 0.795394780637157, + "condition_number": 36825.36328125, + "top_10_sv": [ + 4.038750648498535, + 3.779522180557251, + 3.764615774154663, + 3.5243029594421387, + 3.4937407970428467, + 3.3946914672851562, + 3.361696720123291, + 3.245537757873535, + 3.1367571353912354, + 3.0726311206817627 + ] + }, + "mirror_blocks.17.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 928.0424194335938, + "stable_rank": 30.34347152709961, + "spectral_norm": 13.212639808654785, + "frobenius_norm": 72.78170776367188, + "mp_bound": 5.746468871989315, + "n_above_mp": 4, + "n_total": 1024, + "signal_ratio": 0.00390625, + "alpha": 0.32823913092263585, + "alpha_r2": 0.908446883272647, + "condition_number": 22.697620391845703, + "top_10_sv": [ + 13.212639808654785, + 7.487614154815674, + 6.35713529586792, + 5.9160261154174805, + 5.71431303024292, + 5.616559028625488, + 5.457281112670898, + 5.407783031463623, + 5.2713541984558105, + 5.1632890701293945 + ] + }, + "mirror_blocks.17.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 921.98681640625, + "stable_rank": 84.93029022216797, + "spectral_norm": 7.549761772155762, + "frobenius_norm": 69.57681274414062, + "mp_bound": 5.351311147140583, + "n_above_mp": 8, + "n_total": 1024, + "signal_ratio": 0.0078125, + "alpha": 0.34491153612537206, + "alpha_r2": 0.9101361754422992, + "condition_number": 12.341729164123535, + "top_10_sv": [ + 7.549761772155762, + 6.695281982421875, + 6.36198616027832, + 5.975881099700928, + 5.8675737380981445, + 5.6538519859313965, + 5.384984016418457, + 5.354708671569824, + 5.242159843444824, + 5.091554164886475 + ] + }, + "mirror_blocks.17.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 921.874755859375, + "stable_rank": 24.58268165588379, + "spectral_norm": 14.295276641845703, + "frobenius_norm": 70.87730407714844, + "mp_bound": 5.48388006285799, + "n_above_mp": 8, + "n_total": 1024, + "signal_ratio": 0.0078125, + "alpha": 0.33947400836784264, + "alpha_r2": 0.9211746187514315, + "condition_number": 24.997705459594727, + "top_10_sv": [ + 14.295276641845703, + 7.819306373596191, + 6.844958305358887, + 6.535338401794434, + 6.248237609863281, + 5.810671806335449, + 5.626343727111816, + 5.532675266265869, + 5.4771623611450195, + 5.3754119873046875 + ] + }, + "mirror_blocks.17.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 928.0716552734375, + "stable_rank": 34.896087646484375, + "spectral_norm": 11.267123222351074, + "frobenius_norm": 66.55817413330078, + "mp_bound": 5.256676667873708, + "n_above_mp": 4, + "n_total": 1024, + "signal_ratio": 0.00390625, + "alpha": 0.3293428896351482, + "alpha_r2": 0.9111570251926693, + "condition_number": 20.217910766601562, + "top_10_sv": [ + 11.267123222351074, + 6.92650842666626, + 6.014195442199707, + 5.771824359893799, + 5.226806163787842, + 5.175645351409912, + 5.1114678382873535, + 4.991644859313965, + 4.871926784515381, + 4.799374580383301 + ] + }, + "mirror_blocks.18.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 606.5923461914062, + "stable_rank": 19.05453109741211, + "spectral_norm": 10.551756858825684, + "frobenius_norm": 46.059993743896484, + "mp_bound": 0.9310512542724609, + "n_above_mp": 330, + "n_total": 1024, + "signal_ratio": 0.322265625, + "alpha": 0.7880345450878168, + "alpha_r2": 0.8768775729317234, + "condition_number": 10592.5625, + "top_10_sv": [ + 10.551756858825684, + 6.576756477355957, + 6.291285514831543, + 5.969563007354736, + 5.822000503540039, + 5.660767078399658, + 5.621981620788574, + 5.561458110809326, + 5.465102672576904, + 5.433443069458008 + ] + }, + "mirror_blocks.18.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 203.3289337158203, + "stable_rank": 15.152244567871094, + "spectral_norm": 6.674946308135986, + "frobenius_norm": 25.982818603515625, + "mp_bound": 2.784608244895935, + "n_above_mp": 26, + "n_total": 256, + "signal_ratio": 0.1015625, + "alpha": 0.5309094192698133, + "alpha_r2": 0.8778161978093411, + "condition_number": 31.344619750976562, + "top_10_sv": [ + 6.674946308135986, + 3.8062291145324707, + 3.7938194274902344, + 3.705780267715454, + 3.6417253017425537, + 3.6094729900360107, + 3.5757081508636475, + 3.5386276245117188, + 3.529515504837036, + 3.497487783432007 + ] + }, + "mirror_blocks.18.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 243.7182159423828, + "stable_rank": 83.83684539794922, + "spectral_norm": 1.928486704826355, + "frobenius_norm": 17.657699584960938, + "mp_bound": 3.590529978275299, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.2149352862815956, + "alpha_r2": 0.8235106360819169, + "condition_number": 5.123910903930664, + "top_10_sv": [ + 1.928486704826355, + 1.8424326181411743, + 1.8203599452972412, + 1.793023943901062, + 1.7599207162857056, + 1.7565810680389404, + 1.707355260848999, + 1.6774177551269531, + 1.6472864151000977, + 1.6253337860107422 + ] + }, + "mirror_blocks.18.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 706.3380126953125, + "stable_rank": 80.5675277709961, + "spectral_norm": 4.286775588989258, + "frobenius_norm": 38.477848052978516, + "mp_bound": 1.0701144933700562, + "n_above_mp": 343, + "n_total": 1024, + "signal_ratio": 0.3349609375, + "alpha": 0.6473314383774088, + "alpha_r2": 0.765011071462633, + "condition_number": 50112.02734375, + "top_10_sv": [ + 4.286775588989258, + 4.024496078491211, + 3.791698694229126, + 3.680622100830078, + 3.5267579555511475, + 3.4895246028900146, + 3.3953697681427, + 3.3051695823669434, + 3.209043264389038, + 3.18941068649292 + ] + }, + "mirror_blocks.18.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 924.9935913085938, + "stable_rank": 26.75147819519043, + "spectral_norm": 14.097662925720215, + "frobenius_norm": 72.91569519042969, + "mp_bound": 5.700313433360061, + "n_above_mp": 5, + "n_total": 1024, + "signal_ratio": 0.0048828125, + "alpha": 0.3346500460992262, + "alpha_r2": 0.9132548601494821, + "condition_number": 25.544021606445312, + "top_10_sv": [ + 14.097662925720215, + 7.556273937225342, + 6.313138961791992, + 6.082395076751709, + 5.850302696228027, + 5.577125549316406, + 5.450815200805664, + 5.367274761199951, + 5.198825836181641, + 5.182854652404785 + ] + }, + "mirror_blocks.18.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 920.052734375, + "stable_rank": 92.22322082519531, + "spectral_norm": 7.18129825592041, + "frobenius_norm": 68.96410369873047, + "mp_bound": 5.270497211495745, + "n_above_mp": 9, + "n_total": 1024, + "signal_ratio": 0.0087890625, + "alpha": 0.3511858425119, + "alpha_r2": 0.9175317206554943, + "condition_number": 11.947319030761719, + "top_10_sv": [ + 7.18129825592041, + 6.51509428024292, + 6.443070888519287, + 6.052488803863525, + 5.782515525817871, + 5.622491836547852, + 5.53987455368042, + 5.302340030670166, + 5.270542144775391, + 5.255793571472168 + ] + }, + "mirror_blocks.18.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 916.951416015625, + "stable_rank": 18.454391479492188, + "spectral_norm": 16.47149085998535, + "frobenius_norm": 70.7591781616211, + "mp_bound": 5.3747361150221895, + "n_above_mp": 8, + "n_total": 1024, + "signal_ratio": 0.0078125, + "alpha": 0.3473020010285147, + "alpha_r2": 0.9235646054943021, + "condition_number": 29.879819869995117, + "top_10_sv": [ + 16.47149085998535, + 7.702846527099609, + 6.878231048583984, + 6.59975528717041, + 6.493790626525879, + 5.814973831176758, + 5.668886184692383, + 5.455373287200928, + 5.364744186401367, + 5.236640453338623 + ] + }, + "mirror_blocks.18.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 925.2180786132812, + "stable_rank": 34.115299224853516, + "spectral_norm": 11.136119842529297, + "frobenius_norm": 65.044189453125, + "mp_bound": 5.062074113188641, + "n_above_mp": 7, + "n_total": 1024, + "signal_ratio": 0.0068359375, + "alpha": 0.3352427782620716, + "alpha_r2": 0.9114684896009867, + "condition_number": 21.17527961730957, + "top_10_sv": [ + 11.136119842529297, + 6.701422691345215, + 5.856301307678223, + 5.658908367156982, + 5.420210838317871, + 5.279207706451416, + 5.134861946105957, + 4.853182315826416, + 4.841711044311523, + 4.718622207641602 + ] + }, + "mirror_blocks.19.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 619.5433959960938, + "stable_rank": 18.711942672729492, + "spectral_norm": 11.09443187713623, + "frobenius_norm": 47.991519927978516, + "mp_bound": 1.0334219932556152, + "n_above_mp": 319, + "n_total": 1024, + "signal_ratio": 0.3115234375, + "alpha": 0.772451434072649, + "alpha_r2": 0.8727970913258525, + "condition_number": 52422.0703125, + "top_10_sv": [ + 11.09443187713623, + 6.478702068328857, + 6.421808242797852, + 6.210264205932617, + 5.898099422454834, + 5.750534534454346, + 5.465526580810547, + 5.299853801727295, + 5.2664313316345215, + 5.232556343078613 + ] + }, + "mirror_blocks.19.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 206.22445678710938, + "stable_rank": 12.261953353881836, + "spectral_norm": 7.376572608947754, + "frobenius_norm": 25.830596923828125, + "mp_bound": 2.761458098888397, + "n_above_mp": 20, + "n_total": 256, + "signal_ratio": 0.078125, + "alpha": 0.5146176696557786, + "alpha_r2": 0.8512289842753913, + "condition_number": 47.94727325439453, + "top_10_sv": [ + 7.376572608947754, + 4.112476348876953, + 3.874966859817505, + 3.6850075721740723, + 3.5020036697387695, + 3.424556016921997, + 3.2775120735168457, + 3.1888692378997803, + 3.149406671524048, + 3.081117868423462 + ] + }, + "mirror_blocks.19.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 244.21168518066406, + "stable_rank": 82.91676330566406, + "spectral_norm": 2.1144943237304688, + "frobenius_norm": 19.254297256469727, + "mp_bound": 4.042791455984116, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.20486536075539669, + "alpha_r2": 0.8509586230412005, + "condition_number": 5.235880374908447, + "top_10_sv": [ + 2.1144943237304688, + 2.068418025970459, + 2.0199368000030518, + 1.9896377325057983, + 1.9243032932281494, + 1.9146326780319214, + 1.85474693775177, + 1.8414450883865356, + 1.7955013513565063, + 1.7865314483642578 + ] + }, + "mirror_blocks.19.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 718.0026245117188, + "stable_rank": 100.08474731445312, + "spectral_norm": 3.997250556945801, + "frobenius_norm": 39.98944091796875, + "mp_bound": 1.1850062608718872, + "n_above_mp": 320, + "n_total": 1024, + "signal_ratio": 0.3125, + "alpha": 0.6247058648271221, + "alpha_r2": 0.7627363023897484, + "condition_number": 40112.7109375, + "top_10_sv": [ + 3.997250556945801, + 3.811657428741455, + 3.6265127658843994, + 3.55300235748291, + 3.443790912628174, + 3.3147835731506348, + 3.2664034366607666, + 3.260040044784546, + 3.183936834335327, + 3.1773746013641357 + ] + }, + "mirror_blocks.19.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 921.1821899414062, + "stable_rank": 20.824541091918945, + "spectral_norm": 16.02046775817871, + "frobenius_norm": 73.107666015625, + "mp_bound": 5.624550938575795, + "n_above_mp": 6, + "n_total": 1024, + "signal_ratio": 0.005859375, + "alpha": 0.33997569948848017, + "alpha_r2": 0.9134465061906961, + "condition_number": 30.14811134338379, + "top_10_sv": [ + 16.02046775817871, + 8.004988670349121, + 6.815584659576416, + 6.053225994110107, + 5.864286422729492, + 5.682826042175293, + 5.5322585105896, + 5.393547534942627, + 5.298839092254639, + 5.164860725402832 + ] + }, + "mirror_blocks.19.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 913.4828491210938, + "stable_rank": 64.77495574951172, + "spectral_norm": 8.485776901245117, + "frobenius_norm": 68.29598236083984, + "mp_bound": 5.1237318828102785, + "n_above_mp": 12, + "n_total": 1024, + "signal_ratio": 0.01171875, + "alpha": 0.3644257787906157, + "alpha_r2": 0.9297773923258119, + "condition_number": 12.943288803100586, + "top_10_sv": [ + 8.485776901245117, + 7.496995449066162, + 6.769553184509277, + 6.371829986572266, + 6.0476460456848145, + 5.819668292999268, + 5.710813522338867, + 5.599967956542969, + 5.419919013977051, + 5.3709917068481445 + ] + }, + "mirror_blocks.19.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 915.2433471679688, + "stable_rank": 15.742162704467773, + "spectral_norm": 17.8680419921875, + "frobenius_norm": 70.89395141601562, + "mp_bound": 5.338743332484469, + "n_above_mp": 8, + "n_total": 1024, + "signal_ratio": 0.0078125, + "alpha": 0.34795595563786613, + "alpha_r2": 0.9170025557031649, + "condition_number": 34.49018096923828, + "top_10_sv": [ + 17.8680419921875, + 7.728841781616211, + 6.664061546325684, + 6.12612247467041, + 5.89112663269043, + 5.700421333312988, + 5.561622142791748, + 5.460655212402344, + 5.291888236999512, + 5.162097454071045 + ] + }, + "mirror_blocks.19.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 923.1112060546875, + "stable_rank": 30.013948440551758, + "spectral_norm": 11.786266326904297, + "frobenius_norm": 64.571044921875, + "mp_bound": 4.992985578827361, + "n_above_mp": 6, + "n_total": 1024, + "signal_ratio": 0.005859375, + "alpha": 0.3378571936066118, + "alpha_r2": 0.908336919855349, + "condition_number": 24.350997924804688, + "top_10_sv": [ + 11.786266326904297, + 6.828085899353027, + 6.0875983238220215, + 5.372934341430664, + 5.354611873626709, + 5.081277847290039, + 4.93489408493042, + 4.776577472686768, + 4.6387858390808105, + 4.595118999481201 + ] + }, + "middle_blocks.0.attn.q_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 674.2701416015625, + "stable_rank": 15.248006820678711, + "spectral_norm": 12.026047706604004, + "frobenius_norm": 46.960147857666016, + "mp_bound": 1.2629574537277222, + "n_above_mp": 289, + "n_total": 1024, + "signal_ratio": 0.2822265625, + "alpha": 0.6850082213813383, + "alpha_r2": 0.8577373493014953, + "condition_number": 70671.640625, + "top_10_sv": [ + 12.026047706604004, + 7.158751487731934, + 7.101746559143066, + 6.498890399932861, + 6.113925457000732, + 5.645220756530762, + 5.418222427368164, + 5.067713737487793, + 4.9687957763671875, + 4.752420902252197 + ] + }, + "middle_blocks.0.attn.k_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 217.64886474609375, + "stable_rank": 11.739373207092285, + "spectral_norm": 7.770226001739502, + "frobenius_norm": 26.62294578552246, + "mp_bound": 3.4716603755950928, + "n_above_mp": 4, + "n_total": 256, + "signal_ratio": 0.015625, + "alpha": 0.431463341712347, + "alpha_r2": 0.8802875183536547, + "condition_number": 39.49501037597656, + "top_10_sv": [ + 7.770226001739502, + 4.5209856033325195, + 4.161535263061523, + 3.911479949951172, + 3.4423348903656006, + 3.3026723861694336, + 3.273172378540039, + 3.1558806896209717, + 3.128063917160034, + 3.063951253890991 + ] + }, + "middle_blocks.0.attn.v_proj.weight": { + "shape": [ + 256, + 1024 + ], + "effective_rank": 236.73318481445312, + "stable_rank": 70.63001251220703, + "spectral_norm": 2.373929023742676, + "frobenius_norm": 19.950895309448242, + "mp_bound": 3.510209083557129, + "n_above_mp": 0, + "n_total": 256, + "signal_ratio": 0.0, + "alpha": 0.29355375886272134, + "alpha_r2": 0.831571564938997, + "condition_number": 5.453413009643555, + "top_10_sv": [ + 2.373929023742676, + 2.3386001586914062, + 2.295886754989624, + 2.2434825897216797, + 2.2273242473602295, + 2.1970298290252686, + 2.1735589504241943, + 2.1181797981262207, + 2.0911500453948975, + 2.076988458633423 + ] + }, + "middle_blocks.0.attn.o_proj.weight": { + "shape": [ + 1024, + 1024 + ], + "effective_rank": 723.1683349609375, + "stable_rank": 82.79203796386719, + "spectral_norm": 4.435833930969238, + "frobenius_norm": 40.36170959472656, + "mp_bound": 1.2762367725372314, + "n_above_mp": 289, + "n_total": 1024, + "signal_ratio": 0.2822265625, + "alpha": 0.6163942490187059, + "alpha_r2": 0.7988000695875851, + "condition_number": 3184.6953125, + "top_10_sv": [ + 4.435833930969238, + 4.251309871673584, + 4.166616439819336, + 4.142618656158447, + 4.104921340942383, + 3.9006683826446533, + 3.8058104515075684, + 3.7110462188720703, + 3.671105146408081, + 3.6020100116729736 + ] + }, + "middle_blocks.0.ffn.w1.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 921.6627807617188, + "stable_rank": 19.731727600097656, + "spectral_norm": 16.13671875, + "frobenius_norm": 71.67996978759766, + "mp_bound": 5.4992035773016585, + "n_above_mp": 5, + "n_total": 1024, + "signal_ratio": 0.0048828125, + "alpha": 0.33764719368220536, + "alpha_r2": 0.9056899563762658, + "condition_number": 29.595256805419922, + "top_10_sv": [ + 16.13671875, + 7.6955132484436035, + 6.486385822296143, + 6.016712188720703, + 5.61277437210083, + 5.392815113067627, + 5.391153335571289, + 5.196187496185303, + 5.140034198760986, + 4.985111713409424 + ] + }, + "middle_blocks.0.ffn.w2.weight": { + "shape": [ + 1024, + 2752 + ], + "effective_rank": 922.4375610351562, + "stable_rank": 117.19658660888672, + "spectral_norm": 6.276037693023682, + "frobenius_norm": 67.94273376464844, + "mp_bound": 5.207448927918806, + "n_above_mp": 5, + "n_total": 1024, + "signal_ratio": 0.0048828125, + "alpha": 0.345004313250323, + "alpha_r2": 0.8961179675869516, + "condition_number": 9.517289161682129, + "top_10_sv": [ + 6.276037693023682, + 5.759649753570557, + 5.633343696594238, + 5.4667134284973145, + 5.386240482330322, + 5.176938533782959, + 5.111686706542969, + 5.094701290130615, + 4.967704772949219, + 4.866546154022217 + ] + }, + "middle_blocks.0.ffn.w3.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 913.4349365234375, + "stable_rank": 16.845548629760742, + "spectral_norm": 17.144311904907227, + "frobenius_norm": 70.36595916748047, + "mp_bound": 5.26667823710266, + "n_above_mp": 10, + "n_total": 1024, + "signal_ratio": 0.009765625, + "alpha": 0.35318562469218523, + "alpha_r2": 0.9218226757880902, + "condition_number": 32.42739486694336, + "top_10_sv": [ + 17.144311904907227, + 8.15190315246582, + 7.047175884246826, + 6.360540866851807, + 6.141634464263916, + 5.883530616760254, + 5.631952285766602, + 5.504978656768799, + 5.378512382507324, + 5.330729961395264 + ] + }, + "middle_blocks.0.ffn.w4.weight": { + "shape": [ + 2752, + 1024 + ], + "effective_rank": 920.865966796875, + "stable_rank": 18.12940216064453, + "spectral_norm": 15.7931489944458, + "frobenius_norm": 67.24507141113281, + "mp_bound": 5.143556204719468, + "n_above_mp": 6, + "n_total": 1024, + "signal_ratio": 0.005859375, + "alpha": 0.33862395588455296, + "alpha_r2": 0.9072930037359881, + "condition_number": 31.138105392456055, + "top_10_sv": [ + 15.7931489944458, + 6.966232776641846, + 6.278370380401611, + 5.6086039543151855, + 5.368446350097656, + 5.194730758666992, + 4.999834060668945, + 4.967520236968994, + 4.747239589691162, + 4.70646858215332 + ] + }, + "skip_head.weight": { + "shape": [ + 32000, + 1024 + ], + "effective_rank": 960.8615112304688, + "stable_rank": 3.654783248901367, + "spectral_norm": 109.66401672363281, + "frobenius_norm": 209.65003967285156, + "mp_bound": 35.83777754539527, + "n_above_mp": 1, + "n_total": 1024, + "signal_ratio": 0.0009765625, + "alpha": 0.19556546817172638, + "alpha_r2": 0.8802833350909326, + "condition_number": 49.63386917114258, + "top_10_sv": [ + 109.66401672363281, + 16.248952865600586, + 12.939766883850098, + 12.277144432067871, + 10.9157133102417, + 10.874079704284668, + 10.320158004760742, + 10.11323070526123, + 9.854007720947266, + 9.636229515075684 + ] + } +} \ No newline at end of file