caiovicentino1 committed on
Commit
144fd82
·
verified ·
1 Parent(s): b2797fb

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  feature_taxonomy.json filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  feature_taxonomy.json filter=lfs diff=lfs merge=lfs -text
37
+ cosine_vs_causal.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -19,7 +19,7 @@ base-specific, and chat-specific features.
19
 
20
  ## Recipe
21
 
22
- - BatchTopK k = 100 (annealed from 100)
23
  - 100 M training tokens (FineWeb-Edu + LMSYS-chat-1M, 50/50)
24
  - Per-model normalization, BOS dropped
25
  - Adam lr 0.0001, decay last 20%, grad clip 1.0
@@ -28,17 +28,17 @@ base-specific, and chat-specific features.
28
 
29
  | | base (A) | chat (B) |
30
  |---|---|---|
31
- | variance explained | 0.8890 | 0.8818 |
32
 
33
- L0 = 100.2, dead-feature fraction = 43.05%
34
 
35
  ## Δ_norm taxonomy
36
 
37
  {
38
- "shared": 39613,
39
- "dead": 31742,
40
- "unclassified": 2365,
41
- "base_only": 5,
42
  "chat_only": 3
43
  }
44
 
 
19
 
20
  ## Recipe
21
 
22
+ - BatchTopK k = 100 (annealed from 1000)
23
  - 100 M training tokens (FineWeb-Edu + LMSYS-chat-1M, 50/50)
24
  - Per-model normalization, BOS dropped
25
  - Adam lr 0.0001, decay last 20%, grad clip 1.0
 
28
 
29
  | | base (A) | chat (B) |
30
  |---|---|---|
31
+ | variance explained | 0.8773 | 0.8666 |
32
 
33
+ L0 = 100.5, dead-feature fraction = 42.89%
34
 
35
  ## Δ_norm taxonomy
36
 
37
  {
38
+ "shared": 39711,
39
+ "dead": 31625,
40
+ "unclassified": 2385,
41
+ "base_only": 4,
42
  "chat_only": 3
43
  }
44
 
causal_validation.csv ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ feature,class,delta_norm,cosine,mean_KL_A,mean_KL_B,pearson_CE,n_probes_fired
2
+ 42646,shared,0.4499010443687439,0.9687612056732178,0.00035026412221365416,0.00028150821754024946,0.5143612909851984,3
3
+ 361,shared,0.5569367408752441,0.9780248999595642,0.0006873465359344014,0.0007361738909302014,0.5108647146754345,7
4
+ 4558,shared,0.5509030818939209,0.9575502872467041,0.00018537026721787697,0.00012096479167666985,-0.13718220435043688,4
5
+ 1794,shared,0.469279944896698,0.960382878780365,0.001482554691998909,0.001030864844021077,0.9811627149528812,3
6
+ 45586,shared,0.4056813716888428,0.6997604370117188,0.0015843691459546487,0.0025503765791654587,-0.4679505155192875,3
7
+ 10695,shared,0.5084750652313232,0.9765876531600952,0.0010638559428116424,0.000314469160639419,0.03518075976261837,5
8
+ 32547,shared,0.4399070739746094,0.9335393905639648,0.0008043809502851218,0.0016580600495217368,0.7817385223732515,4
9
+ 41492,shared,0.4627435505390167,0.9167423248291016,0.0005305631217803845,0.0013244837913741019,0.9398034461106574,6
10
+ 7051,shared,0.5654429197311401,0.9452683329582214,0.0011601387290284038,0.0007399948604870588,0.4416892881307169,5
11
+ 4818,shared,0.5270645022392273,0.9649407863616943,0.00057997082330985,0.0013192057667765766,0.24090239768379787,4
12
+ 24356,shared,0.5819075107574463,0.9779106378555298,0.0008303821010713364,0.0010876997611148909,0.31291000447434886,8
13
+ 35144,shared,0.5918715000152588,0.9615367650985718,0.00029825038044301825,0.00026526696733851295,0.2574244913139678,7
14
+ 60833,shared,0.5777490735054016,0.9776451587677002,0.000839033414649748,0.0007356036280725675,0.2761933345848742,4
15
+ 62774,shared,0.5035171508789062,0.9636607766151428,0.003912068961653858,0.004923530679661781,0.9820889830756147,3
16
+ 10650,shared,0.435795396566391,0.9690171480178833,0.0027005543495022266,0.00662028947260751,0.9354661721965355,8
17
+ 46438,shared,0.5583057999610901,0.9219706058502197,0.0024537488352507353,0.0018319709536929925,0.9817560410487564,3
18
+ 52663,shared,0.5449858903884888,0.9808030128479004,0.0017938694259100885,0.001790035743527239,0.8338294382289249,14
19
+ 47444,shared,0.5580482482910156,0.9784024357795715,0.0022077696294218185,0.0019055540427264834,0.7744610585160919,6
20
+ 15501,shared,0.5721683502197266,0.9473258852958679,0.0018174610468311887,0.0008239755658602613,-0.13876719773826007,3
21
+ 57673,shared,0.40031182765960693,0.9069675803184509,0.0041101546201389285,0.0015197889471892268,0.9137820923512755,5
22
+ 54503,shared,0.5071892738342285,0.9759280681610107,0.0013729936714905004,0.0029875804126883545,0.9584009171248054,3
23
+ 57911,shared,0.5018664002418518,0.979893147945404,0.0010929527894144744,0.0008830939987092279,0.748928706901456,7
24
+ 60815,shared,0.540656328201294,0.9729748964309692,0.0013768659788183867,0.0010098683982505464,0.9242266862695779,5
25
+ 50548,shared,0.5117408037185669,0.952705442905426,0.0005984317213005852,0.0017475062340963632,-0.3442867743103403,4
26
+ 29715,shared,0.5661550760269165,0.9159844517707825,0.0006894378013081829,0.0005703238461289099,0.7289660524245968,6
27
+ 21907,shared,0.4945530295372009,0.8770098686218262,0.00034953729417945095,0.00033152137140746163,0.9911001842414667,6
28
+ 34100,shared,0.44108909368515015,0.7451911568641663,0.001480887954433759,0.0016502767878895004,0.6354396021600477,3
29
+ 37704,shared,0.5395039319992065,0.9606422185897827,0.0023398584839014803,0.0014363208120812486,0.32358981917214463,5
30
+ 70217,shared,0.44601958990097046,0.9761323928833008,0.0015000146877355292,0.0010239070957140939,0.9781950123053889,4
31
+ 35338,shared,0.5445601344108582,0.9374934434890747,0.0011118365032416477,0.0011259269685979234,0.9154981893534829,4
32
+ 44251,shared,0.5158880352973938,0.970435380935669,0.0007589005229723019,0.0004703010296604286,0.9934240722796591,3
33
+ 58234,shared,0.5013293623924255,0.9503910541534424,0.0011765657327487133,0.0012808772007701918,-0.04115479379450943,4
34
+ 54706,shared,0.5834556221961975,0.8696204423904419,0.0008888045917956333,0.0010285107191521092,0.7564393756429583,12
35
+ 19439,shared,0.5963613390922546,0.9107996225357056,0.00036880209836454014,0.0006358054720294604,0.10819996566376958,8
36
+ 26994,shared,0.5486676692962646,0.9701230525970459,0.0008238435645277301,0.0021700598687554398,-0.666515829700643,3
37
+ 45701,shared,0.48239102959632874,0.965920090675354,0.0007045287832928201,0.00048316595833360526,0.7809382128721067,3
38
+ 40313,shared,0.5459778308868408,0.9600326418876648,0.0005380256174248643,0.0013116647001879755,-0.2578095102409567,5
39
+ 29277,shared,0.5959312319755554,0.9790481328964233,0.0004552071569930506,0.0006563022440023555,0.5304482859764592,14
40
+ 27711,shared,0.5598475337028503,0.9803685545921326,0.00034111485607960884,0.0007631681265441633,-0.21334251300681437,6
41
+ 65798,shared,0.5138514637947083,0.96925288438797,0.0009758987901875571,0.0003184518416219362,0.2973991029788298,4
42
+ 69275,shared,0.46130990982055664,0.9776513576507568,0.009806151383600081,0.0057419685308559565,0.5685187847434534,8
43
+ 27797,shared,0.5403455495834351,0.8583166599273682,0.0014819076168350875,0.000558991383513785,0.886122786612071,4
44
+ 8119,shared,0.5278910994529724,0.9825807809829712,0.0005395049865910551,0.0006613159469755677,0.22155297482110337,6
45
+ 12073,shared,0.5218182802200317,0.9745292663574219,0.0004697950862464495,0.0008608509688201593,0.19558085073354659,4
46
+ 68407,shared,0.5051596164703369,0.9642737507820129,0.0006771386186301242,0.000902094262846731,0.3089815178520834,5
47
+ 10213,shared,0.41001462936401367,0.9690078496932983,0.0015707006289934118,0.003195101007198294,0.999713040834574,3
48
+ 23520,shared,0.5543535947799683,0.9397248029708862,0.0016626781798549928,0.0024044125480031653,0.5070814916878974,4
49
+ 5053,shared,0.4310462474822998,0.9774919152259827,0.003495614684652537,0.0019445959478616714,0.9632725732697864,3
50
+ 64492,shared,0.592833399772644,0.9777247905731201,0.0016993676461944623,0.0010970364902667435,-0.6197086922419984,7
51
+ 20432,shared,0.5277723670005798,0.8493714332580566,0.0005494452721904963,0.0006279241177253425,0.7239012431482981,5
52
+ 22683,shared,0.5997784733772278,0.9748894572257996,0.001332021008339426,0.0015195006896849212,-0.22733265867267116,9
53
+ 2471,shared,0.50933438539505,0.9620163440704346,0.002370804902360154,0.0025380672401903817,0.6637490113589803,6
54
+ 22406,shared,0.5865375399589539,0.9782766699790955,0.0009203328819629471,0.000296219168636266,-0.45590589511523366,7
55
+ 53063,shared,0.565894365310669,0.9669222831726074,0.0008607247109466698,0.0009179432017845102,0.76102565843491,4
56
+ 732,shared,0.5278809666633606,0.9227498769760132,0.0021377086911040046,0.005134541502532859,0.8443204997744512,3
57
+ 20231,shared,0.5010678172111511,0.9426530599594116,0.002779615082545206,0.0008208309203231087,0.36164195929311127,3
58
+ 73444,shared,0.5865200757980347,0.9612457752227783,0.0011093589350821276,0.0009374785327054269,-0.11088497607558248,8
59
+ 20252,shared,0.529986560344696,0.9783006310462952,0.0009738134249346331,0.0012603226103919951,0.6961008974455208,4
60
+ 3642,shared,0.5016448497772217,0.8703551888465881,0.00024044961201070693,0.00026541258848737924,0.7796744537066341,5
61
+ 18529,shared,0.46889954805374146,0.845805287361145,0.0006517739918005342,0.0021349554881453514,0.9982147555525157,3
62
+ 39717,shared,0.504048764705658,0.9751577973365784,0.0014567440084647387,0.001714786936645396,0.2512586588062993,4
63
+ 53688,shared,0.558761477470398,0.9746147394180298,0.0004001747845904902,0.00017292714455834356,0.8816117919339718,5
64
+ 1052,shared,0.4922683537006378,0.9564507007598877,0.0003323498312965967,0.00041748992043721955,-0.2050387770944763,4
65
+ 18787,shared,0.44051018357276917,0.9439552426338196,0.000685746674813951,0.001056715941861815,0.5809791012216582,3
66
+ 54578,shared,0.5772069692611694,0.9751758575439453,0.0012629338860278949,0.0009796920443477576,0.7005432316326226,5
67
+ 13453,shared,0.4737304449081421,0.9782998561859131,0.0020157160179223865,0.0014420233346754685,0.6996719533450978,4
68
+ 27948,shared,0.49797841906547546,0.9801362752914429,0.0010022739351774766,0.001564015155963716,0.9714796605253556,9
69
+ 24424,shared,0.5543129444122314,0.9721282124519348,0.0013369863154366612,0.00222221112344414,0.596107420276842,5
70
+ 48602,unclassified,0.8083633780479431,-0.3936921954154968,0.0002675905680260055,0.0005439086405911237,0.15852710499712638,9
71
+ 36378,unclassified,0.20600301027297974,0.7107419967651367,0.00032754164058133026,0.0006455192968587653,0.39071255055835435,5
72
+ 53072,unclassified,0.8175182342529297,-0.5116434097290039,1.0883230184314622e-05,0.00038666296217115814,0.9106957775287421,6
73
+ 37406,unclassified,0.27152693271636963,0.7936084270477295,0.00010444922827446135,0.0017056300534932234,-0.6173350458532736,4
74
+ 60508,unclassified,0.6508164405822754,0.8973047137260437,0.0004987486013684853,0.0006318441299602758,0.016096132106992192,12
75
+ 29259,unclassified,0.6522914171218872,0.9079272747039795,0.0008597410433139885,0.0021927710804447997,0.5959416355118956,8
76
+ 40561,unclassified,0.6278425455093384,0.828863799571991,0.00013779811191246458,0.0002560781152472676,0.8381164600492058,3
77
+ 23846,unclassified,0.6904646158218384,0.9449024796485901,0.0008279174910323187,0.0003906520240030255,0.6620630285031748,9
78
+ 56875,unclassified,0.6069067716598511,0.9712244272232056,0.00043307106534484774,0.0013303512052402766,0.7360965720152607,4
79
+ 46282,unclassified,0.399735689163208,0.8420406579971313,0.0008953847573138773,0.0008874677138616486,0.9959377002758074,3
80
+ 64657,unclassified,0.2649328410625458,0.891894519329071,0.0012456516678867047,0.0008773530811367891,0.14519062698186433,8
81
+ 49059,unclassified,0.6421933770179749,0.9607788324356079,0.0001856049796439644,0.0003641321973191225,0.9988883143908203,3
82
+ 45651,unclassified,0.6516387462615967,0.798211395740509,0.0009770735050551593,0.0010339522195863537,-0.898412364526631,4
83
+ 13789,unclassified,0.22061845660209656,0.8967344760894775,0.0008156634868328183,0.00967529519607524,0.006444357240435912,8
84
+ 37532,unclassified,0.25058701634407043,0.8076569437980652,0.001386195665395462,0.0003722914701938862,0.32500979413423475,5
85
+ 24153,unclassified,0.6440958976745605,0.9472424983978271,0.000694500413374044,0.00033858782080642413,-0.14928776881099243,4
86
+ 19770,unclassified,0.6456837058067322,0.8455252051353455,0.00015403707220684737,0.0008742502978596652,0.7975225658389049,3
87
+ 1515,unclassified,0.6396113634109497,0.9121227264404297,4.83542483493693e-05,0.00028796545161640096,-0.04032669468533977,5
88
+ 29320,unclassified,0.6356933116912842,0.9125940799713135,0.0007852217190052865,0.0005922001882611637,0.5716012670872999,7
89
+ 67986,unclassified,0.6206876635551453,0.9747179746627808,0.0010603314731270075,0.0019145806630452473,-0.893499600884729,3
90
+ 61547,unclassified,0.2781330943107605,0.8912808299064636,0.0004875328018840719,0.00045501587305807334,0.8029328706867953,8
91
+ 65331,unclassified,0.6705557703971863,0.8911718130111694,0.0006151710782432929,0.0006958493497222662,-0.2965017229922185,5
92
+ 32557,unclassified,0.38846325874328613,0.826408326625824,0.00012055723861218819,7.099160120560555e-05,0.4957390142620299,3
93
+ 2103,unclassified,0.6513481140136719,0.9094518423080444,0.00036076426622457804,0.0008638007100671529,0.617899608306032,5
94
+ 25038,unclassified,0.6746888160705566,0.949090838432312,0.0005552469698233,0.0008775395772748172,0.12557604977663236,37
95
+ 22220,unclassified,0.8102065324783325,-0.40847545862197876,0.00088148181384895,0.0019030703406315297,-0.5651920169912124,4
96
+ 47901,unclassified,0.3788280487060547,0.8694870471954346,0.0013744387251790613,0.0008970412745838985,-0.14288091340899126,4
97
+ 1427,unclassified,0.6069611310958862,0.9780724048614502,0.0005607170696748653,0.0010155246242788962,0.7892929314545765,10
cfg.json CHANGED
@@ -8,8 +8,8 @@
8
  "d_model": 2304,
9
  "expansion": 32,
10
  "k_batchtopk": 100,
11
- "k_warmup_init": 100,
12
- "k_warmup_steps": 1,
13
  "dec_init_norm": 1.0,
14
  "token_budget": 100000000,
15
  "seq_len": 512,
@@ -27,19 +27,19 @@
27
  "hf_repo_name": "gemma2-2b-crosscoder-model-diff-papergrade",
28
  "n_features": 73728,
29
  "hf_repo": "caiovicentino1/gemma2-2b-crosscoder-model-diff-papergrade",
30
- "norm_scale_A": 0.2676052357480563,
31
- "norm_scale_B": 0.23619559913264956,
32
  "val_metrics": {
33
- "ve_A": 0.8890310853719712,
34
- "ve_B": 0.8818201005458832,
35
- "L0": 100.21483154296875,
36
- "dead_frac": 0.4305284321308136
37
  },
38
  "taxonomy_counts": {
39
- "shared": 39613,
40
- "dead": 31742,
41
- "unclassified": 2365,
42
- "base_only": 5,
43
  "chat_only": 3
44
  }
45
  }
 
8
  "d_model": 2304,
9
  "expansion": 32,
10
  "k_batchtopk": 100,
11
+ "k_warmup_init": 1000,
12
+ "k_warmup_steps": 5000,
13
  "dec_init_norm": 1.0,
14
  "token_budget": 100000000,
15
  "seq_len": 512,
 
27
  "hf_repo_name": "gemma2-2b-crosscoder-model-diff-papergrade",
28
  "n_features": 73728,
29
  "hf_repo": "caiovicentino1/gemma2-2b-crosscoder-model-diff-papergrade",
30
+ "norm_scale_A": 0.2677689692703698,
31
+ "norm_scale_B": 0.23634909321590494,
32
  "val_metrics": {
33
+ "ve_A": 0.8773096382617951,
34
+ "ve_B": 0.8665769904851913,
35
+ "L0": 100.53505859375,
36
+ "dead_frac": 0.4289415180683136
37
  },
38
  "taxonomy_counts": {
39
+ "shared": 39711,
40
+ "dead": 31625,
41
+ "unclassified": 2385,
42
+ "base_only": 4,
43
  "chat_only": 3
44
  }
45
  }
cosine_vs_causal.pdf ADDED
Binary file (22.8 kB). View file
 
cosine_vs_causal.png ADDED

Git LFS Details

  • SHA256: ce275d552a9a13aa008fb38a81d0dde072c43b70f2eea386cce5a131b735d675
  • Pointer size: 131 Bytes
  • Size of remote file: 183 kB
crosscoder_final.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57f0d1fd7ac040a4074fdef289cfa641501a2df8913c4adaf978207335990050
3
  size 2718517664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6bc9b7821443de33d40f72da19004f2869d96b3a5c0101f6b13e594c92bf5a5
3
  size 2718517664
delta_norm_hist.png CHANGED
feature_taxonomy.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3d85905bb7003841fa548e838b36fffa763f2842280508200d6943e8fa5b4bc
3
- size 11095294
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc4cac7c978b5350e714bd137d5b8339707ad9a0780d49e4f5facefde40eae55
3
+ size 11095705
train_log.json CHANGED
The diff for this file is too large to render. See raw diff