Upload folder using huggingface_hub

Files changed (8) hide show

README.md CHANGED Viewed

@@ -1,9 +1,7 @@
-# triangle-5k-og Checkpoints
 This repository contains the final trained model and intermediate checkpoints.
 - The main directory contains the fully trained model (checkpoint 0).
 - The `checkpoints` directory contains all intermediate checkpoints.
-Now updated to match tetrahedron format

+# triangle-100k-og Checkpoints
 This repository contains the final trained model and intermediate checkpoints.
 - The main directory contains the fully trained model (checkpoint 0).
 - The `checkpoints` directory contains all intermediate checkpoints.

checkpoints/checkpoint-100.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3fafa99fb361dae2d224a1307eb27fd7362cfb29144c5b369bf9dae370563080
-size 2478085

 version https://git-lfs.github.com/spec/v1
+oid sha256:d171f342841db6a5fc0a1893c1d8ef5306cb446a356ff45b2ad0a61e53098278
+size 2295597

checkpoints/checkpoint-25.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:200f2670bbfd05c752c6af8e5e3835b47ff11e4c7286203f0bbfaa0c6fbead11
-size 2478049

 version https://git-lfs.github.com/spec/v1
+oid sha256:b30b7ea80a4a204da4284b08471c32085ae6665e37b24d9365f21cfdce1f5dd3
+size 2295561

checkpoints/checkpoint-50.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:beb0e997530cf6327992063dab60027b1b74c57fcb2cfaefdfdbabcd7d1068af
-size 2478049

 version https://git-lfs.github.com/spec/v1
+oid sha256:baa896287380f2fdc7e6dfeafbf74acfa1801fa5c2b9cd35e13a8b656b8804de
+size 2295561

checkpoints/checkpoint-75.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4a79008dd26635ed3276e069048391ff2962fa834d33e3649c9d2653fe5893a
-size 2478049

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2a97f4afb2cd35120cff209c44a5c9a9dfd73182a27ac332be49740a09557e7
+size 2295561

convert_checkpoints.py CHANGED Viewed

@@ -1,48 +1,37 @@
-# convert_and_replace.py
-import os
-from pathlib import Path
-import torch
-def convert_checkpoint_format(checkpoint_path):
-    """Convert nested format to direct format in place."""
-    # Load the checkpoint
-    checkpoint = torch.load(checkpoint_path, map_location='cpu')
-    model_state_dict = checkpoint
-    print(f"Converting {checkpoint_path}: nested -> direct format")
-    # Create new state dict with flattened keys
-    new_state_dict = {}
-    for key, value in model_state_dict.items():
-        if key.startswith('model.'):
-            # Remove 'model.' prefix
-            new_key = key[6:]  # Remove 'model.' (6 characters)
-            print(f"Converting key: '{key}' -> '{new_key}'")
-            new_state_dict[new_key] = value
         else:
-            # Keep keys that don't start with 'model.'
-            print(f"Keeping key: '{key}'")
-            new_state_dict[key] = value
-    # Save back in direct format
-    torch.save(new_state_dict, checkpoint_path)
-    print(f"Updated: {checkpoint_path}")
-def convert_all_checkpoints():
-    """Convert all checkpoint files in the current directory."""
-    checkpoints_dir = Path("checkpoints")
-    if not checkpoints_dir.exists():
-        print("No checkpoints directory found")
-        return
-    # Convert all .pt files
-    for checkpoint_file in checkpoints_dir.glob("*.pt"):
-        convert_checkpoint_format(checkpoint_file)
-if __name__ == "__main__":
-    convert_all_checkpoints()

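+"""Flatten the keys of every .pt checkpoint in the checkpoints directory. Each checkpoint is currently a nested dict; only the entries under checkpoint['model'] are kept, and a leading 'model.' prefix is stripped from each key so that, e.g., 'model.pos_embed.W_pos' becomes 'pos_embed.W_pos'. The flattened dict is saved next to the original as 'converted_<filename>'; files without a 'model' key are skipped with a warning.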
+"""
+import os
+import json
+import torch
+checkpoints_dir = "checkpoints/"
+for file in os.listdir(checkpoints_dir):
+    if file.endswith(".pt"):
+        file_path = os.path.join(checkpoints_dir, file)
+        print(f"Processing {file}...")
+        # Load the checkpoint
+        checkpoint = torch.load(file_path, map_location='cpu')
+        # Extract model keys and rename them
+        if 'model' in checkpoint:
+            model_state_dict = checkpoint['model']
+            converted_state_dict = {}
+            for key, value in model_state_dict.items():
+                # Remove 'model.' prefix if it exists
+                if key.startswith('model.'):
+                    new_key = key[6:]  # Remove 'model.' prefix
+                else:
+                    new_key = key
+                converted_state_dict[new_key] = value
+            # Save the converted checkpoint as a flat dictionary
+            output_path = os.path.join(checkpoints_dir, f"converted_{file}")
+            torch.save(converted_state_dict, output_path)
+            print(f"Saved converted checkpoint to {output_path}")
         else:
+            print(f"Warning: No 'model' key found in {file}")

model.yaml CHANGED Viewed

@@ -3,9 +3,9 @@ implementation: transformer_lens
 model_name: default
 n_layers: '2'
 model_seed: '1'
-d_model: '16'
 n_ctx: '1024'
-d_head: '4'
 n_heads: '8'
 act_fn: gelu
 d_vocab: '5000'

 model_name: default
 n_layers: '2'
 model_seed: '1'
+d_model: '8'
 n_ctx: '1024'
+d_head: '2'
 n_heads: '8'
 act_fn: gelu
 d_vocab: '5000'

training.yaml CHANGED Viewed

@@ -1,4 +1,4 @@
-output_dir: checkpoints/triangle-5k-og
 overwrite_output_dir: 'False'
 do_train: 'False'
 do_eval: 'False'
@@ -28,7 +28,7 @@ warmup_steps: '0'
 log_level: warning
 log_level_replica: warning
 log_on_each_node: 'True'
-logging_dir: checkpoints/triangle-5k-og/runs/Jul03_18-23-05_842bf34089c7
 logging_strategy: IntervalStrategy.STEPS
 logging_first_step: 'True'
 logging_steps: '250'
@@ -64,7 +64,7 @@ eval_steps: None
 dataloader_num_workers: '0'
 dataloader_prefetch_factor: None
 past_index: '-1'
-run_name: triangle-5k-og
 disable_tqdm: 'False'
 remove_unused_columns: 'False'
 label_names: '[''input_ids'']'
@@ -97,7 +97,7 @@ skip_memory_metrics: 'True'
 use_legacy_prediction_loop: 'False'
 push_to_hub: 'False'
 resume_from_checkpoint: None
-hub_model_id: timaeus/triangle-5k-og
 hub_strategy: HubStrategy.EVERY_SAVE
 hub_token: None
 hub_private_repo: 'False'

+output_dir: checkpoints/triangle-100k-og
 overwrite_output_dir: 'False'
 do_train: 'False'
 do_eval: 'False'
 log_level: warning
 log_level_replica: warning
 log_on_each_node: 'True'
+logging_dir: checkpoints/triangle-100k-og/runs/Jul09_16-32-16_7be3271c880a
 logging_strategy: IntervalStrategy.STEPS
 logging_first_step: 'True'
 logging_steps: '250'
 dataloader_num_workers: '0'
 dataloader_prefetch_factor: None
 past_index: '-1'
+run_name: triangle-100k-og
 disable_tqdm: 'False'
 remove_unused_columns: 'False'
 label_names: '[''input_ids'']'
 use_legacy_prediction_loop: 'False'
 push_to_hub: 'False'
 resume_from_checkpoint: None
+hub_model_id: timaeus/triangle-100k-og
 hub_strategy: HubStrategy.EVERY_SAVE
 hub_token: None
 hub_private_repo: 'False'