danchern committed on
Commit
a73d9e2
·
1 Parent(s): 912ce08

Update sudoku baseline

Browse files
sudoku/sudoku_baseline/best_model.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d4335b63d57a894e0951c3ef97d6ffaad0f7ceb69a228c8b223dadec537d0a7
3
- size 37608117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44b38aa217b85029b19d41428f3dbd7003eb7aa8ab40131bc3711ec5954ddab2
3
+ size 58606133
sudoku/sudoku_baseline/sudoku_baseline.yaml CHANGED
@@ -1,12 +1,12 @@
1
  training:
2
- experiment_name: "sudoku-baseline"
3
  # Hardware parameters
4
  num_workers: 4 # Number of workers for dataloader
5
  device: "cuda:0" # Device to use for training
6
  # Training parameters
7
  batch_size: 256
8
- learning_rate: 0.0056
9
- epochs: 50
10
  # scheduler:
11
  # name: "linear"
12
  # args:
@@ -20,15 +20,21 @@ training:
20
  checkpoint_metric:
21
  name: "accuracy_unknown_cells"
22
  mode: "max"
 
23
  # Seed for reproducibility
24
  seed: 42
 
 
25
 
26
  dataset:
27
  name: "sudoku"
28
  path: "/var/scratch/dchernia/sudoku"
 
 
29
 
30
  model:
31
  name: "SudokuCNN"
32
- losses: [{"name": "cross_entropy", "weight": 1.0}]
33
  args:
34
- dropout: 0.5589
 
 
1
  training:
2
+ experiment_name: "sudoku-deep-baseline-midlayers=3-dropout=0.5-batchsize=256-lr=0.001-short-decoder-relu"
3
  # Hardware parameters
4
  num_workers: 4 # Number of workers for dataloader
5
  device: "cuda:0" # Device to use for training
6
  # Training parameters
7
  batch_size: 256
8
+ learning_rate: 0.001
9
+ epochs: 20
10
  # scheduler:
11
  # name: "linear"
12
  # args:
 
20
  checkpoint_metric:
21
  name: "accuracy_unknown_cells"
22
  mode: "max"
23
+
24
  # Seed for reproducibility
25
  seed: 42
26
+ logger: "sudoku_logger"
27
+ max_samples_to_log: 16
28
 
29
  dataset:
30
  name: "sudoku"
31
  path: "/var/scratch/dchernia/sudoku"
32
+ args:
33
+ filter_train: false
34
 
35
  model:
36
  name: "SudokuCNN"
37
+ losses: [{"name": "cross_entropy_loss", "weight": 1.0}]
38
  args:
39
+ dropout: 0.5
40
+ n_mid_layers: 3