File size: 1,365 Bytes
65fbc13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
{
  "paper": {
    "title": "Less is More: Parameter-Free Text Classification with Gzip",
    "arxiv_id": "2212.09410",
    "authors": "Zhiying Jiang, Matthew Y. R. Yang, Mikhail Tsirlin, Raphael Tang, Jimmy Lin",
    "year": 2022
  },
  "method": {
    "name": "gzip + NCD + kNN",
    "num_parameters": 0,
    "requires_training": false,
    "requires_gpu": false,
    "description": "Normalized Compression Distance (NCD) using gzip as the compressor, with k-nearest-neighbor classification. NCD(x,y) = (C(xy) - min(C(x),C(y))) / max(C(x),C(y)), where C(.) is the gzip-compressed length and xy is the concatenation of x and y."
  },
  "dataset": "fancyzhx/ag_news",
  "config": {
    "train_samples_per_class": 500,
    "total_train_samples": 2000,
    "test_samples": 200,
    "k_values_tested": [1, 2, 3, 5, 7],
    "best_k": 7,
    "compressor": "gzip",
    "random_seed": 42
  },
  "best_result": {
    "k": 7,
    "accuracy": 0.775,
    "macro_f1": 0.773
  },
  "sweep_results": {
    "k1": {"accuracy": 0.725, "macro_f1": 0.720},
    "k2": {"accuracy": 0.725, "macro_f1": 0.720},
    "k3": {"accuracy": 0.735, "macro_f1": 0.733},
    "k5": {"accuracy": 0.760, "macro_f1": 0.755},
    "k7": {"accuracy": 0.775, "macro_f1": 0.773}
  },
  "paper_comparison": {
    "paper_full_train_accuracy": 0.937,
    "paper_bert_accuracy": 0.944,
    "our_accuracy": 0.775,
    "our_train_fraction": "2000/120000"
  },
  "hardware": "CPU only (cpu-basic, 2 vCPU)"
}