gzip-text-classifier / results.json
knoxel's picture
Upload results.json
65fbc13 verified
{
"paper": {
"title": "Less is More: Parameter-Free Text Classification with Gzip",
"arxiv_id": "2212.09410",
"authors": "Zhiying Jiang, Matthew Y. R. Yang, Mikhail Tsirlin, Raphael Tang, Jimmy Lin",
"year": 2022
},
"method": {
"name": "gzip + NCD + kNN",
"num_parameters": 0,
"requires_training": false,
"requires_gpu": false,
"description": "Normalized Compression Distance using gzip as compressor with k-nearest-neighbor classification. NCD(x,y) = (C(xy) - min(C(x),C(y))) / max(C(x),C(y))"
},
"dataset": "fancyzhx/ag_news",
"config": {
"train_samples_per_class": 500,
"total_train_samples": 2000,
"test_samples": 200,
"k_values_tested": [1, 2, 3, 5, 7],
"best_k": 7,
"compressor": "gzip",
"random_seed": 42
},
"best_result": {
"k": 7,
"accuracy": 0.775,
"macro_f1": 0.773
},
"sweep_results": {
"k1": {"accuracy": 0.725, "macro_f1": 0.720},
"k2": {"accuracy": 0.725, "macro_f1": 0.720},
"k3": {"accuracy": 0.735, "macro_f1": 0.733},
"k5": {"accuracy": 0.760, "macro_f1": 0.755},
"k7": {"accuracy": 0.775, "macro_f1": 0.773}
},
"paper_comparison": {
"paper_full_train_accuracy": 0.937,
"paper_bert_accuracy": 0.944,
"our_accuracy": 0.775,
"our_train_fraction": "2000/120000"
},
"hardware": "CPU only (cpu-basic, 2 vCPU)"
}