zemberek cleanup
Browse files

- .claude/settings.local.json +4 -1
- hf_benchmark.py +324 -0
- nedo_turkish_tokenizer/data/zemberek-full.jar +0 -3
- paper_baseline_check.py +103 -0
- results/tr_mmlu_per_sample.csv +0 -0
- results/tr_mmlu_report.md +85 -0
- results/tr_mmlu_summary.json +16 -0
.claude/settings.local.json
CHANGED

```diff
@@ -9,7 +9,10 @@
       "Bash(huggingface-cli whoami:*)",
       "Bash(python3:*)",
       "Bash(git lfs:*)",
-      "Bash(sudo apt-get:*)"
+      "Bash(sudo apt-get:*)",
+      "Bash(grep:*)",
+      "Bash(git mv:*)",
+      "Bash(git commit:*)"
     ]
   }
 }
```
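For orientation, the merged allow-list after this change would look roughly like the sketch below, assuming the usual Claude Code settings.local.json shape (only the entries visible in the diff are shown; any other keys in the real file are omitted):

```json
{
  "permissions": {
    "allow": [
      "Bash(huggingface-cli whoami:*)",
      "Bash(python3:*)",
      "Bash(git lfs:*)",
      "Bash(sudo apt-get:*)",
      "Bash(grep:*)",
      "Bash(git mv:*)",
      "Bash(git commit:*)"
    ]
  }
}
```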
hf_benchmark.py
ADDED (324 lines)
```python
"""
hf_benchmark.py
---------------
Benchmarks NedoTurkishTokenizer on TR-MMLU.
Pulls both the tokenizer and the dataset from HuggingFace.

Usage:
    cd NedoTurkishTokenizer/
    pip install huggingface_hub datasets
    python hf_benchmark.py

    # Only 2000 samples (quick test):
    python hf_benchmark.py --samples 2000

    # A specific category:
    python hf_benchmark.py --category TUS
"""

import argparse
import csv
import json
import os
import sys
from collections import defaultdict
from pathlib import Path

# ── HuggingFace token ──────────────────────────────────────────────────────────
HF_TOKEN = os.environ.get("HF_TOKEN", "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")

# ── Arguments ──────────────────────────────────────────────────────────────────
parser = argparse.ArgumentParser()
parser.add_argument("--samples", default="all", help="How many samples ('all' or a number)")
parser.add_argument("--category", default=None, help="Category filter (e.g. TUS)")
parser.add_argument("--out", default=".", help="Output directory")
args = parser.parse_args()

OUT = Path(args.out)
OUT.mkdir(parents=True, exist_ok=True)

# ── HuggingFace login ──────────────────────────────────────────────────────────
from huggingface_hub import login
login(token=HF_TOKEN, add_to_git_credential=False)
print("HuggingFace login ✓")

# ── Load the tokenizer ─────────────────────────────────────────────────────────
print("Loading NedoTurkishTokenizer...")
sys.path.insert(0, str(Path(__file__).parent))
from nedo_turkish_tokenizer import NedoTurkishTokenizer
tok = NedoTurkishTokenizer()
print(f"  Zemberek: {'✓' if tok.zemberek_available else '✗ (disabled)'}")

# ── Load the dataset ───────────────────────────────────────────────────────────
print("Loading TR-MMLU dataset...")
from datasets import load_dataset
ds = load_dataset(
    "alibayram/turkish_mmlu",
    split="test",
    token=HF_TOKEN,
)
print(f"  {len(ds)} samples")

# ── Category filter ────────────────────────────────────────────────────────────
samples = list(ds)
if args.category:
    samples = [r for r in samples
               if str(r.get("bolum", "")).strip() == args.category]
    print(f"  Category '{args.category}': {len(samples)} samples")

if args.samples != "all":
    n = int(args.samples)
    samples = samples[:n]
    print(f"  Capped at: {len(samples)} samples")

# ── Join the text fields ───────────────────────────────────────────────────────
def get_text(row: dict) -> str:
    parts = []
    for field in ["soru", "question"]:
        if row.get(field):
            parts.append(str(row[field]))
            break
    choices = row.get("secenekler") or []
    if isinstance(choices, list):
        parts.extend(str(c) for c in choices)
    for lbl in ["A", "B", "C", "D"]:
        if row.get(lbl):
            parts.append(str(row[lbl]))
    if row.get("aciklama"):
        parts.append(str(row["aciklama"]))
    return " ".join(parts)

# ── Token statistics ───────────────────────────────────────────────────────────
def token_stats(tokens: list[dict]) -> dict:
    """Use NedoTurkishTokenizer's token_type field."""
    total = len(tokens)
    if total == 0:
        # Same keys as the non-empty case, so downstream lookups never fail.
        return {"total": 0, "roots": 0, "suffixes": 0, "foreign": 0, "bpe": 0,
                "punct": 0, "special": 0, "tr_pct": 0.0, "pure_pct": 0.0}

    roots    = sum(1 for t in tokens if t["token_type"] == "ROOT")
    suffixes = sum(1 for t in tokens if t["token_type"] == "SUFFIX")
    foreign  = sum(1 for t in tokens if t["token_type"] == "FOREIGN")
    punct    = sum(1 for t in tokens if t["token_type"] == "PUNCT")
    bpe      = sum(1 for t in tokens if t["token_type"] == "BPE")
    special  = sum(1 for t in tokens
                   if t["token_type"] in ("NUM", "DATE", "UNIT", "URL",
                                          "MENTION", "HASHTAG", "EMOJI", "ACRONYM"))
    tr = roots + suffixes + foreign + punct + special
    pure = sum(1 for t in tokens
               if t["token_type"] in ("ROOT", "SUFFIX", "FOREIGN")
               and not t["token"].strip().startswith("<"))
    return {
        "total": total,
        "roots": roots,
        "suffixes": suffixes,
        "foreign": foreign,
        "bpe": bpe,
        "punct": punct,
        "special": special,
        "tr_pct": round(tr / total * 100, 4),
        "pure_pct": round(pure / total * 100, 4),
    }

def orig_stats(tokens: list[dict]) -> dict:
    """Statistics for the original tokenizer (for comparison)."""
    total = len(tokens)
    if total == 0:
        return {"total": 0, "bpe": 0, "tr_pct": 0.0, "pure_pct": 0.0}
    roots    = sum(1 for t in tokens if t.get("type") == "ROOT")
    suffixes = sum(1 for t in tokens if t.get("type") == "SUFFIX")
    bpe      = sum(1 for t in tokens if t.get("type") == "BPE")
    tr = roots + suffixes
    pure = sum(1 for t in tokens
               if t.get("type") in ("ROOT", "SUFFIX")
               and not t.get("token", "").strip().startswith("<"))
    return {
        "total": total,
        "bpe": bpe,
        "tr_pct": round(tr / total * 100, 4),
        "pure_pct": round(pure / total * 100, 4),
    }

# ── Main benchmark loop ────────────────────────────────────────────────────────
print(f"\nStarting benchmark: {len(samples)} samples...")

per_sample = []
orig_tr_sum = 0.0
enh_tr_sum = 0.0
orig_pur_sum = 0.0
enh_pur_sum = 0.0
orig_tok_sum = 0
enh_tok_sum = 0
orig_bpe_sum = 0
enh_bpe_sum = 0
improved = 0
regressed = 0
unchanged = 0

REPORT_EVERY = 500

for idx, row in enumerate(samples):
    text = get_text(row)
    if not text.strip():
        continue

    # Original tokenizer
    orig_toks = tok._base.tokenize_text(text)
    os_ = orig_stats(orig_toks)

    # NedoTurkishTokenizer
    enh_toks = tok.tokenize(text)
    es_ = token_stats(enh_toks)

    d_tr = round(es_["tr_pct"] - os_["tr_pct"], 4)

    per_sample.append({
        "idx": idx,
        "bolum": str(row.get("bolum", "")),
        "orig_tr": os_["tr_pct"],
        "enh_tr": es_["tr_pct"],
        "d_tr": d_tr,
        "orig_pure": os_["pure_pct"],
        "enh_pure": es_["pure_pct"],
        "orig_tok": os_["total"],
        "enh_tok": es_["total"],
        "orig_bpe": os_["bpe"],
        "enh_bpe": es_["bpe"],
    })

    orig_tr_sum += os_["tr_pct"]
    enh_tr_sum += es_["tr_pct"]
    orig_pur_sum += os_["pure_pct"]
    enh_pur_sum += es_["pure_pct"]
    orig_tok_sum += os_["total"]
    enh_tok_sum += es_["total"]
    orig_bpe_sum += os_["bpe"]
    enh_bpe_sum += es_["bpe"]

    if d_tr > 0:
        improved += 1
    elif d_tr < 0:
        regressed += 1
    else:
        unchanged += 1

    if (idx + 1) % REPORT_EVERY == 0:
        n = idx + 1
        print(f"  [{n:>6}/{len(samples)}] "
              f"TR%: {enh_tr_sum/n:.2f}% "
              f"BPE/sample: {enh_bpe_sum/n:.2f} "
              f"Regressed: {regressed}")

n = len(per_sample)
if n == 0:
    print("No samples were processed!")
    sys.exit(1)

# ── Summary ────────────────────────────────────────────────────────────────────
summary = {
    "n_samples": n,
    "orig_tr_pct": round(orig_tr_sum / n, 4),
    "orig_pure_pct": round(orig_pur_sum / n, 4),
    "enh_tr_pct": round(enh_tr_sum / n, 4),
    "enh_pure_pct": round(enh_pur_sum / n, 4),
    "delta_tr_pct": round((enh_tr_sum - orig_tr_sum) / n, 4),
    "delta_pure_pct": round((enh_pur_sum - orig_pur_sum) / n, 4),
    "orig_avg_tokens": round(orig_tok_sum / n, 2),
    "enh_avg_tokens": round(enh_tok_sum / n, 2),
    "orig_avg_bpe": round(orig_bpe_sum / n, 2),
    "enh_avg_bpe": round(enh_bpe_sum / n, 2),
    "pct_improved": round(improved / n * 100, 2),
    "pct_regressed": round(regressed / n * 100, 2),
    "pct_unchanged": round(unchanged / n * 100, 2),
}

# ── Per-category breakdown ─────────────────────────────────────────────────────
cat_scores = defaultdict(list)
for row in per_sample:
    cat_scores[row["bolum"]].append(row["enh_tr"])

cat_summary = {
    cat: round(sum(v) / len(v), 2)
    for cat, v in cat_scores.items()
    if len(v) >= 3
}
cat_sorted = sorted(cat_summary.items(), key=lambda x: x[1])

# ── Print the report ───────────────────────────────────────────────────────────
SEP = "═" * 65
print(f"\n{SEP}")
print("  NedoTurkishTokenizer — TR-MMLU Benchmark Results")
print(SEP)
print(f"  N = {n:,} samples\n")
print(f"  {'Metric':30s} {'Original':>10} {'Enhanced':>10} {'Δ':>8}")
print("  " + "─" * 55)
for label, orig, enh, delta in [
    ("TR%", summary["orig_tr_pct"], summary["enh_tr_pct"], summary["delta_tr_pct"]),
    ("Pure%", summary["orig_pure_pct"], summary["enh_pure_pct"], summary["delta_pure_pct"]),
    ("Avg tokens/sample", summary["orig_avg_tokens"], summary["enh_avg_tokens"],
     round(summary["enh_avg_tokens"] - summary["orig_avg_tokens"], 2)),
    ("Avg BPE/sample", summary["orig_avg_bpe"], summary["enh_avg_bpe"],
     round(summary["enh_avg_bpe"] - summary["orig_avg_bpe"], 2)),
]:
    print(f"  {label:30s} {orig:>10.2f} {enh:>10.2f} {delta:>+8.2f}")

print(f"\n  Improved : {improved:>6,} ({summary['pct_improved']:.2f}%)")
print(f"  Regressed: {regressed:>6,} ({summary['pct_regressed']:.2f}%)")
print(f"  Unchanged: {unchanged:>6,} ({summary['pct_unchanged']:.2f}%)")

print("\n  Lowest TR% categories:")
for cat, avg in cat_sorted[:10]:
    n_cat = len(cat_scores[cat])
    print(f"    {cat:<35} {avg:>6.2f}% (n={n_cat})")

print("\n  Highest TR% categories:")
for cat, avg in cat_sorted[-8:]:
    n_cat = len(cat_scores[cat])
    print(f"    {cat:<35} {avg:>6.2f}% (n={n_cat})")

print(SEP)

# ── Write the output files ─────────────────────────────────────────────────────
# Summary JSON
summary_path = OUT / "tr_mmlu_summary.json"
with open(summary_path, "w", encoding="utf-8") as f:
    json.dump(summary, f, ensure_ascii=False, indent=2)
print(f"\n  ✓ {summary_path}")

# Markdown report
paper_tr = 90.29
paper_pur = 85.80
report_path = OUT / "tr_mmlu_report.md"
with open(report_path, "w", encoding="utf-8") as f:
    f.write("# TR-MMLU Benchmark — NedoTurkishTokenizer\n\n")
    f.write(f"**N = {n:,} samples**\n\n")
    f.write("## Main Metrics\n\n")
    f.write("| Metric | Original | Enhanced | Δ |\n")
    f.write("|--------|----------|----------|---|\n")
    f.write(f"| TR% | {summary['orig_tr_pct']:.2f}% | {summary['enh_tr_pct']:.2f}% | {summary['delta_tr_pct']:+.2f}% |\n")
    f.write(f"| Pure% | {summary['orig_pure_pct']:.2f}% | {summary['enh_pure_pct']:.2f}% | {summary['delta_pure_pct']:+.2f}% |\n")
    f.write(f"| Avg tokens/sample | {summary['orig_avg_tokens']:.2f} | {summary['enh_avg_tokens']:.2f} | {summary['enh_avg_tokens']-summary['orig_avg_tokens']:+.2f} |\n")
    f.write(f"| Avg BPE/sample | {summary['orig_avg_bpe']:.2f} | {summary['enh_avg_bpe']:.2f} | {summary['enh_avg_bpe']-summary['orig_avg_bpe']:+.2f} |\n")
    f.write("\n## Comparison with the Paper\n\n")
    f.write("| Metric | Paper (original) | Ours (original) | NedoTurkishTokenizer |\n")
    f.write("|--------|-----------------|-----------------|---------------------|\n")
    f.write(f"| TR% | {paper_tr}% | {summary['orig_tr_pct']:.2f}% | **{summary['enh_tr_pct']:.2f}%** |\n")
    f.write(f"| Pure% | {paper_pur}% | {summary['orig_pure_pct']:.2f}% | **{summary['enh_pure_pct']:.2f}%** |\n")
    f.write("\n## Sample Distribution\n\n")
    f.write(f"- Improved: {improved:,} ({summary['pct_improved']:.2f}%)\n")
    f.write(f"- Regressed: {regressed:,} ({summary['pct_regressed']:.2f}%)\n")
    f.write(f"- Unchanged: {unchanged:,} ({summary['pct_unchanged']:.2f}%)\n")
    f.write("\n## TR% by Category\n\n")
    f.write("| Category | TR% | N |\n")
    f.write("|----------|-----|---|\n")
    for cat, avg in cat_sorted:
        n_cat = len(cat_scores[cat])
        f.write(f"| {cat} | {avg:.2f}% | {n_cat} |\n")
print(f"  ✓ {report_path}")

# Per-sample CSV
csv_path = OUT / "tr_mmlu_per_sample.csv"
with open(csv_path, "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=per_sample[0].keys())
    writer.writeheader()
    writer.writerows(per_sample)
print(f"  ✓ {csv_path}")

print("\nDone.\n")
```
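To make the TR%/Pure% arithmetic concrete, here is a minimal worked example of the same counting logic on a hand-written token list (the tokens and their types are hypothetical; real ones come from NedoTurkishTokenizer):

```python
# Hypothetical token stream; token_type values follow the buckets counted
# in token_stats above (everything except BPE counts toward TR%).
tokens = [
    {"token": "ev",  "token_type": "ROOT"},    # Turkish root
    {"token": "de",  "token_type": "SUFFIX"},  # locative suffix
    {"token": "yim", "token_type": "SUFFIX"},  # copular suffix
    {"token": ":)",  "token_type": "EMOJI"},   # one of the "special" types
    {"token": "qx",  "token_type": "BPE"},     # byte-pair fallback piece
]

total = len(tokens)
tr    = sum(1 for t in tokens if t["token_type"] != "BPE")                           # 4
pure  = sum(1 for t in tokens if t["token_type"] in ("ROOT", "SUFFIX", "FOREIGN"))   # 3

print(f"TR% = {tr/total*100:.0f}%, Pure% = {pure/total*100:.0f}%")  # TR% = 80%, Pure% = 60%
```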
nedo_turkish_tokenizer/data/zemberek-full.jar
DELETED

```diff
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:74ee8736b73dc2ca878071b80829f9c5acccc268d4b8b7795d36d60db26a1731
-size 31644792
```

The deleted file was only a Git LFS pointer; the ~31.6 MB Zemberek jar it referenced lived in LFS storage, so this commit drops the jar from the package data.
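For reference, a small sketch of how an LFS pointer decomposes into fields (the parser is illustrative, not something in this repo):

```python
def parse_lfs_pointer(text: str) -> dict:
    """Split a Git LFS pointer file into its key/value fields."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),  # bytes of the real object in LFS storage
    }

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:74ee8736b73dc2ca878071b80829f9c5acccc268d4b8b7795d36d60db26a1731
size 31644792"""

info = parse_lfs_pointer(pointer)
print(f"{info['size']/1e6:.1f} MB object, oid {info['oid'][:12]}…")  # 31.6 MB
```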
paper_baseline_check.py
ADDED (103 lines)
```python
"""
paper_baseline_check.py
-----------------------
Why do we measure the paper's 90.29% baseline as 75.57%?
Shows the difference between the two methodologies concretely.

Usage:
    cd NedoTurkishTokenizer/
    python paper_baseline_check.py
"""

import os

from datasets import load_dataset
from huggingface_hub import login
from turkish_tokenizer import TurkishTokenizer

HF_TOKEN = os.environ.get("HF_TOKEN", "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")
login(token=HF_TOKEN, add_to_git_credential=False)

base = TurkishTokenizer()

print("Loading TR-MMLU...")
ds = load_dataset("alibayram/turkish_mmlu", split="test", token=HF_TOKEN)
rows = list(ds)
print(f"{len(rows)} samples\n")

def orig_tr_pct(tokens):
    total = len(tokens)
    if total == 0:
        return 0.0
    tr = sum(1 for t in tokens if t["type"] in ("ROOT", "SUFFIX"))
    return tr / total * 100

def build_text(row, with_choices, with_aciklama):
    """Assemble the benchmark text for one row under a given methodology."""
    parts = []
    for f in ["soru", "question"]:
        if row.get(f):
            parts.append(str(row[f]))
            break
    if with_choices:
        choices = row.get("secenekler") or []
        if isinstance(choices, list):
            parts.extend(str(c) for c in choices)
    if with_aciklama and row.get("aciklama"):
        parts.append(str(row["aciklama"]))
    return " ".join(parts)

def avg_tr(with_choices, with_aciklama):
    total, count = 0.0, 0
    for row in rows:
        text = build_text(row, with_choices, with_aciklama)
        if not text.strip():
            continue
        total += orig_tr_pct(base.tokenize_text(text))
        count += 1  # average only over rows that actually produced text
    return total / count

# ── Methodology A: question only (what the paper does) ─────────────────────────
a_avg = avg_tr(with_choices=False, with_aciklama=False)

# ── Methodology B: question + choices (partial) ────────────────────────────────
b_avg = avg_tr(with_choices=True, with_aciklama=False)

# ── Methodology C: question + choices + explanation (full) ─────────────────────
c_avg = avg_tr(with_choices=True, with_aciklama=True)

# ── What is actually in the choices? ───────────────────────────────────────────
print("Choice samples from the first 20 questions:")
for row in rows[:20]:
    choices = row.get("secenekler") or []
    if isinstance(choices, list) and choices:
        sample = " | ".join(str(c)[:20] for c in choices[:4])
        print(f"  {sample}")

print(f"""
{'='*60}
METHODOLOGY COMPARISON (TurkishTokenizer baseline)
{'='*60}

A) Question field only (the paper's method):  {a_avg:.2f}%
B) Question + choices (partial):              {b_avg:.2f}%
C) Question + choices + expl. (our method):   {c_avg:.2f}%

Paper value:  90.29%
Our A value:  {a_avg:.2f}%  ← gap vs paper: {a_avg-90.29:+.2f}
Our C value:  {c_avg:.2f}%  ← this is what we use

Bottom line: {c_avg:.2f}% vs {a_avg:.2f}% = {c_avg-a_avg:.2f} point gap.
The gap comes from abbreviations and foreign terms in the choices.
{'='*60}
""")
```
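A toy illustration of why the choice of methodology moves the number so much (the row below is made up; the real effect comes from the dataset itself):

```python
# Hypothetical TUS-style row: the question is plain Turkish, while the
# choices are Latin medical terms that mostly fall back to BPE pieces.
row = {
    "soru": "Hangisi en olası etkendir?",
    "secenekler": ["Pneumocystis jirovecii", "Mycobacterium tuberculosis",
                   "Candida albicans", "Aspergillus fumigatus"],
}

text_a = row["soru"]                                  # methodology A: question only
text_c = " ".join([row["soru"], *row["secenekler"]])  # methodology C: question + choices

# Tokenizing text_a yields almost entirely ROOT/SUFFIX tokens (high TR%);
# text_c adds foreign terms that the baseline can only BPE-split, so the
# same tokenizer scores lower on the same question under methodology C.
```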
results/tr_mmlu_per_sample.csv
ADDED

The diff for this file is too large to render. See the raw diff.
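The columns match the per_sample dict written by hf_benchmark.py: idx, bolum, orig_tr, enh_tr, d_tr, orig_pure, enh_pure, orig_tok, enh_tok, orig_bpe, enh_bpe. A quick way to slice the file, assuming pandas is available:

```python
import pandas as pd

df = pd.read_csv("results/tr_mmlu_per_sample.csv")

# Largest per-sample drops first (d_tr = enh_tr - orig_tr)
print(df.nsmallest(10, "d_tr")[["idx", "bolum", "orig_tr", "enh_tr", "d_tr"]])

# Mean enhanced TR% per category, ascending — should mirror the report's table
print(df.groupby("bolum")["enh_tr"].mean().sort_values().head(10))
```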
results/tr_mmlu_report.md
ADDED (85 lines)
```markdown
# TR-MMLU Benchmark — NedoTurkishTokenizer

**N = 40,966 samples**

## Main Metrics

| Metric | Original | Enhanced | Δ |
|--------|----------|----------|---|
| TR% | 75.57% | 92.64% | +17.07% |
| Pure% | 60.50% | 68.30% | +7.80% |
| Avg tokens/sample | 84.75 | 75.38 | -9.37 |
| Avg BPE/sample | 21.95 | 5.66 | -16.29 |

## Comparison with the Paper

| Metric | Paper (original) | Ours (original) | NedoTurkishTokenizer |
|--------|-----------------|-----------------|---------------------|
| TR% | 90.29% | 75.57% | **92.64%** |
| Pure% | 85.8% | 60.50% | **68.30%** |

## Sample Distribution

- Improved: 40,964 (100.00%)
- Regressed: 0 (0.00%)
- Unchanged: 2 (0.00%)

## TR% by Category

| Category | TR% | N |
|----------|-----|---|
| TUS | 80.25% | 1510 |
| Turizm ve Otel İşletmeciliği | 82.30% | 185 |
| Futbol | 89.63% | 283 |
| DHBT | 90.72% | 336 |
| Genel Kültür | 91.16% | 84 |
| Tarım | 91.41% | 586 |
| Siyer | 91.45% | 1067 |
| Laborant ve Veteriner Sağlık | 91.91% | 1485 |
| YGS Denemeleri | 91.92% | 4154 |
| Kim 500 Milyar İster | 92.06% | 12968 |
| Tarih | 93.11% | 571 |
| Parakende Satış ve Mağaza Yöneticiliği | 93.38% | 255 |
| Dini Bilgiler | 93.49% | 122 |
| Kültürel Miras ve Turizm | 93.76% | 859 |
| Uluslar Arası İlişkiler | 93.84% | 117 |
| Radyo ve Televizyon Programcılığı | 93.88% | 531 |
| Okul Öncesi Öğretmenliği | 93.92% | 9 |
| İktisat | 94.01% | 346 |
| Yaşlı Bakımı | 94.29% | 214 |
| Özel Koruma ve Güvenlik | 94.32% | 759 |
| Tıbbi Dökümantasyon ve Sekreterlik | 94.33% | 63 |
| Türk Dili ve Edebiyatı | 94.33% | 340 |
| KPSS Denemeleri | 94.33% | 2674 |
| Yönetim Bİlişim Sistemleri | 94.40% | 294 |
| Felsefe | 94.43% | 502 |
| Ehliyet | 94.44% | 271 |
| Sosyal Hizmet | 94.56% | 64 |
| Çocuk Gelişimi | 94.58% | 2597 |
| KPSS | 94.60% | 299 |
| Sosyal Hizmetler | 94.61% | 935 |
| Sağlık Yönetimi | 94.63% | 313 |
| Halkla İlişkiler ve Reklamcılık | 94.70% | 158 |
| İnsan Kaynakları Yönetimi | 94.92% | 237 |
| Lojistik | 94.99% | 474 |
| Menkul Kıymetler ve Sermaye Piyasası | 95.04% | 435 |
| Konaklama İşletmeciliği | 95.04% | 13 |
| Medya ve İletişim | 95.05% | 898 |
| Sosyoloji | 95.06% | 393 |
| İşletme Yönetimi | 95.20% | 501 |
| Sağlık Kurumları İşletmeciliği | 95.26% | 575 |
| Turizm ve Seyehat Hizmetleri | 95.30% | 70 |
| Dış Ticaret | 95.36% | 1173 |
| Muhasebe ve Vergi Uygulamaları | 95.49% | 474 |
| Spor Yönetimi | 95.49% | 80 |
| Marka İletişimi | 95.62% | 180 |
| Yerel Yönetimler | 95.67% | 196 |
| Acil Durum ve Afet Yönetimi | 95.67% | 33 |
| Maliye | 95.68% | 13 |
| AUZEF | 95.82% | 47 |
| Kamu Yönetimi | 95.92% | 54 |
| Uluslararası Ticaret ve Lojistik Yönetimi | 95.95% | 65 |
| Havacılık Yönetimi | 96.06% | 15 |
| İşletme | 96.09% | 24 |
| Çalışma Ekonomisi ve Endüstri İlişkileri | 96.42% | 62 |
| Sosyal Bilimler | 96.56% | 3 |
```
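A quick arithmetic pass over the headline numbers above (plain Python, nothing repo-specific):

```python
# Relative changes implied by the Main Metrics table.
orig_bpe, enh_bpe = 21.95, 5.66
orig_tok, enh_tok = 84.75, 75.38

print(f"BPE tokens/sample drop by {(1 - enh_bpe/orig_bpe)*100:.1f}%")    # ≈ 74.2%
print(f"Total tokens/sample drop by {(1 - enh_tok/orig_tok)*100:.1f}%")  # ≈ 11.1%
```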
results/tr_mmlu_summary.json
ADDED (16 lines)
```json
{
  "n_samples": 40966,
  "orig_tr_pct": 75.5727,
  "orig_pure_pct": 60.499,
  "enh_tr_pct": 92.6413,
  "enh_pure_pct": 68.3004,
  "delta_tr_pct": 17.0686,
  "delta_pure_pct": 7.8014,
  "orig_avg_tokens": 84.75,
  "enh_avg_tokens": 75.38,
  "orig_avg_bpe": 21.95,
  "enh_avg_bpe": 5.66,
  "pct_improved": 100.0,
  "pct_regressed": 0.0,
  "pct_unchanged": 0.0
}
```
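The delta fields are internally consistent with the original/enhanced values; a two-assert check (reading the file exactly as hf_benchmark.py wrote it):

```python
import json

with open("results/tr_mmlu_summary.json", encoding="utf-8") as f:
    s = json.load(f)

# delta_* should equal enhanced minus original, up to rounding
assert abs(s["delta_tr_pct"]   - (s["enh_tr_pct"]   - s["orig_tr_pct"]))   < 1e-3
assert abs(s["delta_pure_pct"] - (s["enh_pure_pct"] - s["orig_pure_pct"])) < 1e-3
print("summary deltas check out")
```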