| { | |
| "experiment": "A_B_downstream_crosslingual", | |
| "timestamp": "2026-04-13 09:59:30 UTC", | |
| "base": { | |
| "sentiment": { | |
| "he": { | |
| "logprob_acc": 0.53, | |
| "logprob_n": 200, | |
| "gen_acc": 0.0, | |
| "gen_n": 50 | |
| }, | |
| "ar": { | |
| "logprob_acc": 0.45, | |
| "logprob_n": 200, | |
| "gen_acc": 0.0, | |
| "gen_n": 50 | |
| }, | |
| "fa": { | |
| "logprob_acc": 0.605, | |
| "logprob_n": 200, | |
| "gen_acc": 0.0, | |
| "gen_n": 50 | |
| }, | |
| "en": { | |
| "logprob_acc": 0.515, | |
| "logprob_n": 200, | |
| "gen_acc": 0.0, | |
| "gen_n": 50 | |
| } | |
| }, | |
| "belebele": { | |
| "he": { | |
| "accuracy": 0.26, | |
| "n": 100 | |
| }, | |
| "ar": { | |
| "accuracy": 0.22, | |
| "n": 100 | |
| }, | |
| "fa": { | |
| "accuracy": 0.19, | |
| "n": 100 | |
| }, | |
| "en": { | |
| "accuracy": 0.19, | |
| "n": 100 | |
| } | |
| } | |
| }, | |
| "en_sft": { | |
| "sentiment": { | |
| "he": { | |
| "logprob_acc": 0.515, | |
| "logprob_n": 200, | |
| "gen_acc": 0.0, | |
| "gen_n": 50 | |
| }, | |
| "ar": { | |
| "logprob_acc": 0.465, | |
| "logprob_n": 200, | |
| "gen_acc": 0.0, | |
| "gen_n": 50 | |
| }, | |
| "fa": { | |
| "logprob_acc": 0.585, | |
| "logprob_n": 200, | |
| "gen_acc": 0.0, | |
| "gen_n": 50 | |
| }, | |
| "en": { | |
| "logprob_acc": 0.52, | |
| "logprob_n": 200, | |
| "gen_acc": 0.02, | |
| "gen_n": 50 | |
| } | |
| }, | |
| "belebele": { | |
| "he": { | |
| "accuracy": 0.26, | |
| "n": 100 | |
| }, | |
| "ar": { | |
| "accuracy": 0.23, | |
| "n": 100 | |
| }, | |
| "fa": { | |
| "accuracy": 0.2, | |
| "n": 100 | |
| }, | |
| "en": { | |
| "accuracy": 0.2, | |
| "n": 100 | |
| } | |
| } | |
| }, | |
| "multilingual_sft": { | |
| "sentiment": { | |
| "he": { | |
| "logprob_acc": 0.845, | |
| "logprob_n": 200, | |
| "gen_acc": 0.82, | |
| "gen_n": 50 | |
| }, | |
| "ar": { | |
| "logprob_acc": 0.605, | |
| "logprob_n": 200, | |
| "gen_acc": 0.64, | |
| "gen_n": 50 | |
| }, | |
| "fa": { | |
| "logprob_acc": 0.785, | |
| "logprob_n": 200, | |
| "gen_acc": 0.74, | |
| "gen_n": 50 | |
| }, | |
| "en": { | |
| "logprob_acc": 0.73, | |
| "logprob_n": 200, | |
| "gen_acc": 0.64, | |
| "gen_n": 50 | |
| } | |
| }, | |
| "belebele": { | |
| "he": { | |
| "accuracy": 0.28, | |
| "n": 100 | |
| }, | |
| "ar": { | |
| "accuracy": 0.24, | |
| "n": 100 | |
| }, | |
| "fa": { | |
| "accuracy": 0.25, | |
| "n": 100 | |
| }, | |
| "en": { | |
| "accuracy": 0.23, | |
| "n": 100 | |
| } | |
| } | |
| }, | |
| "generation_samples": [ | |
| { | |
| "prompt": "Write a short sentence in Hebrew about the weather.", | |
| "response": "-הייתי עייף." | |
| }, | |
| { | |
| "prompt": "اكتب جملة قصيرة عن الطقس بالعربية.", | |
| "response": "- I'm sorry." | |
| }, | |
| { | |
| "prompt": "یک جمله کوتاه درباره آب و هوا به فارسی بنویسید.", | |
| "response": "- What is the weather like in the summer?" | |
| }, | |
| { | |
| "prompt": "What is the capital of Israel?", | |
| "response": "מה זה?" | |
| } | |
| ] | |
| } |