| combinations: | |
| cerebras_cerebras-gpt-2.7b_tokyotech_llm_swallow_code: | |
| model_name: cerebras/Cerebras-GPT-2.7B | |
| dataset_name: tokyotech-llm/swallow-code | |
| path: features/train/cerebras_cerebras-gpt-2.7b_tokyotech_llm_swallow_code | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| cerebras_cerebras-gpt-2.7b_codeparrot_codeparrot_clean: | |
| model_name: cerebras/Cerebras-GPT-2.7B | |
| dataset_name: codeparrot/codeparrot-clean | |
| path: features/train/cerebras_cerebras-gpt-2.7b_codeparrot_codeparrot_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| cerebras_cerebras-gpt-2.7b_jetbrains_kstack: | |
| model_name: cerebras/Cerebras-GPT-2.7B | |
| dataset_name: JetBrains/KStack | |
| path: features/train/cerebras_cerebras-gpt-2.7b_jetbrains_kstack | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| cerebras_cerebras-gpt-2.7b_mvasiliniuc_iva_kotlin_codeint_clean: | |
| model_name: cerebras/Cerebras-GPT-2.7B | |
| dataset_name: mvasiliniuc/iva-kotlin-codeint-clean | |
| path: features/train/cerebras_cerebras-gpt-2.7b_mvasiliniuc_iva_kotlin_codeint_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| cerebras_cerebras-gpt-2.7b_nan_do_code_search_net_java: | |
| model_name: cerebras/Cerebras-GPT-2.7B | |
| dataset_name: Nan-Do/code-search-net-java | |
| path: features/train/cerebras_cerebras-gpt-2.7b_nan_do_code_search_net_java | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| cerebras_cerebras-gpt-2.7b_bigcode_the_stack_smol_xl: | |
| model_name: cerebras/Cerebras-GPT-2.7B | |
| dataset_name: bigcode/the-stack-smol-xl | |
| path: features/train/cerebras_cerebras-gpt-2.7b_bigcode_the_stack_smol_xl | |
| n_members: 4000 | |
| n_nonmembers: 4000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 3200 | |
| val_size: 400 | |
| test_size: 400 | |
| eleutherai_pythia-1.4b_tokyotech_llm_swallow_code: | |
| model_name: EleutherAI/pythia-1.4b | |
| dataset_name: tokyotech-llm/swallow-code | |
| path: features/train/eleutherai_pythia-1.4b_tokyotech_llm_swallow_code | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| eleutherai_pythia-1.4b_codeparrot_codeparrot_clean: | |
| model_name: EleutherAI/pythia-1.4b | |
| dataset_name: codeparrot/codeparrot-clean | |
| path: features/train/eleutherai_pythia-1.4b_codeparrot_codeparrot_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| eleutherai_pythia-1.4b_jetbrains_kstack: | |
| model_name: EleutherAI/pythia-1.4b | |
| dataset_name: JetBrains/KStack | |
| path: features/train/eleutherai_pythia-1.4b_jetbrains_kstack | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| eleutherai_pythia-1.4b_mvasiliniuc_iva_kotlin_codeint_clean: | |
| model_name: EleutherAI/pythia-1.4b | |
| dataset_name: mvasiliniuc/iva-kotlin-codeint-clean | |
| path: features/train/eleutherai_pythia-1.4b_mvasiliniuc_iva_kotlin_codeint_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| eleutherai_pythia-1.4b_nan_do_code_search_net_java: | |
| model_name: EleutherAI/pythia-1.4b | |
| dataset_name: Nan-Do/code-search-net-java | |
| path: features/train/eleutherai_pythia-1.4b_nan_do_code_search_net_java | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| eleutherai_pythia-1.4b_bigcode_the_stack_smol_xl: | |
| model_name: EleutherAI/pythia-1.4b | |
| dataset_name: bigcode/the-stack-smol-xl | |
| path: features/train/eleutherai_pythia-1.4b_bigcode_the_stack_smol_xl | |
| n_members: 4000 | |
| n_nonmembers: 4000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 3600 | |
| val_size: 200 | |
| test_size: 200 | |
| eleutherai_gpt-j-6b_tokyotech_llm_swallow_code: | |
| model_name: EleutherAI/gpt-j-6b | |
| dataset_name: tokyotech-llm/swallow-code | |
| path: features/train/eleutherai_gpt-j-6b_tokyotech_llm_swallow_code | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| eleutherai_gpt-j-6b_codeparrot_codeparrot_clean: | |
| model_name: EleutherAI/gpt-j-6b | |
| dataset_name: codeparrot/codeparrot-clean | |
| path: features/train/eleutherai_gpt-j-6b_codeparrot_codeparrot_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| eleutherai_gpt-j-6b_jetbrains_kstack: | |
| model_name: EleutherAI/gpt-j-6b | |
| dataset_name: JetBrains/KStack | |
| path: features/train/eleutherai_gpt-j-6b_jetbrains_kstack | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| eleutherai_gpt-j-6b_mvasiliniuc_iva_kotlin_codeint_clean: | |
| model_name: EleutherAI/gpt-j-6b | |
| dataset_name: mvasiliniuc/iva-kotlin-codeint-clean | |
| path: features/train/eleutherai_gpt-j-6b_mvasiliniuc_iva_kotlin_codeint_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| eleutherai_gpt-j-6b_nan_do_code_search_net_java: | |
| model_name: EleutherAI/gpt-j-6b | |
| dataset_name: Nan-Do/code-search-net-java | |
| path: features/train/eleutherai_gpt-j-6b_nan_do_code_search_net_java | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| eleutherai_gpt-j-6b_bigcode_the_stack_smol_xl: | |
| model_name: EleutherAI/gpt-j-6b | |
| dataset_name: bigcode/the-stack-smol-xl | |
| path: features/train/eleutherai_gpt-j-6b_bigcode_the_stack_smol_xl | |
| n_members: 4000 | |
| n_nonmembers: 4000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 3600 | |
| val_size: 200 | |
| test_size: 200 | |
| google_gemma-2b_tokyotech_llm_swallow_code: | |
| model_name: google/gemma-2b | |
| dataset_name: tokyotech-llm/swallow-code | |
| path: features/train/google_gemma-2b_tokyotech_llm_swallow_code | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| google_gemma-2b_codeparrot_codeparrot_clean: | |
| model_name: google/gemma-2b | |
| dataset_name: codeparrot/codeparrot-clean | |
| path: features/train/google_gemma-2b_codeparrot_codeparrot_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| google_gemma-2b_jetbrains_kstack: | |
| model_name: google/gemma-2b | |
| dataset_name: JetBrains/KStack | |
| path: features/train/google_gemma-2b_jetbrains_kstack | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| google_gemma-2b_mvasiliniuc_iva_kotlin_codeint_clean: | |
| model_name: google/gemma-2b | |
| dataset_name: mvasiliniuc/iva-kotlin-codeint-clean | |
| path: features/train/google_gemma-2b_mvasiliniuc_iva_kotlin_codeint_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| google_gemma-2b_nan_do_code_search_net_java: | |
| model_name: google/gemma-2b | |
| dataset_name: Nan-Do/code-search-net-java | |
| path: features/train/google_gemma-2b_nan_do_code_search_net_java | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| google_gemma-2b_bigcode_the_stack_smol_xl: | |
| model_name: google/gemma-2b | |
| dataset_name: bigcode/the-stack-smol-xl | |
| path: features/train/google_gemma-2b_bigcode_the_stack_smol_xl | |
| n_members: 4000 | |
| n_nonmembers: 4000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 3600 | |
| val_size: 200 | |
| test_size: 200 | |
| qwen_qwen2-1.5b_tokyotech_llm_swallow_code: | |
| model_name: Qwen/Qwen2-1.5B | |
| dataset_name: tokyotech-llm/swallow-code | |
| path: features/train/qwen_qwen2-1.5b_tokyotech_llm_swallow_code | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| qwen_qwen2-1.5b_codeparrot_codeparrot_clean: | |
| model_name: Qwen/Qwen2-1.5B | |
| dataset_name: codeparrot/codeparrot-clean | |
| path: features/train/qwen_qwen2-1.5b_codeparrot_codeparrot_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| qwen_qwen2-1.5b_jetbrains_kstack: | |
| model_name: Qwen/Qwen2-1.5B | |
| dataset_name: JetBrains/KStack | |
| path: features/train/qwen_qwen2-1.5b_jetbrains_kstack | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| qwen_qwen2-1.5b_mvasiliniuc_iva_kotlin_codeint_clean: | |
| model_name: Qwen/Qwen2-1.5B | |
| dataset_name: mvasiliniuc/iva-kotlin-codeint-clean | |
| path: features/train/qwen_qwen2-1.5b_mvasiliniuc_iva_kotlin_codeint_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| qwen_qwen2-1.5b_nan_do_code_search_net_java: | |
| model_name: Qwen/Qwen2-1.5B | |
| dataset_name: Nan-Do/code-search-net-java | |
| path: features/train/qwen_qwen2-1.5b_nan_do_code_search_net_java | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| qwen_qwen2-1.5b_bigcode_the_stack_smol_xl: | |
| model_name: Qwen/Qwen2-1.5B | |
| dataset_name: bigcode/the-stack-smol-xl | |
| path: features/train/qwen_qwen2-1.5b_bigcode_the_stack_smol_xl | |
| n_members: 4000 | |
| n_nonmembers: 4000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 3600 | |
| val_size: 200 | |
| test_size: 200 | |
| tiiuae_falcon-rw-1b_tokyotech_llm_swallow_code: | |
| model_name: tiiuae/falcon-rw-1b | |
| dataset_name: tokyotech-llm/swallow-code | |
| path: features/train/tiiuae_falcon-rw-1b_tokyotech_llm_swallow_code | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| tiiuae_falcon-rw-1b_codeparrot_codeparrot_clean: | |
| model_name: tiiuae/falcon-rw-1b | |
| dataset_name: codeparrot/codeparrot-clean | |
| path: features/train/tiiuae_falcon-rw-1b_codeparrot_codeparrot_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| tiiuae_falcon-rw-1b_jetbrains_kstack: | |
| model_name: tiiuae/falcon-rw-1b | |
| dataset_name: JetBrains/KStack | |
| path: features/train/tiiuae_falcon-rw-1b_jetbrains_kstack | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| tiiuae_falcon-rw-1b_mvasiliniuc_iva_kotlin_codeint_clean: | |
| model_name: tiiuae/falcon-rw-1b | |
| dataset_name: mvasiliniuc/iva-kotlin-codeint-clean | |
| path: features/train/tiiuae_falcon-rw-1b_mvasiliniuc_iva_kotlin_codeint_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| tiiuae_falcon-rw-1b_nan_do_code_search_net_java: | |
| model_name: tiiuae/falcon-rw-1b | |
| dataset_name: Nan-Do/code-search-net-java | |
| path: features/train/tiiuae_falcon-rw-1b_nan_do_code_search_net_java | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| tiiuae_falcon-rw-1b_bigcode_the_stack_smol_xl: | |
| model_name: tiiuae/falcon-rw-1b | |
| dataset_name: bigcode/the-stack-smol-xl | |
| path: features/train/tiiuae_falcon-rw-1b_bigcode_the_stack_smol_xl | |
| n_members: 4000 | |
| n_nonmembers: 4000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 3600 | |
| val_size: 200 | |
| test_size: 200 | |
| distilgpt2_bigcode_the_stack_smol_xl: | |
| model_name: distilgpt2 | |
| dataset_name: bigcode/the-stack-smol-xl | |
| path: features/train/distilgpt2_bigcode_the_stack_smol_xl | |
| n_members: 4000 | |
| n_nonmembers: 4000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 3200 | |
| val_size: 400 | |
| test_size: 400 | |
| distilgpt2_codeparrot_codeparrot_clean: | |
| model_name: distilgpt2 | |
| dataset_name: codeparrot/codeparrot-clean | |
| path: features/train/distilgpt2_codeparrot_codeparrot_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| distilgpt2_jetbrains_kstack: | |
| model_name: distilgpt2 | |
| dataset_name: JetBrains/KStack | |
| path: features/train/distilgpt2_jetbrains_kstack | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| distilgpt2_mvasiliniuc_iva_kotlin_codeint_clean: | |
| model_name: distilgpt2 | |
| dataset_name: mvasiliniuc/iva-kotlin-codeint-clean | |
| path: features/train/distilgpt2_mvasiliniuc_iva_kotlin_codeint_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| distilgpt2_nan_do_code_search_net_java: | |
| model_name: distilgpt2 | |
| dataset_name: Nan-Do/code-search-net-java | |
| path: features/train/distilgpt2_nan_do_code_search_net_java | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| distilgpt2_tokyotech_llm_swallow_code: | |
| model_name: distilgpt2 | |
| dataset_name: tokyotech-llm/swallow-code | |
| path: features/train/distilgpt2_tokyotech_llm_swallow_code | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| gpt2-xl_bigcode_the_stack_smol_xl: | |
| model_name: gpt2-xl | |
| dataset_name: bigcode/the-stack-smol-xl | |
| path: features/train/gpt2-xl_bigcode_the_stack_smol_xl | |
| n_members: 4000 | |
| n_nonmembers: 4000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 3200 | |
| val_size: 400 | |
| test_size: 400 | |
| gpt2-xl_codeparrot_codeparrot_clean: | |
| model_name: gpt2-xl | |
| dataset_name: codeparrot/codeparrot-clean | |
| path: features/train/gpt2-xl_codeparrot_codeparrot_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| gpt2-xl_jetbrains_kstack: | |
| model_name: gpt2-xl | |
| dataset_name: JetBrains/KStack | |
| path: features/train/gpt2-xl_jetbrains_kstack | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| gpt2-xl_mvasiliniuc_iva_kotlin_codeint_clean: | |
| model_name: gpt2-xl | |
| dataset_name: mvasiliniuc/iva-kotlin-codeint-clean | |
| path: features/train/gpt2-xl_mvasiliniuc_iva_kotlin_codeint_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| gpt2-xl_nan_do_code_search_net_java: | |
| model_name: gpt2-xl | |
| dataset_name: Nan-Do/code-search-net-java | |
| path: features/train/gpt2-xl_nan_do_code_search_net_java | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| gpt2-xl_tokyotech_llm_swallow_code: | |
| model_name: gpt2-xl | |
| dataset_name: tokyotech-llm/swallow-code | |
| path: features/train/gpt2-xl_tokyotech_llm_swallow_code | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 8000 | |
| val_size: 1000 | |
| test_size: 1000 | |
| tiiuae_falcon-7b_tokyotech_llm_swallow_code: | |
| model_name: tiiuae/falcon-7b | |
| dataset_name: tokyotech-llm/swallow-code | |
| path: features/train/tiiuae_falcon-7b_tokyotech_llm_swallow_code | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| tiiuae_falcon-7b_codeparrot_codeparrot_clean: | |
| model_name: tiiuae/falcon-7b | |
| dataset_name: codeparrot/codeparrot-clean | |
| path: features/train/tiiuae_falcon-7b_codeparrot_codeparrot_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| tiiuae_falcon-7b_jetbrains_kstack: | |
| model_name: tiiuae/falcon-7b | |
| dataset_name: JetBrains/KStack | |
| path: features/train/tiiuae_falcon-7b_jetbrains_kstack | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| tiiuae_falcon-7b_mvasiliniuc_iva_kotlin_codeint_clean: | |
| model_name: tiiuae/falcon-7b | |
| dataset_name: mvasiliniuc/iva-kotlin-codeint-clean | |
| path: features/train/tiiuae_falcon-7b_mvasiliniuc_iva_kotlin_codeint_clean | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| tiiuae_falcon-7b_nan_do_code_search_net_java: | |
| model_name: tiiuae/falcon-7b | |
| dataset_name: Nan-Do/code-search-net-java | |
| path: features/train/tiiuae_falcon-7b_nan_do_code_search_net_java | |
| n_members: 10000 | |
| n_nonmembers: 10000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 9000 | |
| val_size: 500 | |
| test_size: 500 | |
| tiiuae_falcon-7b_bigcode_the_stack_smol_xl: | |
| model_name: tiiuae/falcon-7b | |
| dataset_name: bigcode/the-stack-smol-xl | |
| path: features/train/tiiuae_falcon-7b_bigcode_the_stack_smol_xl | |
| n_members: 4000 | |
| n_nonmembers: 4000 | |
| feature_dim: 154 | |
| sequence_length: 128 | |
| train_size: 3600 | |
| val_size: 200 | |
| test_size: 200 | |