Spaces:
Running
Running
"""
AIFinder Configuration
Easy configuration for providers and datasets.
"""
| import os | |
# --- Paths ---
# Absolute path of the directory that contains this configuration file.
BASE_DIR = os.path.split(os.path.abspath(__file__))[0]
# "models" sub-directory located directly under BASE_DIR.
MODEL_DIR = os.path.join(BASE_DIR, "models")
# ============================================================================
# EASY PROVIDER CONFIGURATION
# Add new providers here! Each entry is a 4-tuple:
#     (huggingface_dataset, provider_name, model_name, kwargs)
#
# kwargs is a per-dataset options dict. The only keys used in this list are
# "max_samples" and "name"; how they are consumed is decided by the loading
# code, which is not part of this file.
# NOTE(review): "name" is presumably a dataset configuration/subset name --
# confirm against the loader.
# ============================================================================
PROVIDER_DATASETS = [
    # Anthropic
    ("TeichAI/claude-4.5-opus-high-reasoning-250x", "Anthropic", "Claude 4.5 Opus", {}),
    (
        "TeichAI/claude-sonnet-4.5-high-reasoning-250x",
        "Anthropic",
        "Claude Sonnet 4.5",
        {},
    ),
    (
        "Roman1111111/claude-opus-4.6-10000x",
        "Anthropic",
        "Claude Opus 4.6",
        {"max_samples": 1500},
    ),
    # OpenAI
    ("TeichAI/gpt-5.2-high-reasoning-250x", "OpenAI", "GPT-5.2", {}),
    ("TeichAI/gpt-5.1-high-reasoning-1000x", "OpenAI", "GPT-5.1", {}),
    ("TeichAI/gpt-5.1-codex-max-1000x", "OpenAI", "GPT-5.1 Codex Max", {}),
    ("TeichAI/gpt-5-codex-250x", "OpenAI", "GPT-5 Codex", {}),
    ("TeichAI/gpt-5-codex-1000x", "OpenAI", "GPT-5 Codex", {}),
    # Google
    ("TeichAI/gemini-3-pro-preview-high-reasoning-1000x", "Google", "Gemini 3 Pro", {}),
    ("TeichAI/gemini-3-pro-preview-high-reasoning-250x", "Google", "Gemini 3 Pro", {}),
    (
        "TeichAI/gemini-2.5-flash-11000x",
        "Google",
        "Gemini 2.5 Flash",
        {"max_samples": 1500},
    ),
    ("TeichAI/Gemini-3-Flash-Preview-VIBE", "Google", "Gemini 3 Flash", {}),
    ("TeichAI/gemini-3-flash-preview-1000x", "Google", "Gemini 3 Flash", {}),
    ("TeichAI/gemini-3-flash-preview-complex-1000x", "Google", "Gemini 3 Flash", {}),
    # xAI
    ("TeichAI/brainstorm-v3.1-grok-4-fast-200x", "xAI", "Grok 4 Fast", {}),
    (
        "TeichAI/sherlock-thinking-alpha-11000x",
        "xAI",
        "Grok 4.1 Fast",
        {"max_samples": 1500},
    ),
    ("TeichAI/sherlock-dash-alpha-1000x", "xAI", "Grok 4.1 Fast", {}),
    ("TeichAI/sherlock-think-alpha-1000x", "xAI", "Grok 4.1 Fast", {}),
    ("TeichAI/grok-code-fast-1-1000x", "xAI", "Grok Code Fast 1", {}),
    # MoonshotAI
    ("TeichAI/kimi-k2-thinking-250x", "MoonshotAI", "Kimi K2", {}),
    ("TeichAI/kimi-k2-thinking-1000x", "MoonshotAI", "Kimi K2", {}),
    # Mistral
    ("TeichAI/mistral-small-creative-500x", "Mistral", "Mistral Small", {}),
    # MiniMax
    ("TeichAI/MiniMax-M2.1-Code-SFT", "MiniMax", "MiniMax M2.1", {"max_samples": 1500}),
    ("TeichAI/convo-v1", "MiniMax", "MiniMax M2.1", {}),
    # StepFun
    (
        "TeichAI/Step-3.5-Flash-2600x",
        "StepFun",
        "Step 3.5 Flash",
        {"max_samples": 1500},
    ),
    # Zhipu
    ("TeichAI/Pony-Alpha-15k", "Zhipu", "GLM-5", {"max_samples": 1500}),
    # DeepSeek
    ("TeichAI/deepseek-v3.2-speciale-1000x", "DeepSeek", "DeepSeek V3.2 Speciale", {}),
    (
        "TeichAI/deepseek-v3.2-speciale-openr1-math-3k",
        "DeepSeek",
        "DeepSeek V3.2 Speciale",
        {"max_samples": 1500},
    ),
    # DeepSeek (a-m-team) - different format
    (
        "a-m-team/AM-DeepSeek-R1-Distilled-1.4M",
        "DeepSeek",
        "DeepSeek R1",
        {"name": "am_0.9M", "max_samples": 1000},
    ),
]
# Derived views of PROVIDER_DATASETS, computed once when this module is imported.
# Entries whose dataset id contains the marker below are collected in
# DEEPSEEK_AM_DATASETS; every other entry goes to DATASET_REGISTRY.
_AM_TEAM_MARKER = "a-m-team"

DEEPSEEK_AM_DATASETS = [
    (dataset_id, provider, model_name, options)
    for dataset_id, provider, model_name, options in PROVIDER_DATASETS
    if _AM_TEAM_MARKER in dataset_id
]
DATASET_REGISTRY = [
    (dataset_id, provider, model_name, options)
    for dataset_id, provider, model_name, options in PROVIDER_DATASETS
    if _AM_TEAM_MARKER not in dataset_id
]
# Alphabetically sorted, de-duplicated provider names across ALL entries
# (both of the lists above).
PROVIDERS = sorted({provider for _, provider, _, _ in PROVIDER_DATASETS})
# ============================================================================
# FEATURE PARAMETERS
# Two option dicts, one for word-level and one for character-level TF-IDF.
# NOTE(review): the keys match scikit-learn TfidfVectorizer keyword arguments;
# presumably each dict is unpacked into one -- confirm in the feature code.
# ============================================================================
TFIDF_WORD_PARAMS = dict(
    analyzer="word",
    ngram_range=(1, 2),
    max_features=20,
    sublinear_tf=True,
    min_df=3,
    max_df=0.7,
)
TFIDF_CHAR_PARAMS = dict(
    analyzer="char_wb",
    ngram_range=(2, 4),
    max_features=20,
    sublinear_tf=True,
    min_df=3,
    max_df=0.7,
    smooth_idf=True,
)
# ============================================================================
# TRAINING PARAMETERS
# NOTE(review): how these constants are consumed is not visible in this file.
# The *_SIZE values are presumably split fractions and RANDOM_STATE a seed --
# confirm against the training script.
# ============================================================================
MAX_SAMPLES_PER_PROVIDER: int = 1000
TEST_SIZE: float = 0.15
VAL_SIZE: float = 0.10
RANDOM_STATE: int = 42

# Neural Network (unused currently, but kept for reference)
HIDDEN_DIM: int = 256
EMBED_DIM: int = 128
DROPOUT: float = 0.7
BATCH_SIZE: int = 128
EPOCHS: int = 80
EARLY_STOP_PATIENCE: int = 25
LEARNING_RATE: float = 3e-5
WEIGHT_DECAY: float = 8e-2
LABEL_SMOOTHING: float = 0.3