{"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.12.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[{"sourceType":"datasetVersion","sourceId":15939178,"datasetId":10221891,"databundleVersionId":16897176}],"dockerImageVersionId":31329,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":5,"nbformat":4,"cells":[{"id":"a9cb149f-88e4-475b-9e75-d32bb200385a","cell_type":"markdown","source":"# Work-Life Firewall Kaggle Runner\n\nRun this notebook top-to-bottom in Kaggle with GPU enabled.\n\n## Required Kaggle Secrets\n- `WANDB_API_KEY`\n- `HF_TOKEN` (optional, only needed for model upload)\n","metadata":{}},{"id":"65a0855b-ddb2-4bee-ad66-8c61085b0b9e","cell_type":"code","source":"import os\nimport shutil\nimport subprocess\nfrom pathlib import Path\n\n# Override via env vars if needed.\nGITHUB_REPO = os.environ.get('GITHUB_REPO', 'https://github.com/EchoOfCode/meta_hackathon.git')\nGITHUB_BRANCH = os.environ.get('GITHUB_BRANCH', 'main')\nWORK_REPO_DIR = Path('/kaggle/working/meta_hackathon')\nSAFE_CWD = Path('/kaggle/working')\n\n\ndef _run(cmd, cwd=None):\n run_cwd = str(cwd or SAFE_CWD)\n proc = subprocess.run(cmd, cwd=run_cwd, text=True, capture_output=True)\n if proc.returncode != 0:\n raise RuntimeError(\n f\"Command failed: {' '.join(cmd)}\\nSTDOUT:\\n{proc.stdout}\\nSTDERR:\\n{proc.stderr}\"\n )\n return proc\n\n\ndef _repo_url_with_token(repo_url: str) -> str:\n token = os.environ.get('GITHUB_TOKEN')\n if token and repo_url.startswith('https://github.com/'):\n return repo_url.replace('https://', f'https://{token}@', 1)\n return repo_url\n\n\ndef sync_repo_from_github() -> Path:\n \"\"\"Always get latest branch tip into 
/kaggle/working/meta_hackathon.\"\"\"\n repo_url = _repo_url_with_token(GITHUB_REPO)\n\n # Ensure we are never inside a path that may be deleted during sync.\n SAFE_CWD.mkdir(parents=True, exist_ok=True)\n os.chdir(SAFE_CWD)\n\n if (WORK_REPO_DIR / '.git').exists():\n _run(['git', 'fetch', 'origin', GITHUB_BRANCH], cwd=WORK_REPO_DIR)\n _run(['git', 'reset', '--hard', f'origin/{GITHUB_BRANCH}'], cwd=WORK_REPO_DIR)\n else:\n if WORK_REPO_DIR.exists():\n shutil.rmtree(WORK_REPO_DIR)\n _run([\n 'git', 'clone', '--depth', '1', '--branch', GITHUB_BRANCH, repo_url, str(WORK_REPO_DIR)\n ], cwd=SAFE_CWD)\n\n return WORK_REPO_DIR\n\n\ndef find_repo_dir_from_inputs() -> Path:\n \"\"\"Fallback for offline runs using attached Kaggle dataset input.\"\"\"\n candidates = []\n\n working = Path('/kaggle/working')\n if working.exists():\n candidates.append(working)\n candidates.extend([p for p in working.iterdir() if p.is_dir()])\n\n input_root = Path('/kaggle/input')\n if input_root.exists():\n candidates.append(input_root)\n for ds in input_root.iterdir():\n if ds.is_dir():\n candidates.append(ds)\n candidates.extend([p for p in ds.iterdir() if p.is_dir()])\n\n for c in candidates:\n if (c / 'openenv.yaml').exists() and (c / 'training').exists():\n return c\n\n raise FileNotFoundError(\n 'Could not find project root with openenv.yaml + training/. 
'\n 'Attach dataset input or enable internet for GitHub sync.'\n )\n\n\n# Default behavior: pull latest from GitHub each run.\nUSE_GITHUB_SYNC = os.environ.get('USE_GITHUB_SYNC', '1') == '1'\n\nif USE_GITHUB_SYNC:\n try:\n REPO_DIR = sync_repo_from_github()\n print('Synced latest code from GitHub:', GITHUB_REPO, '@', GITHUB_BRANCH)\n except Exception as exc:\n print('GitHub sync failed, falling back to /kaggle/input snapshot:')\n print(exc)\n REPO_DIR = find_repo_dir_from_inputs()\nelse:\n REPO_DIR = find_repo_dir_from_inputs()\n\n# /kaggle/input is read-only; copy to /kaggle/working for training outputs.\nif str(REPO_DIR).startswith('/kaggle/input'):\n if WORK_REPO_DIR.exists() and WORK_REPO_DIR != REPO_DIR:\n shutil.rmtree(WORK_REPO_DIR)\n shutil.copytree(REPO_DIR, WORK_REPO_DIR, dirs_exist_ok=True)\n REPO_DIR = WORK_REPO_DIR\n\nos.chdir(REPO_DIR)\nprint('CWD:', Path.cwd())\nprint('Using project root:', REPO_DIR)\n!python --version\n!nvidia-smi","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2026-04-26T03:45:36.547580Z","iopub.execute_input":"2026-04-26T03:45:36.548488Z","iopub.status.idle":"2026-04-26T03:45:38.073572Z","shell.execute_reply.started":"2026-04-26T03:45:36.548450Z","shell.execute_reply":"2026-04-26T03:45:38.072636Z"}},"outputs":[{"name":"stdout","text":"Synced latest code from GitHub: https://github.com/EchoOfCode/meta_hackathon.git @ main\nCWD: /kaggle/working/meta_hackathon\nUsing project root: /kaggle/working/meta_hackathon\nPython 3.12.12\nSun Apr 26 03:45:37 2026 \n+-----------------------------------------------------------------------------------------+\n| NVIDIA-SMI 580.105.08 Driver Version: 580.105.08 CUDA Version: 13.0 |\n+-----------------------------------------+------------------------+----------------------+\n| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n| | | MIG M. 
|\n|=========================================+========================+======================|\n| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n| N/A 52C P8 9W / 70W | 0MiB / 15360MiB | 0% Default |\n| | | N/A |\n+-----------------------------------------+------------------------+----------------------+\n| 1 Tesla T4 Off | 00000000:00:05.0 Off | 0 |\n| N/A 42C P8 14W / 70W | 0MiB / 15360MiB | 0% Default |\n| | | N/A |\n+-----------------------------------------+------------------------+----------------------+\n\n+-----------------------------------------------------------------------------------------+\n| Processes: |\n| GPU GI CI PID Type Process name GPU Memory |\n| ID ID Usage |\n|=========================================================================================|\n| No running processes found |\n+-----------------------------------------------------------------------------------------+\n","output_type":"stream"}],"execution_count":9},{"id":"53276eab-890f-494e-b692-c02c1c727ecb","cell_type":"code","source":"!pip install -r requirements.txt\n!pip install -q openenv trl==0.23.1 unsloth bitsandbytes huggingface_hub","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2026-04-26T03:45:38.075516Z","iopub.execute_input":"2026-04-26T03:45:38.075898Z","iopub.status.idle":"2026-04-26T03:45:45.905935Z","shell.execute_reply.started":"2026-04-26T03:45:38.075843Z","shell.execute_reply":"2026-04-26T03:45:45.905134Z"}},"outputs":[{"name":"stdout","text":"Requirement already satisfied: openenv in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 3)) (0.1.13)\nRequirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 4)) (2.0.2)\nRequirement already satisfied: pandas in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 5)) (2.3.3)\nRequirement already satisfied: matplotlib in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 6)) 
(3.10.0)\nRequirement already satisfied: datasets>=2.20.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 9)) (4.3.0)\nRequirement already satisfied: transformers==4.57.2 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 10)) (4.57.2)\nRequirement already satisfied: trl==0.23.1 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 11)) (0.23.1)\nRequirement already satisfied: peft>=0.12.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 12)) (0.18.1)\nRequirement already satisfied: bitsandbytes>=0.43.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 13)) (0.49.2)\nRequirement already satisfied: accelerate>=0.34.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 14)) (1.6.0)\nRequirement already satisfied: mergekit>=0.1.4 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 15)) (0.1.4)\nRequirement already satisfied: huggingface_hub>=0.34.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 16)) (0.36.2)\nRequirement already satisfied: wandb>=0.17.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 19)) (0.25.0)\nRequirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from transformers==4.57.2->-r requirements.txt (line 10)) (3.24.3)\nRequirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers==4.57.2->-r requirements.txt (line 10)) (26.0)\nRequirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers==4.57.2->-r requirements.txt (line 10)) (6.0.3)\nRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers==4.57.2->-r requirements.txt (line 10)) (2025.11.3)\nRequirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from transformers==4.57.2->-r 
requirements.txt (line 10)) (2.32.4)\nRequirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in /usr/local/lib/python3.12/dist-packages (from transformers==4.57.2->-r requirements.txt (line 10)) (0.22.2)\nRequirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers==4.57.2->-r requirements.txt (line 10)) (0.5.3)\nRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers==4.57.2->-r requirements.txt (line 10)) (4.67.1)\nRequirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas->-r requirements.txt (line 5)) (2.9.0.post0)\nRequirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas->-r requirements.txt (line 5)) (2025.2)\nRequirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas->-r requirements.txt (line 5)) (2025.3)\nRequirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib->-r requirements.txt (line 6)) (1.3.3)\nRequirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.12/dist-packages (from matplotlib->-r requirements.txt (line 6)) (0.12.1)\nRequirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib->-r requirements.txt (line 6)) (4.61.1)\nRequirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib->-r requirements.txt (line 6)) (1.4.9)\nRequirement already satisfied: pillow>=8 in /usr/local/lib/python3.12/dist-packages (from matplotlib->-r requirements.txt (line 6)) (11.3.0)\nRequirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib->-r requirements.txt (line 6)) (3.3.2)\nRequirement already satisfied: pyarrow>=21.0.0 in /usr/local/lib/python3.12/dist-packages (from datasets>=2.20.0->-r requirements.txt (line 9)) 
(23.0.1)\nRequirement already satisfied: dill<0.4.1,>=0.3.0 in /usr/local/lib/python3.12/dist-packages (from datasets>=2.20.0->-r requirements.txt (line 9)) (0.4.0)\nRequirement already satisfied: httpx<1.0.0 in /usr/local/lib/python3.12/dist-packages (from datasets>=2.20.0->-r requirements.txt (line 9)) (0.28.1)\nRequirement already satisfied: xxhash in /usr/local/lib/python3.12/dist-packages (from datasets>=2.20.0->-r requirements.txt (line 9)) (3.6.0)\nRequirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.12/dist-packages (from datasets>=2.20.0->-r requirements.txt (line 9)) (0.70.16)\nRequirement already satisfied: fsspec<=2025.9.0,>=2023.1.0 in /usr/local/lib/python3.12/dist-packages (from fsspec[http]<=2025.9.0,>=2023.1.0->datasets>=2.20.0->-r requirements.txt (line 9)) (2025.9.0)\nRequirement already satisfied: psutil in /usr/local/lib/python3.12/dist-packages (from peft>=0.12.0->-r requirements.txt (line 12)) (5.9.5)\nRequirement already satisfied: torch>=1.13.0 in /usr/local/lib/python3.12/dist-packages (from peft>=0.12.0->-r requirements.txt (line 12)) (2.10.0+cu128)\nRequirement already satisfied: click==8.2.1 in /usr/local/lib/python3.12/dist-packages (from mergekit>=0.1.4->-r requirements.txt (line 15)) (8.2.1)\nRequirement already satisfied: pydantic~=2.10.6 in /usr/local/lib/python3.12/dist-packages (from mergekit>=0.1.4->-r requirements.txt (line 15)) (2.10.6)\nRequirement already satisfied: immutables==0.21 in /usr/local/lib/python3.12/dist-packages (from mergekit>=0.1.4->-r requirements.txt (line 15)) (0.21)\nRequirement already satisfied: typing-extensions in /usr/local/lib/python3.12/dist-packages (from mergekit>=0.1.4->-r requirements.txt (line 15)) (4.15.0)\nRequirement already satisfied: sentencepiece in /usr/local/lib/python3.12/dist-packages (from mergekit>=0.1.4->-r requirements.txt (line 15)) (0.2.1)\nRequirement already satisfied: protobuf in /usr/local/lib/python3.12/dist-packages (from mergekit>=0.1.4->-r 
requirements.txt (line 15)) (5.29.5)\nRequirement already satisfied: scipy in /usr/local/lib/python3.12/dist-packages (from mergekit>=0.1.4->-r requirements.txt (line 15)) (1.16.3)\nRequirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub>=0.34.0->-r requirements.txt (line 16)) (1.4.3)\nRequirement already satisfied: gitpython!=3.1.29,>=1.0.0 in /usr/local/lib/python3.12/dist-packages (from wandb>=0.17.0->-r requirements.txt (line 19)) (3.1.46)\nRequirement already satisfied: platformdirs in /usr/local/lib/python3.12/dist-packages (from wandb>=0.17.0->-r requirements.txt (line 19)) (4.9.2)\nRequirement already satisfied: sentry-sdk>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from wandb>=0.17.0->-r requirements.txt (line 19)) (2.53.0)\nRequirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.12/dist-packages (from fsspec[http]<=2025.9.0,>=2023.1.0->datasets>=2.20.0->-r requirements.txt (line 9)) (3.13.3)\nRequirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.12/dist-packages (from gitpython!=3.1.29,>=1.0.0->wandb>=0.17.0->-r requirements.txt (line 19)) (4.0.12)\nRequirement already satisfied: anyio in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->datasets>=2.20.0->-r requirements.txt (line 9)) (4.12.1)\nRequirement already satisfied: certifi in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->datasets>=2.20.0->-r requirements.txt (line 9)) (2026.1.4)\nRequirement already satisfied: httpcore==1.* in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->datasets>=2.20.0->-r requirements.txt (line 9)) (1.0.9)\nRequirement already satisfied: idna in /usr/local/lib/python3.12/dist-packages (from httpx<1.0.0->datasets>=2.20.0->-r requirements.txt (line 9)) (3.11)\nRequirement already satisfied: h11>=0.16 in /usr/local/lib/python3.12/dist-packages (from httpcore==1.*->httpx<1.0.0->datasets>=2.20.0->-r requirements.txt (line 9)) 
(0.16.0)\nRequirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.12/dist-packages (from pydantic~=2.10.6->mergekit>=0.1.4->-r requirements.txt (line 15)) (0.7.0)\nRequirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.12/dist-packages (from pydantic~=2.10.6->mergekit>=0.1.4->-r requirements.txt (line 15)) (2.27.2)\nRequirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas->-r requirements.txt (line 5)) (1.17.0)\nRequirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.57.2->-r requirements.txt (line 10)) (3.4.4)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.57.2->-r requirements.txt (line 10)) (2.5.0)\nRequirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (75.2.0)\nRequirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (1.14.0)\nRequirement already satisfied: networkx>=2.5.1 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (3.6.1)\nRequirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (3.1.6)\nRequirement already satisfied: cuda-bindings==12.9.4 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (12.9.4)\nRequirement already satisfied: nvidia-cuda-nvrtc-cu12==12.8.93 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (12.8.93)\nRequirement already satisfied: nvidia-cuda-runtime-cu12==12.8.90 in /usr/local/lib/python3.12/dist-packages (from 
torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (12.8.90)\nRequirement already satisfied: nvidia-cuda-cupti-cu12==12.8.90 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (12.8.90)\nRequirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (9.10.2.21)\nRequirement already satisfied: nvidia-cublas-cu12==12.8.4.1 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (12.8.4.1)\nRequirement already satisfied: nvidia-cufft-cu12==11.3.3.83 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (11.3.3.83)\nRequirement already satisfied: nvidia-curand-cu12==10.3.9.90 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (10.3.9.90)\nRequirement already satisfied: nvidia-cusolver-cu12==11.7.3.90 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (11.7.3.90)\nRequirement already satisfied: nvidia-cusparse-cu12==12.5.8.93 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (12.5.8.93)\nRequirement already satisfied: nvidia-cusparselt-cu12==0.7.1 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (0.7.1)\nRequirement already satisfied: nvidia-nccl-cu12==2.27.5 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (2.27.5)\nRequirement already satisfied: nvidia-nvshmem-cu12==3.4.5 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (3.4.5)\nRequirement already satisfied: nvidia-nvtx-cu12==12.8.90 in /usr/local/lib/python3.12/dist-packages (from 
torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (12.8.90)\nRequirement already satisfied: nvidia-nvjitlink-cu12==12.8.93 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (12.8.93)\nRequirement already satisfied: nvidia-cufile-cu12==1.13.1.3 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (1.13.1.3)\nRequirement already satisfied: triton==3.6.0 in /usr/local/lib/python3.12/dist-packages (from torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (3.6.0)\nRequirement already satisfied: cuda-pathfinder~=1.1 in /usr/local/lib/python3.12/dist-packages (from cuda-bindings==12.9.4->torch>=1.13.0->peft>=0.12.0->-r requirements.txt (line 12)) (1.3.5)\nRequirement already satisfied: aiohappyeyeballs>=2.5.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets>=2.20.0->-r requirements.txt (line 9)) (2.6.1)\nRequirement already satisfied: aiosignal>=1.4.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets>=2.20.0->-r requirements.txt (line 9)) (1.4.0)\nRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets>=2.20.0->-r requirements.txt (line 9)) (25.4.0)\nRequirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets>=2.20.0->-r requirements.txt (line 9)) (1.8.0)\nRequirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.9.0,>=2023.1.0->datasets>=2.20.0->-r requirements.txt (line 9)) (6.7.1)\nRequirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.12/dist-packages (from 
import os
from kaggle_secrets import UserSecretsClient

secrets = UserSecretsClient()

# Weights & Biases: the API key is mandatory, so a missing secret is allowed
# to raise here and stop the notebook before training starts.
wandb_api_key = secrets.get_secret('WANDB_API_KEY')
os.environ['WANDB_API_KEY'] = wandb_api_key
os.environ['WANDB_PROJECT'] = 'meta_hackathon'

# Hugging Face: optional, only needed when pushing a model from Kaggle.
# Any failure to read the secret is treated as "not configured".
try:
    hf_token = secrets.get_secret('HF_TOKEN')
    os.environ['HF_TOKEN'] = hf_token
    # Same value exported under the second variable name.
    os.environ['HUGGINGFACE_HUB_TOKEN'] = os.environ['HF_TOKEN']
except Exception:
    print('HF token not set (this is okay if you are not uploading model now)')
else:
    print('HF token loaded from Kaggle Secrets')

# Report presence only; never print the key itself.
wandb_key_present = bool(os.environ.get('WANDB_API_KEY'))
print('WandB key loaded:', wandb_key_present)
# Fast stable run for Kaggle T4
# Launches training/train.py through the shell, relative to the current working
# directory. The saved log shows wandb authenticating from WANDB_API_KEY, so that
# variable must already be set in the environment when this cell runs.
# Per the saved execution metadata this cell ran for roughly 58 minutes
# (03:45:46Z -> 04:43:28Z).
# NOTE(review): in the saved log every visible step reports
# completions/clipped_ratio = 1.0 with all completions exactly 96 tokens long
# and mean_terminated_length = 0.0, loss stays at 0.0, and grad_norm is 0.0 on
# most steps. Presumably the completion length cap is too short for the model
# to finish an answer, which would explain the flat reward - confirm against
# training/train.py (the relevant option is not visible from this notebook).
!python training/train.py --mode real --speed-preset fast --size-preset medium --steps 300 --run-name meta-kaggle-final --wandb-project meta_hackathon
Please check linkage and avoid linking the same target more than once.\nW0000 00:00:1777175153.233198 600 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n`torch_dtype` is deprecated! Use `dtype` instead!\nTrainable params: 18,464,768 / 907,081,216 (2.04%)\nThe tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.\n\u001b[34m\u001b[1mwandb\u001b[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from WANDB_API_KEY.\n\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33myusufindian09\u001b[0m (\u001b[33myusufindian09-aaa\u001b[0m) to \u001b[32mhttps://api.wandb.ai\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⢿\u001b[0m Waiting for wandb.init()...\n\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣻\u001b[0m setting up run f44p90zl (0.2s)\n\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣽\u001b[0m setting up run f44p90zl (0.2s)\n\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.25.0\n\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/kaggle/working/meta_hackathon/wandb/run-20260426_034610-f44p90zl\u001b[0m\n\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mmeta-kaggle-final\u001b[0m\n\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/yusufindian09-aaa/meta_hackathon\u001b[0m\n\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/yusufindian09-aaa/meta_hackathon/runs/f44p90zl\u001b[0m\n\u001b[34m\u001b[1mwandb\u001b[0m: Detected 
[huggingface_hub.inference, openai] in use.\n\u001b[34m\u001b[1mwandb\u001b[0m: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.\n\u001b[34m\u001b[1mwandb\u001b[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/\n{'loss': 0.0, 'grad_norm': 0.5486502051353455, 'learning_rate': 1.9733333333333336e-05, 'num_tokens': 2002.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.3800000071525574, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.5388020515441894, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.05}\n{'loss': 0.0, 'grad_norm': 0.5796840190887451, 'learning_rate': 1.94e-05, 'num_tokens': 4000.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.42000000476837157, 'rewards/reward_funcs/std': 0.11313707828521728, 'reward': 0.42000000476837157, 'reward_std': 0.11313707828521728, 'frac_reward_zero_std': 0.4, 'entropy': 2.489062452316284, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.1}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.9066666666666668e-05, 'num_tokens': 5986.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 
96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.3800000071525574, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.422395849227905, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.14}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.8733333333333336e-05, 'num_tokens': 7974.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.35500000715255736, 'rewards/reward_funcs/std': 0.0919238805770874, 'reward': 0.35500000715255736, 'reward_std': 0.09192387759685516, 'frac_reward_zero_std': 0.4, 'entropy': 2.4752604007720946, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.19}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.8400000000000003e-05, 'num_tokens': 9980.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.34000000953674314, 'rewards/reward_funcs/std': 0.0, 'reward': 0.34000000953674314, 'reward_std': 0.0, 'frac_reward_zero_std': 1.0, 'entropy': 2.714322900772095, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.24}\n{'loss': 
0.0, 'grad_norm': 0.0, 'learning_rate': 1.8066666666666668e-05, 'num_tokens': 11976.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.42000000476837157, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.42000000476837157, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.558593773841858, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.29}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.7733333333333335e-05, 'num_tokens': 13978.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.3800000071525574, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.3932291507720946, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.33}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.7400000000000003e-05, 'num_tokens': 15960.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.11313707828521728, 'reward': 0.3800000071525574, 'reward_std': 0.11313707828521728, 
'frac_reward_zero_std': 0.4, 'entropy': 2.312760424613953, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.38}\n{'loss': 0.0, 'grad_norm': 0.5101456642150879, 'learning_rate': 1.706666666666667e-05, 'num_tokens': 17970.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.4200000107288361, 'rewards/reward_funcs/std': 0.11313707828521728, 'reward': 0.4200000107288361, 'reward_std': 0.11313707828521728, 'frac_reward_zero_std': 0.2, 'entropy': 2.8614583015441895, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.43}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.6733333333333335e-05, 'num_tokens': 19980.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.32000001072883605, 'rewards/reward_funcs/std': 0.02828426957130432, 'reward': 0.32000001072883605, 'reward_std': 0.02828426957130432, 'frac_reward_zero_std': 0.8, 'entropy': 2.526822900772095, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.48}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.64e-05, 'num_tokens': 21960.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 
0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.4000000059604645, 'rewards/reward_funcs/std': 0.08485280871391296, 'reward': 0.4000000059604645, 'reward_std': 0.08485280871391296, 'frac_reward_zero_std': 0.4, 'entropy': 2.7783854484558104, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.52}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.606666666666667e-05, 'num_tokens': 23956.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.34000000953674314, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.34000000953674314, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.7033854484558106, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.57}\n{'loss': 0.0, 'grad_norm': 0.5273916721343994, 'learning_rate': 1.5733333333333334e-05, 'num_tokens': 25954.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.11313707828521728, 'reward': 0.3800000071525574, 'reward_std': 0.11313707828521728, 'frac_reward_zero_std': 0.4, 'entropy': 2.7705729484558104, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.62}\n{'loss': 0.0, 'grad_norm': 0.6088199615478516, 'learning_rate': 1.54e-05, 
'num_tokens': 27962.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.3800000071525574, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.5481770992279054, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.67}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.5066666666666668e-05, 'num_tokens': 29990.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.3800000071525574, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.6627604484558107, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.71}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.4733333333333335e-05, 'num_tokens': 31966.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3600000083446503, 'rewards/reward_funcs/std': 0.02828426957130432, 'reward': 0.3600000083446503, 'reward_std': 0.02828426957130432, 'frac_reward_zero_std': 0.8, 'entropy': 2.5354166984558106, 'clip_ratio/low_mean': 
0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.76}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.4400000000000001e-05, 'num_tokens': 33958.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.34000000953674314, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.34000000953674314, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.759635400772095, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.81}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.4066666666666669e-05, 'num_tokens': 35946.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3600000083446503, 'rewards/reward_funcs/std': 0.02828426957130432, 'reward': 0.3600000083446503, 'reward_std': 0.02828426957130432, 'frac_reward_zero_std': 0.8, 'entropy': 2.9869791507720946, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.86}\n{'loss': 0.0, 'grad_norm': 0.5336744785308838, 'learning_rate': 1.3733333333333335e-05, 'num_tokens': 37962.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 
'rewards/reward_funcs/mean': 0.4000000059604645, 'rewards/reward_funcs/std': 0.02828426957130432, 'reward': 0.4000000059604645, 'reward_std': 0.02828426957130432, 'frac_reward_zero_std': 0.8, 'entropy': 2.4075520992279054, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.9}\n{'loss': 0.0, 'grad_norm': 0.6080942749977112, 'learning_rate': 1.3400000000000002e-05, 'num_tokens': 39970.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3600000083446503, 'rewards/reward_funcs/std': 0.08485280871391296, 'reward': 0.3600000083446503, 'reward_std': 0.08485280871391296, 'frac_reward_zero_std': 0.4, 'entropy': 2.523958349227905, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 0.95}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.3066666666666668e-05, 'num_tokens': 41968.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.3800000071525574, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.4351562738418577, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.0}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.2733333333333336e-05, 'num_tokens': 43970.0, 'completions/mean_length': 96.0, 
'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.30000001192092896, 'rewards/reward_funcs/std': 0.0, 'reward': 0.30000001192092896, 'reward_std': 0.0, 'frac_reward_zero_std': 1.0, 'entropy': 2.62109375, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.05}\n{'loss': 0.0, 'grad_norm': 0.5846278071403503, 'learning_rate': 1.2400000000000002e-05, 'num_tokens': 45988.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3600000083446503, 'rewards/reward_funcs/std': 0.08485280871391296, 'reward': 0.3600000083446503, 'reward_std': 0.08485280871391296, 'frac_reward_zero_std': 0.4, 'entropy': 2.453906297683716, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.1}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.206666666666667e-05, 'num_tokens': 47998.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.32000001072883605, 'rewards/reward_funcs/std': 0.02828426957130432, 'reward': 0.32000001072883605, 'reward_std': 0.02828426957130432, 'frac_reward_zero_std': 0.8, 'entropy': 2.6614583015441893, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 
'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.14}\n{'loss': 0.0, 'grad_norm': 0.6321809887886047, 'learning_rate': 1.1733333333333335e-05, 'num_tokens': 50002.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.11313707828521728, 'reward': 0.3800000071525574, 'reward_std': 0.11313707828521728, 'frac_reward_zero_std': 0.2, 'entropy': 2.651822900772095, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.19}\n{'loss': 0.0, 'grad_norm': 0.659778356552124, 'learning_rate': 1.14e-05, 'num_tokens': 52002.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.34000000953674314, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.34000000953674314, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.8, 'entropy': 2.359895849227905, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.24}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.1066666666666669e-05, 'num_tokens': 54006.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.32000001072883605, 'rewards/reward_funcs/std': 
0.02828426957130432, 'reward': 0.32000001072883605, 'reward_std': 0.02828426957130432, 'frac_reward_zero_std': 0.8, 'entropy': 2.2322916746139527, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.29}\n{'loss': 0.0, 'grad_norm': 0.6547545194625854, 'learning_rate': 1.0733333333333333e-05, 'num_tokens': 56004.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.11313707828521728, 'reward': 0.3800000071525574, 'reward_std': 0.11313707828521728, 'frac_reward_zero_std': 0.2, 'entropy': 3.0687500476837157, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.33}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.04e-05, 'num_tokens': 57980.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.3800000071525574, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.5916666269302366, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.38}\n{'loss': 0.0, 'grad_norm': 0.48406097292900085, 'learning_rate': 1.0066666666666666e-05, 'num_tokens': 59980.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 
'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.11313707828521728, 'reward': 0.3800000071525574, 'reward_std': 0.11313707828521728, 'frac_reward_zero_std': 0.2, 'entropy': 2.38046875, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.43}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 9.733333333333334e-06, 'num_tokens': 61970.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.34000000953674314, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.34000000953674314, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.5591145753860474, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.48}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 9.4e-06, 'num_tokens': 63950.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.42000000476837157, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.42000000476837157, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.7098958015441896, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 
1.52}\n{'loss': 0.0, 'grad_norm': 0.5738649368286133, 'learning_rate': 9.066666666666667e-06, 'num_tokens': 65946.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3600000083446503, 'rewards/reward_funcs/std': 0.08485280871391296, 'reward': 0.3600000083446503, 'reward_std': 0.08485280871391296, 'frac_reward_zero_std': 0.6, 'entropy': 2.57421875, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.57}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 8.733333333333333e-06, 'num_tokens': 67930.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.33500000834465027, 'rewards/reward_funcs/std': 0.12020815014839173, 'reward': 0.33500000834465027, 'reward_std': 0.12020814716815949, 'frac_reward_zero_std': 0.2, 'entropy': 2.5583333492279055, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.62}\n{'loss': 0.0, 'grad_norm': 0.5746757984161377, 'learning_rate': 8.400000000000001e-06, 'num_tokens': 69958.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.4000000059604645, 'rewards/reward_funcs/std': 0.08485280871391296, 'reward': 0.4000000059604645, 'reward_std': 
0.08485280871391296, 'frac_reward_zero_std': 0.4, 'entropy': 2.535677099227905, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.67}\n{'loss': 0.0, 'grad_norm': 0.5328442454338074, 'learning_rate': 8.066666666666667e-06, 'num_tokens': 71954.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.3800000071525574, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.6125, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.71}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 7.733333333333334e-06, 'num_tokens': 73946.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.34000000953674314, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.34000000953674314, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.5192708015441894, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.76}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 7.4e-06, 'num_tokens': 75956.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 
'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3600000083446503, 'rewards/reward_funcs/std': 0.08485280871391296, 'reward': 0.3600000083446503, 'reward_std': 0.08485280871391296, 'frac_reward_zero_std': 0.4, 'entropy': 2.64921875, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.81}\n{'loss': 0.0, 'grad_norm': 0.5616747140884399, 'learning_rate': 7.066666666666667e-06, 'num_tokens': 77932.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3600000083446503, 'rewards/reward_funcs/std': 0.08485280871391296, 'reward': 0.3600000083446503, 'reward_std': 0.08485280871391296, 'frac_reward_zero_std': 0.4, 'entropy': 2.3197916746139526, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.86}\n{'loss': 0.0, 'grad_norm': 0.6317049860954285, 'learning_rate': 6.733333333333334e-06, 'num_tokens': 79918.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3600000083446503, 'rewards/reward_funcs/std': 0.08485280871391296, 'reward': 0.3600000083446503, 'reward_std': 0.08485280871391296, 'frac_reward_zero_std': 0.4, 'entropy': 2.3489583492279054, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.9}\n{'loss': 0.0, 'grad_norm': 
0.5577821731567383, 'learning_rate': 6.4000000000000006e-06, 'num_tokens': 81936.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.42000000476837157, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.42000000476837157, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.4322916507720946, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 1.95}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 6.066666666666667e-06, 'num_tokens': 83936.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.34000000953674314, 'rewards/reward_funcs/std': 0.0, 'reward': 0.34000000953674314, 'reward_std': 0.0, 'frac_reward_zero_std': 1.0, 'entropy': 2.3255208015441893, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.0}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 5.733333333333334e-06, 'num_tokens': 85944.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3600000083446503, 'rewards/reward_funcs/std': 0.02828426957130432, 'reward': 0.3600000083446503, 'reward_std': 0.02828426957130432, 'frac_reward_zero_std': 0.8, 'entropy': 
2.6328124523162844, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.05}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 5.400000000000001e-06, 'num_tokens': 87964.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.34000000953674314, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.34000000953674314, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.4989583492279053, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.1}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 5.0666666666666676e-06, 'num_tokens': 89972.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3600000083446503, 'rewards/reward_funcs/std': 0.08485280871391296, 'reward': 0.3600000083446503, 'reward_std': 0.08485280871391296, 'frac_reward_zero_std': 0.4, 'entropy': 2.6677083492279055, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.14}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 4.7333333333333335e-06, 'num_tokens': 91984.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 
0.0, 'rewards/reward_funcs/mean': 0.34000000953674314, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.34000000953674314, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.469531226158142, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.19}\n{'loss': 0.0, 'grad_norm': 0.5214574933052063, 'learning_rate': 4.4e-06, 'num_tokens': 93982.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.3800000071525574, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.9989583015441896, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.24}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 4.066666666666667e-06, 'num_tokens': 95968.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.38000001311302184, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.38000001311302184, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.6018229007720945, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.29}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 3.7333333333333337e-06, 'num_tokens': 97950.0, 'completions/mean_length': 96.0, 
'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.32000001072883605, 'rewards/reward_funcs/std': 0.02828426957130432, 'reward': 0.32000001072883605, 'reward_std': 0.02828426957130432, 'frac_reward_zero_std': 0.8, 'entropy': 2.8973958492279053, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.33}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 3.4000000000000005e-06, 'num_tokens': 99962.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.3800000071525574, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.1619791984558105, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.38}\n{'loss': 0.0, 'grad_norm': 0.6244723200798035, 'learning_rate': 3.066666666666667e-06, 'num_tokens': 101960.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.38000001311302184, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.38000001311302184, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.2153645277023317, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 
'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.43}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 2.7333333333333336e-06, 'num_tokens': 103954.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3600000083446503, 'rewards/reward_funcs/std': 0.08485280871391296, 'reward': 0.3600000083446503, 'reward_std': 0.08485280871391296, 'frac_reward_zero_std': 0.4, 'entropy': 2.5609375, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.48}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 2.4000000000000003e-06, 'num_tokens': 105928.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.4000000059604645, 'rewards/reward_funcs/std': 0.08485280871391296, 'reward': 0.4000000059604645, 'reward_std': 0.08485280871391296, 'frac_reward_zero_std': 0.4, 'entropy': 2.45859375, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.52}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 2.0666666666666666e-06, 'num_tokens': 107920.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 
0.05656853914260864, 'reward': 0.3800000071525574, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.88359375, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.57}\n{'loss': 0.0, 'grad_norm': 0.5468803644180298, 'learning_rate': 1.7333333333333336e-06, 'num_tokens': 109908.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.4000000059604645, 'rewards/reward_funcs/std': 0.08485280871391296, 'reward': 0.4000000059604645, 'reward_std': 0.08485280871391296, 'frac_reward_zero_std': 0.4, 'entropy': 2.5015625, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.62}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 1.4000000000000001e-06, 'num_tokens': 111904.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.11313707828521728, 'reward': 0.3800000071525574, 'reward_std': 0.11313707828521728, 'frac_reward_zero_std': 0.2, 'entropy': 2.7330729484558107, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.67}\n{'loss': 0.0, 'grad_norm': 0.5374178886413574, 'learning_rate': 1.066666666666667e-06, 'num_tokens': 113900.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 
'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.3800000071525574, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.521874952316284, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.71}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 7.333333333333334e-07, 'num_tokens': 115902.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.4000000059604645, 'rewards/reward_funcs/std': 0.02828426957130432, 'reward': 0.4000000059604645, 'reward_std': 0.02828426957130432, 'frac_reward_zero_std': 0.8, 'entropy': 2.27109375, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.76}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 4.0000000000000003e-07, 'num_tokens': 117890.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.44000000357627866, 'rewards/reward_funcs/std': 0.08485280871391296, 'reward': 0.44000000357627866, 'reward_std': 0.08485280871391296, 'frac_reward_zero_std': 0.4, 'entropy': 2.2208333253860473, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 
'epoch': 2.81}\n{'loss': 0.0, 'grad_norm': 0.0, 'learning_rate': 6.666666666666668e-08, 'num_tokens': 119914.0, 'completions/mean_length': 96.0, 'completions/min_length': 96.0, 'completions/max_length': 96.0, 'completions/clipped_ratio': 1.0, 'completions/mean_terminated_length': 0.0, 'completions/min_terminated_length': 0.0, 'completions/max_terminated_length': 0.0, 'rewards/reward_funcs/mean': 0.3800000071525574, 'rewards/reward_funcs/std': 0.05656853914260864, 'reward': 0.3800000071525574, 'reward_std': 0.05656853914260864, 'frac_reward_zero_std': 0.6, 'entropy': 2.6872395515441894, 'clip_ratio/low_mean': 0.0, 'clip_ratio/low_min': 0.0, 'clip_ratio/high_mean': 0.0, 'clip_ratio/high_max': 0.0, 'clip_ratio/region_mean': 0.0, 'epoch': 2.86}\n{'train_runtime': 3432.3418, 'train_samples_per_second': 0.175, 'train_steps_per_second': 0.087, 'train_loss': 2.483526865641276e-09, 'epoch': 2.86}\n100%|█████████████████████████████████████████| 300/300 [57:10<00:00, 11.43s/it]\nSaved metrics → evaluation/results/training_metrics.json\nReal training complete → checkpoints/final\n\u001b[1;34mwandb\u001b[0m: \n\u001b[1;34mwandb\u001b[0m: 🚀 View run \u001b[33mmeta-kaggle-final\u001b[0m at: \u001b[34mhttps://wandb.ai/yusufindian09-aaa/meta_hackathon/runs/f44p90zl\u001b[0m\n\u001b[1;34mwandb\u001b[0m: Find logs at: \u001b[1;35mwandb/run-20260426_034610-f44p90zl/logs\u001b[0m\n","output_type":"stream"}],"execution_count":12},{"id":"26ce2e8a","cell_type":"code","source":"import json\nfrom pathlib import Path\n\nmetrics_path = Path('evaluation/results/training_metrics.json')\nmetrics = json.loads(metrics_path.read_text())\nprint('Metrics file:', metrics_path)\nprint('Mode:', metrics.get('mode'))\nprint('Model:', metrics.get('model_id'))\nprint('Train runtime (s):', metrics.get('train_runtime_seconds'))\nprint('WandB URL:', metrics.get('wandb_run_url'))\nprint('Loss points:', len(metrics.get('loss_curve', [])))\nprint('Reward points:', len(metrics.get('reward_curve', 
[])))","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2026-04-26T04:43:28.131418Z","iopub.execute_input":"2026-04-26T04:43:28.131680Z","iopub.status.idle":"2026-04-26T04:43:28.139350Z","shell.execute_reply.started":"2026-04-26T04:43:28.131652Z","shell.execute_reply":"2026-04-26T04:43:28.138565Z"}},"outputs":[{"name":"stdout","text":"Metrics file: evaluation/results/training_metrics.json\nMode: real\nModel: Qwen/Qwen2.5-1.5B-Instruct\nTrain runtime (s): 3432.3418\nWandB URL: https://wandb.ai/yusufindian09-aaa/meta_hackathon/runs/f44p90zl\nLoss points: 60\nReward points: 60\n","output_type":"stream"}],"execution_count":13},{"id":"4a68ba06","cell_type":"code","source":"#!python evaluation/evaluate.py --episodes 50\n!python evaluation/plot_results.py\n!ls -lah evaluation/results","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2026-04-26T04:43:28.140227Z","iopub.execute_input":"2026-04-26T04:43:28.140514Z","iopub.status.idle":"2026-04-26T04:43:30.052692Z","shell.execute_reply.started":"2026-04-26T04:43:28.140484Z","shell.execute_reply":"2026-04-26T04:43:30.051673Z"}},"outputs":[{"name":"stdout","text":"Saved PNG plots in evaluation/results/\ntotal 288K\ndrwxr-xr-x 2 root root 4.0K Apr 26 03:45 .\ndrwxr-xr-x 4 root root 4.0K Apr 26 02:22 ..\n-rw-r--r-- 1 root root 34K Apr 26 02:22 component_breakdown_20260426.png\n-rw-r--r-- 1 root root 34K Apr 26 04:43 component_breakdown.png\n-rw-r--r-- 1 root root 51K Apr 26 04:43 decision_heatmap.png\n-rw-r--r-- 1 root root 22K Apr 26 04:43 energy_trajectory.png\n-rw-r--r-- 1 root root 6.9K Apr 26 02:22 evaluation_summary.json\n-rw-r--r-- 1 root root 14K Apr 26 04:43 loss_curve.png\n-rw-r--r-- 1 root root 46K Apr 26 04:43 reward_curve.png\n-rw-r--r-- 1 root root 57K Apr 26 04:43 training_metrics.json\n","output_type":"stream"}],"execution_count":14},{"id":"1e7a91a2","cell_type":"code","source":"# Package submission artifacts for download\n!zip -r submission_artifacts.zip README.md openenv.yaml environment 
training evaluation/results -x \"*/__pycache__/*\" \"*.pyc\" \".ipynb_checkpoints/*\"\n!ls -lah submission_artifacts.zip","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2026-04-26T04:43:30.054187Z","iopub.execute_input":"2026-04-26T04:43:30.054552Z","iopub.status.idle":"2026-04-26T04:43:30.295785Z","shell.execute_reply.started":"2026-04-26T04:43:30.054523Z","shell.execute_reply":"2026-04-26T04:43:30.295144Z"}},"outputs":[{"name":"stdout","text":"updating: README.md (deflated 60%)\nupdating: openenv.yaml (deflated 29%)\nupdating: environment/ (stored 0%)\nupdating: environment/events.py (deflated 74%)\nupdating: environment/state.py (deflated 66%)\nupdating: environment/env.py (deflated 64%)\nupdating: environment/judge.py (deflated 55%)\nupdating: environment/consequences.py (deflated 65%)\nupdating: environment/reward.py (deflated 67%)\nupdating: environment/__init__.py (deflated 23%)\nupdating: training/ (stored 0%)\nupdating: training/push_to_hf.py (deflated 50%)\nupdating: training/__init__.py (stored 0%)\nupdating: training/rollout.py (deflated 50%)\nupdating: training/train.py (deflated 70%)\nupdating: training/train.ipynb (deflated 70%)\nupdating: training/grpo_config.py (deflated 43%)\nupdating: evaluation/results/ (stored 0%)\nupdating: evaluation/results/training_metrics.json (deflated 94%)\nupdating: evaluation/results/energy_trajectory.png (deflated 12%)\nupdating: evaluation/results/decision_heatmap.png (deflated 12%)\nupdating: evaluation/results/evaluation_summary.json (deflated 81%)\nupdating: evaluation/results/reward_curve.png (deflated 5%)\nupdating: evaluation/results/component_breakdown.png (deflated 15%)\nupdating: evaluation/results/component_breakdown_20260426.png (deflated 15%)\nupdating: evaluation/results/loss_curve.png (deflated 21%)\n-rw-r--r-- 1 root root 311K Apr 26 04:43 submission_artifacts.zip\n","output_type":"stream"}],"execution_count":15},{"id":"4b1a3df0","cell_type":"code","source":"# Optional: upload model to HF 
Model Hub\n# Uncomment and edit repo id when needed\n!python training/push_to_hf.py --repo-id YUS200619/meta_hackathon-qwen-model --folder checkpoints/final","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2026-04-26T04:43:30.296945Z","iopub.execute_input":"2026-04-26T04:43:30.297348Z","iopub.status.idle":"2026-04-26T04:43:43.694220Z","shell.execute_reply.started":"2026-04-26T04:43:30.297320Z","shell.execute_reply":"2026-04-26T04:43:43.693507Z"}},"outputs":[{"name":"stdout","text":"Processing Files (0 / 0) : | | 0.00B / 0.00B \nNew Data Upload : | | 0.00B / 0.00B \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\nProcessing Files (1 / 1) : 13%|█▊ | 11.4MB / 85.3MB, 11.4MB/s \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 0%| | 45.7kB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 13%|█▉ | 11.5MB / 85.3MB, 9.55MB/s \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 0%| | 45.7kB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 0%| | 45.7kB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 0%| | 45.7kB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 14%|█▉ | 12.0MB / 85.3MB, 6.01MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 1%| | 551kB / 73.9MB, 275kB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 1%| | 596kB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 
100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 1%| | 596kB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 1%| | 596kB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 15%|██ | 12.6MB / 85.3MB, 4.49MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 1%|▏ | 1.10MB / 73.9MB, 393kB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 17%|██▎ | 14.2MB / 85.3MB, 4.74MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 4%|▌ | 2.75MB / 73.9MB, 917kB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 19%|██▋ | 16.4MB / 85.3MB, 5.13MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 7%|▉ | 4.96MB / 73.9MB, 1.55MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 21%|██▉ | 18.1MB / 85.3MB, 5.32MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 9%|█▎ | 6.61MB / 73.9MB, 1.94MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 23%|███▏ | 19.7MB / 85.3MB, 5.48MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 11%|█▌ | 8.26MB / 73.9MB, 2.29MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 28%|███▉ | 24.1MB / 85.3MB, 6.35MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 17%|██▍ | 12.7MB / 73.9MB, 3.33MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 33%|████▋ | 28.5MB / 85.3MB, 7.13MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 23%|███▏ | 17.1MB / 73.9MB, 
4.27MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 38%|█████▎ | 32.4MB / 85.3MB, 7.71MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 28%|███▉ | 20.9MB / 73.9MB, 4.98MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 43%|██████ | 36.8MB / 85.3MB, 8.36MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 34%|████▊ | 25.3MB / 73.9MB, 5.76MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 48%|██████▊ | 41.2MB / 85.3MB, 8.96MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 40%|█████▋ | 29.7MB / 73.9MB, 6.46MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 53%|███████▍ | 45.6MB / 85.3MB, 9.50MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 46%|██████▍ | 34.1MB / 73.9MB, 7.11MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 59%|████████▎ | 50.6MB / 85.3MB, 10.1MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 53%|███████▍ | 39.1MB / 73.9MB, 7.82MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 68%|█████████▍ | 57.7MB / 85.3MB, 11.1MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 63%|████████▊ | 46.3MB / 73.9MB, 8.89MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 72%|██████████ | 61.6MB / 85.3MB, 11.4MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 68%|█████████▍ | 50.1MB / 73.9MB, 9.28MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 77%|██████████▊ | 66.0MB / 85.3MB, 11.8MB/s \u001b[A\u001b[A\u001b[A\nNew Data 
Upload : 74%|██████████▎ | 54.5MB / 73.9MB, 9.73MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 82%|███████████▌ | 70.4MB / 85.3MB, 12.1MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 80%|███████████▏ | 58.9MB / 73.9MB, 10.2MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 87%|████████████▏ | 74.2MB / 85.3MB, 12.4MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 85%|███████████▉ | 62.8MB / 73.9MB, 10.5MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 92%|████████████▊ | 78.1MB / 85.3MB, 12.6MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 90%|████████████▋ | 66.6MB / 73.9MB, 10.7MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 90%|████████████▋ | 66.7MB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 90%|████████████▋ | 66.7MB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 90%|████████████▋ | 66.7MB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 90%|████████████▋ | 66.7MB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 97%|█████████████▌| 82.4MB / 85.3MB, 11.5MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 96%|█████████████▍| 71.0MB / 73.9MB, 9.86MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 99%|█████████████▉| 
84.7MB / 85.3MB, 11.4MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 99%|█████████████▉| 73.2MB / 73.9MB, 9.89MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (1 / 2) : 100%|█████████████▉| 85.2MB / 85.3MB, 11.2MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 100%|█████████████▉| 73.8MB / 73.9MB, 9.71MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 100%|█████████████▉| 73.8MB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (2 / 2) : 100%|██████████████| 85.3MB / 85.3MB, 10.7MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 100%|██████████████| 73.9MB / 73.9MB, 9.23MB/s \u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 100%|██████████████| 73.9MB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 100%|██████████████| 73.9MB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 100%|██████████████| 73.9MB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 100%|██████████████| 73.9MB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 100%|██████████████| 73.9MB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\n ...adapter_model.safetensors: 100%|██████████████| 73.9MB / 73.9MB \u001b[A\u001b[A\u001b[A\n\n ...ints/final/tokenizer.json: 
100%|██████████████| 11.4MB / 11.4MB \u001b[A\u001b[A\n\n\nProcessing Files (2 / 2) : 100%|██████████████| 85.3MB / 85.3MB, 9.27MB/s \u001b[A\u001b[A\u001b[A\nNew Data Upload : 100%|██████████████| 73.9MB / 73.9MB, 8.03MB/s \n ...ints/final/tokenizer.json: 100%|██████████████| 11.4MB / 11.4MB \n ...adapter_model.safetensors: 100%|██████████████| 73.9MB / 73.9MB \nUploaded model folder to https://huggingface.co/YUS200619/meta_hackathon-qwen-model\n","output_type":"stream"}],"execution_count":16}]}