diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..3e87f255b8dd6eb1bdd74fb628b6c9b357e1e5b2 --- /dev/null +++ b/.DS_Store @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbeeb76fdd091af74b22bc95dbda1f6ef8555509dad1f5a0bd2ea7edc4368d46 +size 10244 diff --git a/.gitattributes b/.gitattributes index f0286abb17cb35216af217455cfeb0f29021bcd3..ff5948f13dbd5bf5f6d84b101358ed098f0feadb 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,59 +1,10 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ckpt filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -*.safetensors filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tar filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text +# Auto detect text files and perform LF normalization 
+* text=auto +*.jpg filter=lfs diff=lfs merge=lfs -text +*.jpeg filter=lfs diff=lfs merge=lfs -text +*.webm filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text -*.zst filter=lfs diff=lfs merge=lfs -text -*tfevents* filter=lfs diff=lfs merge=lfs -text -app/ui/frontend/dist/blackhole.webm filter=lfs diff=lfs merge=lfs -text -app/ui/frontend/public/blackhole.webm filter=lfs diff=lfs merge=lfs -text -docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/reward_function/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text -docs/results/qwen_completed_runs/charts/generated/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text -docs/results/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text -docs/results/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text -docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text -docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text -submission_bundle/qwen_completed_runs/charts/generated/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text -docs/UI[[:space:]]Images/1.jpeg filter=lfs diff=lfs merge=lfs -text -docs/UI[[:space:]]Images/2.jpeg filter=lfs diff=lfs merge=lfs -text -docs/UI[[:space:]]Images/3.jpeg filter=lfs diff=lfs merge=lfs -text -docs/UI[[:space:]]Images/4.jpeg filter=lfs diff=lfs merge=lfs -text -docs/UI[[:space:]]Images/5.jpeg filter=lfs diff=lfs merge=lfs -text -docs/assets/diagrams/data_training_pipeline.png filter=lfs diff=lfs merge=lfs -text -docs/assets/diagrams/deployment_topology.png filter=lfs diff=lfs merge=lfs -text -docs/assets/diagrams/frontend_runtime_surface.png filter=lfs diff=lfs merge=lfs -text -docs/assets/diagrams/multi_agent_orchestration.png filter=lfs diff=lfs merge=lfs -text 
-docs/assets/diagrams/reward_decomposition.png filter=lfs diff=lfs merge=lfs -text -docs/assets/diagrams/system_architecture.png filter=lfs diff=lfs merge=lfs -text -docs/results/final_submission_evidence/charts/curated/reward_and_safety/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text -docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_reward_curve.png filter=lfs diff=lfs merge=lfs -text -docs/results/final_submission_evidence/charts/frontpage/04_reward_components.png filter=lfs diff=lfs merge=lfs -text -docs/results/final_submission_evidence/charts/frontpage/09_qwen_3b_grpo_reward_curve.png filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.gif filter=lfs diff=lfs merge=lfs -text +.DS_Store filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text diff --git a/Dockerfile b/Dockerfile index 98d1116f0afd6bb4c961509d865c140dcae6e78d..894604c7e2b6ffca82326ad8f69a37cd992b2542 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,31 +1,27 @@ -# Hugging Face Space: single-port edge (nginx) + OpenEnv (8100) + API (8200) + static UI. -# Build from repository root: docker build -f Dockerfile.space -t polyguard-space . -# Cheap tier: use Space "CPU basic"; first boot downloads ~1.1GB model bundle. +# Hugging Face Space wrapper for the GitHub repository root. +# The repository keeps the runnable app under polyguard-rl/, while Spaces expect +# a Dockerfile at the Space root. 
FROM node:20-bookworm-slim AS frontend WORKDIR /build -COPY app/ui/frontend/package.json app/ui/frontend/package-lock.json ./ +COPY polyguard-rl/app/ui/frontend/package.json polyguard-rl/app/ui/frontend/package-lock.json ./ RUN npm ci -COPY app/ui/frontend/ ./ +COPY polyguard-rl/app/ui/frontend/ ./ ENV VITE_API_BASE=/api RUN npm run build FROM python:3.11-slim-bookworm WORKDIR /app ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y --no-install-recommends nginx \ - && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y --no-install-recommends nginx && rm -rf /var/lib/apt/lists/* -COPY requirements-space.txt /app/requirements-space.txt -RUN pip install --no-cache-dir --upgrade pip \ - && pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu \ - && pip install --no-cache-dir -r /app/requirements-space.txt +COPY polyguard-rl/requirements-space.txt /app/requirements-space.txt +RUN pip install --no-cache-dir --upgrade pip && pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu && pip install --no-cache-dir -r /app/requirements-space.txt -COPY . /app +COPY polyguard-rl/ /app/ COPY --from=frontend /build/dist /app/static -RUN chmod +x /app/docker/space/entrypoint.sh \ - && mkdir -p /app/data /app/checkpoints/active +RUN chmod +x /app/docker/space/entrypoint.sh && mkdir -p /app/data /app/checkpoints/active ENV PORT=7860 ENV POLYGUARD_ALLOW_HF_SPACE_CORS=true diff --git a/README.md b/README.md index 204f8e9b9857fe98848a0b53f96d14b45cd3c96d..fd1ffd33751108edd3772b6b1b86deffae9faecc 100644 --- a/README.md +++ b/README.md @@ -5,64 +5,1244 @@ colorTo: green sdk: docker app_port: 7860 pinned: false +license: mit --- -# PolyGuard (OpenEnv implementation package) +# POLYGUARD-OPENENV -Run all CLI commands from this directory (`cd polyguard-rl`). 
The repository root [`README.md`](../README.md) carries the same submission narrative with paths adjusted for viewers landing on the GitHub repo home page. +Someone does not experience an unsafe medication regimen as "polypharmacy." +They experience it as dizziness after a new sleep medication, bleeding after a +painkiller is added to a blood thinner, confusion from a sedative-opioid +combination, or a preventable emergency visit because five prescribers each saw +one slice of the medication list. The dangerous part is often not a single +drug. It is the combination: the wrong pair, the wrong dose in the wrong organ +function context, the missing lab, the duplicated class, the abrupt stop that +should have been a taper, or the model that confidently says "looks fine" +because it was never forced to act inside a safety-checked environment. -## Submission Links +That is the problem PolyGuard was built for. The +[CDC](https://www.cdc.gov/medication-safety/data-research/facts-stats/index.html) +reports that adverse drug events send more than 1.5 million people to US +emergency departments every year, with almost 500,000 hospitalizations; adults +65 and older account for more than 600,000 of those emergency visits. A +CDC-authored [JAMA surveillance study](https://jamanetwork.com/journals/jama/fullarticle/2585977) +found that older adults made up 34.5 percent of ED visits for outpatient adverse +drug events and had the highest hospitalization rate, 43.6 percent; among older +adults, anticoagulants, diabetes agents, and opioid analgesics were implicated +in about 59.9 percent of ADE ED visits. Globally, the +[WHO](https://www.who.int/initiatives/medication-without-harm) estimates +medication errors cost USD 42 billion annually. And AHRQ's deprescribing safety +review summarizes estimates that +[45 percent of older adults are exposed to polypharmacy and 58 percent to +potentially inappropriate medications](https://www.ncbi.nlm.nih.gov/books/NBK600387/). 
-- GitHub Repo URL: [https://github.com/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK](https://github.com/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK) -- HF Space URL: [https://huggingface.co/spaces/TheJackBright/polyguard-openenv-workbench](https://huggingface.co/spaces/TheJackBright/polyguard-openenv-workbench) -- Colab Notebook URL: [https://colab.research.google.com/github/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK/blob/master/polyguard-rl/PolyGuard_SFT_GRPO_One_Run_Runner.ipynb](https://colab.research.google.com/github/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK/blob/master/polyguard-rl/PolyGuard_SFT_GRPO_One_Run_Runner.ipynb) (see also `notebooks/09_training_loop.ipynb` for a modular training walkthrough) -- YouTube Video URL: not used for this submission; the repository root README is the story artifact. -- Story artifact: the repository root [`README.md`](../README.md) is the final blog-style narrative and evidence map. +Not every adverse drug event is caused by an incorrect drug combination, but +these numbers describe the harm surface this project targets: medication +decisions where combination risk, monitoring gaps, frailty, organ function, +uncertainty, and action sequencing all matter at once. -## Shared Environment, Logs, And Scripts +PolyGuard turns that problem into an OpenEnv-compatible reinforcement-learning +environment for polypharmacy safety, medication optimization, deprescribing, +safe substitution, missing-evidence recovery, and precision dosing. An LLM +policy observes a constrained patient/regimen state, chooses a legal candidate +action, receives verifier-backed reward, and improves through SFT plus +GRPO-style post-training. -The required environment files, training logs, and training scripts are shared -in the repo and indexed in [Submission Artifact Index](docs/submission_artifacts.md). +This repository is both a research artifact and a product prototype. 
It contains +the OpenEnv server, a multi-agent policy stack, synthetic and structured +medication datasets, TRL training scripts, verifier-backed reward functions, +agentic evaluation, curated result charts, final artifacts, and a React +operator workbench. -- Environment/runtime: `openenv.yaml`, `pyproject.toml`, `uv.lock`, `requirements*.txt`, `Dockerfile*`, `app/env/`, `server/app.py`, and `app/hf_space/Dockerfile`. -- Training scripts/notebooks: `PolyGuard_SFT_GRPO_One_Run_Runner.ipynb`, `notebooks/09_training_loop.ipynb`, `scripts/train_sft_trl.py`, `scripts/train_grpo_trl.py`, `scripts/deploy_training_space.py`, `app/hf_space/training_runner.py`, and `app/training/`. -- Training logs/results: `docs/results/final_submission_evidence/reports/`, `docs/results/sweeps/`, `docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/`, and `docs/results/qwen_completed_runs/reports/`. -- Final downloadable artifact Space: [https://huggingface.co/spaces/adithya9903/polyguard-openenv-final-artifacts](https://huggingface.co/spaces/adithya9903/polyguard-openenv-final-artifacts). +PolyGuard is not medical software and is not clinical advice. It is a controlled +research environment for studying how language-model policies can be trained and +audited on safety-critical medication action selection. -## Problem Statement +## Safety Contract -Polypharmacy decisions are long-horizon, partially observable, and safety-critical. PolyGuard is a research environment where an LLM agent selects constrained clinical actions, receives verifier-backed reward, and improves via SFT + GRPO—not generic open-ended chat fine-tuning. +PolyGuard does not let a model directly mutate a medication list from free text. +Every decision is candidate-based, verifier-checked, reward-decomposed, and +traced. Illegal actions can be scored, penalized, and logged, but they do not +change patient state. 
The system is designed for research on safety-critical +action selection, not for clinical ordering or patient-specific treatment +advice. -## Environment +## Try, Read, And Review -`PolyGuardEnv` exposes OpenEnv-style HTTP/WebSocket endpoints (`/reset`, `/step`, `/state`, `/metadata`, `/schema`, `/mcp`, `/health`, `/ws`). Sub-environments include DDI, bandit mining, regimen risk, precision dosing, longitudinal deprescribing, web-search missing data, alternative suggestion, and new-drug decomposition. See `openenv.yaml`, `app/env/env_core.py`, `app/env/fastapi_app.py`, and `docs/environment_design.md`. +- GitHub repository: + [Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK](https://github.com/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK) +- Product Hugging Face Space: + [TheJackBright/polyguard-openenv-workbench](https://huggingface.co/spaces/TheJackBright/polyguard-openenv-workbench) +- One-run Colab/HF notebook: + [PolyGuard_SFT_GRPO_One_Run_Runner.ipynb](https://colab.research.google.com/github/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK/blob/master/polyguard-rl/PolyGuard_SFT_GRPO_One_Run_Runner.ipynb) +- Final evidence index: + [polyguard-rl/docs/results/final_submission_evidence/README.md](polyguard-rl/docs/results/final_submission_evidence/README.md) +- Shared environment, logs, scripts, and notebooks: + [polyguard-rl/docs/submission_artifacts.md](polyguard-rl/docs/submission_artifacts.md) +- Final artifact/evidence Space: + [adithya9903/polyguard-openenv-final-artifacts](https://huggingface.co/spaces/adithya9903/polyguard-openenv-final-artifacts) -## Agent Capabilities + Note: this Space hosts the Qwen 3B artifact bundle. The Qwen 0.5B and 1.5B + runs were trained using a second Hugging Face account, so their model + artifacts could not be hosted in the same final Space. 
Their report mirrors + are checked into this repo: + [0.5B reports](polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct) + and + [1.5B reports](polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct). -Medication reconciliation, evidence retrieval, graph safety, dosing guardrails, candidate generation, supervisor routing, planner/critic stack, explanations, and contextual bandit ranking for ablations (`app/agents/`, `docs/agents.md`). +## Why This Problem Matters -## Tasks +Medication safety is a combinatorial, partially observable, and high-stakes +decision problem. A useful policy has to do more than generate a plausible +sentence. It has to notice drug-drug interaction risk, reason about +comorbidities and organ function, respect taper and monitoring requirements, +choose safe substitutions, abstain or ask for review when uncertainty is high, +and expose why it acted. The AGS Beers Criteria and STOPP/START criteria exist +because many unsafe medication choices are systematic, recognizable, and +evaluable, but still hard to operationalize across fragmented medication lists +and incomplete context. -DDI risk reduction, safe adds/substitutions, regimen optimization, taper/deprescribing sequences, precision dosing, missing-data recovery, and new-drug decomposition (`data/scenarios/`, `app/env/catalog.py`). +The machine-learning pressure is equally real. If a medication vocabulary has +500 drugs, the number of possible five-drug combinations is: -## Reward Model / Evaluation Logic +```text +C(500, 5) = 255,244,687,600 +``` -Thirteen verifier-backed reward components roll up into four primary channels (`safety_legality`, `clinical_improvement`, `dosing_quality`, `process_integrity`), clamped to `[0.001, 0.999]`, with anti-cheat and timeout logic (`app/env/reward_router.py`, `app/env/anti_cheat.py`, `docs/reward_design.md`). 
+Exhaustively evaluating every combination is impossible in realistic data +settings. The paper that inspired this project, [Neural Bandits for Data Mining: +Searching for Dangerous Polypharmacy](https://arxiv.org/abs/2212.05190), frames +dangerous polypharmacy discovery as a bandit search problem over a massive +combination space. It benchmarks neural bandit search over simulated +polypharmacy datasets with 500 drugs and 100,000 distinct combinations, and +reports detection of up to 72 percent of potentially inappropriate +polypharmacies with 99 percent average precision after 30,000 time steps. -## Training And Post-Training Strategy +PolyGuard takes inspiration from that search framing, but moves the problem from +offline combination mining into an agentic environment: the policy sees a +patient state, chooses among legal clinical action candidates, and is judged by +a deterministic verifier and reward router rather than by free-form text +preference alone. -Build corpora (`scripts/bootstrap_data.py`, `scripts/build_training_corpus.py`), SFT with TRL (`scripts/train_sft_trl.py`), GRPO with environment reward (`scripts/train_grpo_trl.py`), merge adapters (`scripts/merge_adapters_safe.py`), validate inference (`scripts/test_inference_postsave.py`), evaluate and plot (`scripts/evaluate_*.py`, `docs/results/`). Optional HF GPU training uses `scripts/deploy_training_space.py`; public review should start with the repository root [`README.md`](../README.md), then `docs/training.md` for implementation notes. +## A Concrete Failure Trace -## Documentation index +In the final matched-seed traces, the failure mode is not abstract. On seeds +`8000` and `8004`, the basic prompt-style proxy repeatedly chose `cand_01`, +the first legal candidate, which meant `KEEP_REGIMEN` while a hidden +`warfarin_like` + `nsaid_like` interaction remained unresolved. The verifier +recorded `holdout_ddi_not_addressed`. 
The full PolyGuard pipeline selected +`cand_03`, a safer intervention candidate, and avoided those failure reasons. -- [Architecture](docs/architecture.md) -- [Environment](docs/environment_design.md) -- [Rewards](docs/reward_design.md) -- [Training](docs/training.md) -- [Evaluation](docs/evaluation.md) -- [Deployment](docs/deployment.md) -- [Datasets](docs/datasets.md) -- [Participant guide traceability](docs/participant_guide_traceability.md) -- [Idea doc vs implementation](docs/idea_document_traceability.md) -- [Submission artifact index](docs/submission_artifacts.md) -- [**Space UI demo script**](docs/DEMO_RECORDING_SCRIPT.md) +That is the core research bet of this repo: medication AI should be judged +inside a stateful safety environment, not only by whether its answer sounds +clinically plausible. + +Internal evidence: +[basic_llm_vs_polyguard_report.json](polyguard-rl/docs/results/final_submission_evidence/reports/basic_llm_vs_polyguard_report.json) +and +[action_traces.jsonl](polyguard-rl/docs/results/final_submission_evidence/reports/action_traces.jsonl). + +## Core Idea + +PolyGuard asks a narrow but important research question: + +Can environment-backed feedback make a small open model better at safe +medication action selection than prompt-only, first-legal, rule-only, or +single-agent baselines? + +The project answers that question with an inspectable stack: + +1. A finite-horizon OpenEnv simulation for medication decisions. +2. A constrained action space, so the model chooses candidate actions instead + of inventing arbitrary clinical instructions. +3. A legality verifier that prevents unsafe state mutation. +4. Thirteen reward components rolled into four primary reward channels. +5. A multi-agent policy stack with supervisor routing, contextual bandit + reranking, planner selection, critic veto, and explanation logging. +6. SFT for format and clinical-prior warm start. +7. GRPO with environment-backed reward, not an opaque LLM judge. +8. 
Agentic evaluation with baseline comparison, policy ablations, post-save + inference, robustness checks, action traces, and failure mining. + +![PolyGuard system architecture](polyguard-rl/docs/assets/diagrams/system_architecture.png) + +## Internal Evidence At A Glance + +| Claim | Repo evidence | +| --- | --- | +| Hard contraindication examples are represented | [app/knowledge/ddi_knowledge.py](polyguard-rl/app/knowledge/ddi_knowledge.py) | +| Safer alternatives are explicit | [app/knowledge/substitution_rules.py](polyguard-rl/app/knowledge/substitution_rules.py) | +| Unsafe substitutions and dose escalations are blocked before state mutation | [app/env/verifier.py](polyguard-rl/app/env/verifier.py) | +| Reward hacking and loop-like behavior are surfaced | [app/env/anti_cheat.py](polyguard-rl/app/env/anti_cheat.py), [docs/reward_design.md](polyguard-rl/docs/reward_design.md) | +| Baseline failure is traceable by seed and candidate | [docs/results/final_submission_evidence/reports/basic_llm_vs_polyguard_report.json](polyguard-rl/docs/results/final_submission_evidence/reports/basic_llm_vs_polyguard_report.json), [docs/results/final_submission_evidence/reports/action_traces.jsonl](polyguard-rl/docs/results/final_submission_evidence/reports/action_traces.jsonl) | +| Final evidence is curated separately from older smoke artifacts | [docs/results/final_submission_evidence/README.md](polyguard-rl/docs/results/final_submission_evidence/README.md) | + +## Project Map + +The implementation lives under [polyguard-rl/](polyguard-rl/). 
+ +| Area | Key paths | +| --- | --- | +| OpenEnv runtime | [openenv.yaml](polyguard-rl/openenv.yaml), [app/env/env_core.py](polyguard-rl/app/env/env_core.py), [app/env/fastapi_app.py](polyguard-rl/app/env/fastapi_app.py), [server/app.py](polyguard-rl/server/app.py) | +| Action/state contracts | [app/common/types.py](polyguard-rl/app/common/types.py), [app/common/enums.py](polyguard-rl/app/common/enums.py) | +| Candidate generation and verifier | [app/models/policy/candidate_builder.py](polyguard-rl/app/models/policy/candidate_builder.py), [app/env/verifier.py](polyguard-rl/app/env/verifier.py) | +| Reward and anti-cheat | [app/env/reward_router.py](polyguard-rl/app/env/reward_router.py), [app/env/reward_scaling.py](polyguard-rl/app/env/reward_scaling.py), [app/env/anti_cheat.py](polyguard-rl/app/env/anti_cheat.py), [configs/rewards.yaml](polyguard-rl/configs/rewards.yaml) | +| Multi-agent policy | [app/agents/](polyguard-rl/app/agents/), [docs/agents.md](polyguard-rl/docs/agents.md) | +| Bandits and baselines | [app/models/baselines/contextual_bandit.py](polyguard-rl/app/models/baselines/contextual_bandit.py), [app/models/baselines/contextual_bandit_policy.py](polyguard-rl/app/models/baselines/contextual_bandit_policy.py), [app/models/baselines/](polyguard-rl/app/models/baselines/) | +| Training | [app/training/](polyguard-rl/app/training/), [scripts/train_sft_trl.py](polyguard-rl/scripts/train_sft_trl.py), [scripts/train_grpo_trl.py](polyguard-rl/scripts/train_grpo_trl.py), [docs/training.md](polyguard-rl/docs/training.md) | +| Data | [data/raw/knowledge/drug_knowledge.json](polyguard-rl/data/raw/knowledge/drug_knowledge.json), [data/processed/](polyguard-rl/data/processed/), [data/scenarios/](polyguard-rl/data/scenarios/), [docs/datasets.md](polyguard-rl/docs/datasets.md) | +| Evaluation | [app/evaluation/](polyguard-rl/app/evaluation/), [scripts/evaluate_all.py](polyguard-rl/scripts/evaluate_all.py), [docs/evaluation.md](polyguard-rl/docs/evaluation.md) | +| 
Product API/UI | [app/api/](polyguard-rl/app/api/), [app/ui/frontend/](polyguard-rl/app/ui/frontend/), [docs/ui.md](polyguard-rl/docs/ui.md) | +| Math | [docs/math.md](polyguard-rl/docs/math.md), [docs/mathematics.md](polyguard-rl/docs/mathematics.md) | +| Results | [docs/results/final_submission_evidence/](polyguard-rl/docs/results/final_submission_evidence/) | + +This README is the canonical narrative and evidence map. The docs under +[polyguard-rl/docs/](polyguard-rl/docs/) are supporting references: +[architecture.md](polyguard-rl/docs/architecture.md) for system design, +[environment_design.md](polyguard-rl/docs/environment_design.md) for +state/action mechanics, [reward_design.md](polyguard-rl/docs/reward_design.md) +for reward channels, [safety.md](polyguard-rl/docs/safety.md) for guardrails, +[precision_dosing.md](polyguard-rl/docs/precision_dosing.md) for dosing details, +[graph_models.md](polyguard-rl/docs/graph_models.md) for graph/risk modeling, +[ablations.md](polyguard-rl/docs/ablations.md) for policy-slice analysis, +[api.md](polyguard-rl/docs/api.md) for service routes, +[deployment.md](polyguard-rl/docs/deployment.md) for deployment surfaces, +[ui.md](polyguard-rl/docs/ui.md) and +[DEMO_RECORDING_SCRIPT.md](polyguard-rl/docs/DEMO_RECORDING_SCRIPT.md) for the +operator demo, and [submission_artifacts.md](polyguard-rl/docs/submission_artifacts.md) +for artifact traceability. + +Older smoke-run mirrors are retained for auditability. Final claims in this +README use the curated evidence bundle under +[docs/results/final_submission_evidence/](polyguard-rl/docs/results/final_submission_evidence/). + +## Environment Design + +At the center is `PolyGuardEnv`, implemented in +[app/env/env_core.py](polyguard-rl/app/env/env_core.py). 
It follows the familiar +OpenEnv/Gym shape: + +```text +reset(seed, difficulty, sub_environment, scenario_id, patient_id) + -> PolyGuardObservation + +step(PolyGuardAction) + -> (PolyGuardObservation, reward, done, info) +``` + +At reset, the environment loads or generates a patient scenario, selects a +difficulty and sub-environment, computes a risk summary, builds candidate +actions, estimates uncertainty, and emits a strict observation. At step time, +the environment parses the action, checks legality, evaluates anti-cheat rules, +mutates state only if the action is safe, computes decomposed reward, appends a +trace, and returns detailed `info` fields such as failure reasons, transition +delta, primary reward channels, invalid-action count, and timeout checks. + +![Runtime step flow](polyguard-rl/docs/assets/diagrams/runtime_step_flow.png) + +### Sub-Environments + +PolyGuard is not a single task. It cycles through specialized sub-environments: + +| Sub-environment | What it stresses | +| --- | --- | +| `DDI` | High-risk drug-drug interaction recognition and resolution | +| `BANDIT_MINING` | Candidate exploration and shortlist/ranking behavior inspired by bandit search | +| `REGIMEN_RISK` | General medication burden and regimen optimization | +| `PRECISION_DOSING` | Dose-hold, dose reduction, renal/hepatic guardrails, monitoring decisions | +| `LONGITUDINAL_DEPRESCRIBING` | Multi-step taper/deprescribing behavior over a longer horizon | +| `WEB_SEARCH_MISSING_DATA` | Evidence fetch or review when critical data is missing | +| `ALTERNATIVE_SUGGESTION` | Safe alternatives and within-class substitution | +| `NEW_DRUG_DECOMPOSITION` | First-pass reasoning over an unknown or combination medication | + +The curriculum in [app/env/curriculum.py](polyguard-rl/app/env/curriculum.py) +starts with short easy DDI/regimen-risk episodes, then adds bandit and +alternative-selection tasks, and finally hard cases with precision dosing, +longitudinal deprescribing, missing data, 
and new-drug decomposition. + +### State And Observation + +The latent state is represented by `PolyGuardState` and includes: + +- Patient demographics and identifiers. +- Active decision mode. +- Step count and max step budget. +- Medications, dose buckets, comorbidities, labs, vitals, frailty, adherence, + monitoring gaps, and prior adverse event history. +- Burden score, severe-pair count, precision dosing flags, unresolved conflicts, + action history, cumulative reward, and done state. + +The agent does not get all simulator internals. It receives a controlled +`PolyGuardObservation`: + +- Patient summary. +- Medication table. +- Comorbidity summary. +- Organ function and labs/vitals. +- Graph safety summary. +- Burden score summary. +- Precision dosing flags. +- Unresolved conflicts. +- Candidate action set. +- Step budget remaining. +- Action history. +- Warning summary. +- Abstention indicators. +- Deterministic contract with seed, scenario, difficulty, and sub-environment. + +This split matters: PolyGuard is a partially observable environment. Missing +labs and unresolved conflicts are visible as uncertainty signals, not as hidden +reward traps. + +## Action Space And Safety Constraints + +PolyGuard deliberately avoids unconstrained text actions. The policy chooses a +strict `PolyGuardAction` with fields such as: + +- `mode`: `REGIMEN_OPT`, `DOSE_OPT`, `REVIEW`, or `ABSTAIN_REVIEW`. +- `action_type`: one of the constrained clinical action types. +- `target_drug`, `replacement_drug`, `dose_bucket`, `taper_days`, + `monitoring_plan`, `evidence_query`, `new_drug_name`, and + `candidate_components`. +- `candidate_id`, `confidence`, and `rationale_brief`. 
+ +The action types are intentionally compact: + +| Family | Action types | +| --- | --- | +| Regimen | `KEEP_REGIMEN`, `STOP_DRUG`, `SUBSTITUTE_WITHIN_CLASS`, `RECOMMEND_ALTERNATIVE` | +| Dosing | `REDUCE_DOSE_BUCKET`, `INCREASE_DOSE_BUCKET`, `DOSE_HOLD`, `ORDER_MONITORING_AND_WAIT` | +| Deprescribing | `TAPER_INITIATE`, `TAPER_CONTINUE` | +| Evidence and uncertainty | `FETCH_EXTERNAL_EVIDENCE`, `DECOMPOSE_NEW_DRUG`, `REQUEST_SPECIALIST_REVIEW`, `REQUEST_PHARMACIST_REVIEW` | + +The candidate builder in +[app/models/policy/candidate_builder.py](polyguard-rl/app/models/policy/candidate_builder.py) +generates a bounded candidate set: + +```text +3 <= |C_t| <= 10 +``` + +Each candidate carries estimated safety delta, burden delta, disease stability, +uncertainty score, rationale tags, required monitoring, and a legality precheck. +Policy selection is candidate selection: + +```text +a_t = to_action(c_t), where c_t is in C_t +``` + +The verifier in [app/env/verifier.py](polyguard-rl/app/env/verifier.py) enforces +hard safety constraints before state mutation. It checks, among other things: + +- The target drug exists in the regimen when required. +- Substitutions and alternatives are drawn from allowed substitution rules. +- External evidence domains are allowlisted. +- New-drug decomposition includes a new drug and components. +- Abrupt stopping is blocked when tapering is required. +- Renal/hepatic unsafe dose escalation is blocked. +- Duplicate therapy and contraindicated replacement pairs are blocked. +- Monitoring and hold actions include a monitoring plan. +- Destabilizing deprescribing patterns are blocked. + +Illegal actions can receive reward penalties and become visible in traces, but +they do not mutate patient state. + +## Multi-Agent Policy Stack + +The "agents" in PolyGuard are an auditable policy factorization rather than +independent chatbots. 
A step flows through: + +```text +MedRec -> Evidence -> GraphSafety -> Dosing -> Candidate + -> Supervisor -> Planner -> Critic -> Env -> Explainer +``` + +![Multi-agent orchestration](polyguard-rl/docs/assets/diagrams/multi_agent_orchestration.png) + +| Agent/module | Role | +| --- | --- | +| `MedRecAgent` | Summarizes current regimen and medication burden | +| `EvidenceAgent` | Retrieves local or fallback evidence when missing data is present | +| `GraphSafetyAgent` | Scores risky pairs, side-effect load, duplicate therapy, and graph safety patterns | +| `DosingAgent` | Detects dose-sensitive cases and dose-hold opportunities | +| `CandidateAgent` | Exposes legal candidate actions from the environment candidate builder | +| `SupervisorAgent` | Routes to regimen optimization, dose optimization, or review mode | +| `PlannerAgent` | Selects an action from candidates through the policy provider | +| `CriticAgent` | Vetoes illegal or unsafe proposed actions and can force review fallback | +| `ExplainerAgent` | Records grounded rationale for demo, replay, and audit | + +The orchestration modes are: + +- `sequential_pipeline` +- `supervisor_routed` +- `replan_on_veto` +- `lightweight_debate` + +Policy-stack ablations compare: + +- `bandit-only` +- `llm-only` +- `llm+bandit` + +## Contextual Bandits + +PolyGuard uses contextual bandits as an inspectable candidate-reranking layer. +This is where the project most directly echoes the arXiv bandit inspiration: +unsafe polypharmacy search is combinatorial, so the system should learn which +regions of the candidate/action space are worth exploring rather than enumerate +everything. 
+ +Each candidate becomes an 8-dimensional feature vector: + +```text +x(c) = [ + 1, + I[legality_precheck], + estimated_safety_delta, + burden_delta, + disease_stability_estimate, + 1 - uncertainty_score, + I[mode = DOSE_OPT], + I[mode = REVIEW] +] +``` + +An arm is keyed by macro mode and action type: + +```text +arm(c) = mode(c) || ":" || action_type(c) +``` + +The LinUCB variant maintains, for each arm `a`: + +```text +A_a = I + sum x x^T +b_a = sum r x +theta_a = A_a^{-1} b_a + +score_a(x) = theta_a^T x + alpha * sqrt(x^T A_a^{-1} x) +``` + +There is also a Thompson-style variant: + +```text +score_a(x) = theta_a^T x + Normal(0, alpha) +``` + +This layer can shortlist candidates before the planner emits the final action. +It is deliberately kept inside the candidate space: the bandit can improve +ordering and exploration, but it cannot invent an unsafe action outside the +environment contract. + +## Reward Model + +The reward model is decomposed on purpose. A single scalar reward is needed for +RL, but safety-critical RL needs more than one opaque number. PolyGuard logs 13 +component columns and four primary channels on every step. 
+ +![Reward decomposition](polyguard-rl/docs/assets/diagrams/reward_decomposition.png) + +All reward values are clamped and quantized: + +```text +q(x) = round(clip(x, 0.001, 0.999), 3) +``` + +The 13 reward components are: + +| Component | Weight | Meaning | +| --- | ---: | --- | +| `format_compliance_score` | 0.08 | Action payload conforms to the schema | +| `candidate_alignment_score` | 0.08 | The model selected a valid candidate-style id | +| `legality_score` | 0.12 | The verifier accepted the action | +| `safety_delta_score` | 0.15 | Severe-pair and burden risk decreased | +| `burden_improvement_score` | 0.08 | Dose-weighted medication burden improved | +| `disease_stability_score` | 0.10 | The action did not destabilize underlying disease management | +| `dosing_quality_score` | 0.08 | Dose-sensitive routing/action quality | +| `abstention_quality_score` | 0.06 | Review/abstention is appropriate under uncertainty | +| `efficiency_score` | 0.06 | The action uses the finite step budget well | +| `process_fidelity_score` | 0.06 | The action follows task-specific process expectations | +| `explanation_grounding_score` | 0.03 | The rationale is present and grounded | +| `anti_cheat_score` | 0.06 | Reward-hacking checks did not fire | +| `uncertainty_calibration_score` | 0.04 | Confidence matches observable uncertainty | + +The scalar reward is a weighted average: + +```text +R_env(s_t, a_t, s_{t+1}) = q( sum_i w_i c_i / sum_i w_i ) +``` + +Safety-heavy terms dominate the total weight: + +```text +legality + safety_delta + burden + disease_stability + anti_cheat += 0.12 + 0.15 + 0.08 + 0.10 + 0.06 += 0.51 +``` + +The four primary reward channels are: + +| Channel | Component family | +| --- | --- | +| `safety_legality` | legality, candidate alignment, anti-cheat, uncertainty calibration | +| `clinical_improvement` | safety delta, burden improvement, disease stability | +| `dosing_quality` | dosing quality and abstention quality | +| `process_integrity` | format 
compliance, efficiency, process fidelity, explanation grounding | + +These channels are emitted in `info.primary_reward_channels`, GRPO reward logs, +reports, plots, and ablation summaries. + +## Anti-Cheat And Failure Visibility + +RL policies exploit reward functions. PolyGuard makes common shortcut failures +explicit: + +- Repeated action loops. +- Excessive keep-regimen behavior. +- Excessive review/abstention behavior. +- Candidate ID mismatch. +- Candidate outside the legal set. +- Hidden high-risk DDI no-op behavior. +- Parser exploit patterns in rationales. +- Retrying a failed no-op action. + +If an exploit is detected: + +```text +anti_cheat_score = 0.001 +done = true +termination_reason = "exploit_detection" +``` + +Episodes can also terminate on step budget exhaustion, repeated invalid actions, +safety-veto threshold, patient destabilization, safe resolution, wall-clock +timeout, or per-step timeout. + +![Episode state machine](polyguard-rl/docs/assets/diagrams/episode_state_machine.png) + +## Mathematics + +PolyGuard can be read as a finite-horizon constrained partially observable +Markov decision process: + +```text +M = (S, A, O, T, R, H, C) +``` + +where: + +- `S` is latent patient/regimen state. +- `A` is the constrained medication action set. +- `O` is the controlled observation. +- `T(s' | s, a)` is the transition function. +- `R(s, a, s')` is verifier-backed reward. +- `H` is the episode horizon. +- `C(s, a)` is the hard safety/legality constraint predicate. + +The objective is: + +```text +maximize_pi E_pi [ sum_{t=0}^{H-1} R(s_t, a_t, s_{t+1}) ] +subject to C(s_t, a_t) = 1 whenever possible +``` + +There is no explicit discount factor in the runtime. 
Time preference enters +through finite horizons and the efficiency reward: + +```text +efficiency_t = q(1 - step_count_t / (max_steps + 1)) +``` + +State transition is two-gated: + +```text +if verifier(s_t, a_t).legal and not anti_cheat(s_t, a_t): + s_{t+1} = T(s_t, a_t) +else: + s_{t+1} = rollback_state_with_failed_action_record(s_t, a_t) +``` + +Risk-like deltas become reward through: + +```text +delta_reward(pre, post) = q(0.5 + 0.6 * (pre - post)) +``` + +For burden and contraindicated-pair improvement: + +```text +burden_reward = delta_reward(pre_burden, post_burden) +pair_reward = delta_reward(pre_pairs, post_pairs) + +safety_delta_score = + q(0.65 * pair_reward + 0.35 * burden_reward) if legal + 0.001 otherwise +``` + +GRPO uses environment execution as the reward function. For each prompt, the +model emits candidate completions; PolyGuard parses the candidate id, resets a +deterministic environment using the recorded seed and scenario fields, executes +one step, and returns reward. The training reward combines environment reward +with a legality bonus: + +```text +legal_bonus = 0.95 if action is legal else 0.05 + +R_GRPO = q(0.80 * R_env + 0.20 * legal_bonus) +``` + +Conceptually, GRPO forms a within-prompt advantage: + +```text +A_i = (R_i - mean_j R_j) / (std_j R_j + epsilon) +``` + +and optimizes a clipped policy-ratio objective with KL regularization. The +optimizer mechanics are TRL's; PolyGuard's contribution is the verifier-backed +reward function and the controlled action/state environment. + +The expanded derivation is in +[polyguard-rl/docs/mathematics.md](polyguard-rl/docs/mathematics.md). + +## Data And Dataset Pipeline + +The data pipeline builds a compact medication-safety substrate from local drug +knowledge, synthetic patients, scenario files, retrieval text, and optional +external augmentation. 
+ +![Data and training pipeline](polyguard-rl/docs/assets/diagrams/data_training_pipeline.png) + +Tracked local processed data currently includes: + +| Artifact | Count | Path | +| --- | ---: | --- | +| Normalized drug rows | 10 | [data/processed/normalized_drugs.parquet](polyguard-rl/data/processed/normalized_drugs.parquet) | +| Drug class rows | 10 | [data/processed/drug_classes.parquet](polyguard-rl/data/processed/drug_classes.parquet) | +| Interaction rows | 2 | [data/processed/interactions.parquet](polyguard-rl/data/processed/interactions.parquet) | +| Graph edges | 18 | [data/processed/graph_edges.parquet](polyguard-rl/data/processed/graph_edges.parquet) | +| Synthetic patients | 20 | [data/processed/patients_synthetic.parquet](polyguard-rl/data/processed/patients_synthetic.parquet) | +| Retrieval documents | 8 | [data/processed/retrieval_corpus.jsonl](polyguard-rl/data/processed/retrieval_corpus.jsonl) | +| Easy scenarios | 100 | [data/scenarios/scenarios_easy.jsonl](polyguard-rl/data/scenarios/scenarios_easy.jsonl) | +| Medium scenarios | 200 | [data/scenarios/scenarios_medium.jsonl](polyguard-rl/data/scenarios/scenarios_medium.jsonl) | +| Hard scenarios | 200 | [data/scenarios/scenarios_hard.jsonl](polyguard-rl/data/scenarios/scenarios_hard.jsonl) | +| Local small SFT rows | 80 | [data/processed/training_corpus_sft.jsonl](polyguard-rl/data/processed/training_corpus_sft.jsonl) | +| Local small GRPO prompts | 80 | [data/processed/training_corpus_grpo_prompts.jsonl](polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl) | + +The provenance manifest records the source policy and counts: +[data/processed/provenance_manifest.json](polyguard-rl/data/processed/provenance_manifest.json). 
+ +Additional data-governance and rule artifacts are intentionally checked in: + +| Artifact | Why it matters | +| --- | --- | +| [data/processed/ingested_sources.json](polyguard-rl/data/processed/ingested_sources.json) | Source ingestion ledger used by the local build | +| [data/processed/feature_dictionary.json](polyguard-rl/data/processed/feature_dictionary.json) | Names and meanings of structured model features | +| [data/processed/burden_rules.yaml](polyguard-rl/data/processed/burden_rules.yaml) | Medication-burden and duplicate-therapy rules | +| [data/processed/substitution_rules.yaml](polyguard-rl/data/processed/substitution_rules.yaml) | Data-level safer-substitution rules | +| [data/processed/taper_rules.yaml](polyguard-rl/data/processed/taper_rules.yaml) | Deprescribing and taper requirements | +| [data/retrieval_index/index.json](polyguard-rl/data/retrieval_index/index.json) | Retrieval index over local evidence chunks | + +The local knowledge seed is +[data/raw/knowledge/drug_knowledge.json](polyguard-rl/data/raw/knowledge/drug_knowledge.json). +It contains drug classes, example high-risk pairs, renal and hepatic flags, +side-effect tags, substitution rules, and taper requirements. The processed +tables then feed graph modeling, candidate generation, environment scenarios, +retrieval, SFT rows, and GRPO prompts. + +The full training/evidence runs used 2,000 examples per Qwen model, recorded in +the final reports under +[docs/results/final_submission_evidence/reports/](polyguard-rl/docs/results/final_submission_evidence/reports/). + +## Models Inside The Environment + +PolyGuard combines learned and rule-backed components: + +- Graph safety model: + [app/models/graph/](polyguard-rl/app/models/graph/) produces regimen + embeddings, pairwise DDI severity, severe-alert probability, and side-effect + tag probabilities. 
Fallback graph features include drug identity hashes, + class counts, side-effect load, medication count, contraindicated-pair count, + and class flags. +- Tabular risk model: + [app/models/tabular/](polyguard-rl/app/models/tabular/) supports calibrated + patient/regimen risk heads and evaluation. +- Dosing model: + [app/models/dosing/](polyguard-rl/app/models/dosing/) models dose-sensitive + states with target attainment, toxicity, underdose risk, organ stress, + interaction load, and monitoring need. +- Retrieval: + [app/models/retrieval/](polyguard-rl/app/models/retrieval/) and + [app/knowledge/](polyguard-rl/app/knowledge/) provide local evidence chunks, + drug rules, renal/hepatic guardrails, duplicate therapy rules, substitution + rules, taper rules, burden scoring, and side-effect ontology. +- Active model runtime: + [app/models/policy/active_model.py](polyguard-rl/app/models/policy/active_model.py) + discovers activated artifacts from `checkpoints/active/active_model_manifest.json`. + The provider load order prefers a GRPO adapter, then merged model, then SFT + adapter. +- Provider runtime: + [app/models/policy/provider_runtime.py](polyguard-rl/app/models/policy/provider_runtime.py) + is Transformers-first, with optional Ollama when enabled. If model loading is + unavailable, the runtime falls back to deterministic safety ranking. 
+ +Tracked support-model reports show that the environment is not only an LLM +wrapper: + +| Component | Report | Current tracked result | +| --- | --- | --- | +| Graph model | [docs/results/graph_train.json](polyguard-rl/docs/results/graph_train.json) | `status: trained`, `num_samples: 180`, artifact path `outputs/models/graph_model.pkl` | +| Tabular risk model | [docs/results/risk_train.json](polyguard-rl/docs/results/risk_train.json) | `status: trained`, `dataset_size: 180`, `train_mae: 0.0033`, artifact path `outputs/models/tabular_risk.pkl` | +| Dose surrogate model | [docs/results/dose_train.json](polyguard-rl/docs/results/dose_train.json) | `status: trained`, `dataset_size: 120`, `train_mae: 0.0025`, artifact path `outputs/models/dose_model.pkl` | + +The hard-coded contraindicated seed pairs in +[app/knowledge/ddi_knowledge.py](polyguard-rl/app/knowledge/ddi_knowledge.py) +include `warfarin_like` + `nsaid_like` and `benzodiazepine_like` + +`opioid_like`. Substitution rules in +[app/knowledge/substitution_rules.py](polyguard-rl/app/knowledge/substitution_rules.py) +include safer alternatives such as `nsaid_like -> acetaminophen_like`, +`nsaid_like -> topical_nsaid_like`, `benzodiazepine_like -> +non_benzo_sleep_support`, and `opioid_like -> non_opioid_analgesic`. + +### Precision Dosing Details + +Precision dosing uses sensitive classes such as anticoagulants, sedatives, and +glucose-lowering drugs. The dosing agent and surrogate model are implemented in +[app/agents/dosing_agent.py](polyguard-rl/app/agents/dosing_agent.py) and +[app/models/dosing/](polyguard-rl/app/models/dosing/). 
+ +The surrogate PK/PD transition in +[app/models/dosing/surrogate_pkpd.py](polyguard-rl/app/models/dosing/surrogate_pkpd.py) +uses effect, toxicity, underdose, organ stress, and interaction load: + +```text +effective_delta = dose_delta * (1 - min(0.6, organ_factor * 0.4)) + +effect' = + clip(effect + 0.28 * effective_delta - 0.05 * interaction_factor, 0, 1) + +toxicity_gain = + max(0, dose_delta) * (0.35 + 0.25 * organ_factor + 0.20 * interaction_factor) + +toxicity' = + clip(0.85 * toxicity + toxicity_gain, 0, 1) + +underdose' = + clip(1 - effect' + 0.15 * max(0, -dose_delta), 0, 1) +``` + +The higher-level dosing metrics use target attainment, toxicity avoidance, +underdose risk, and monitoring need: + +```text +target_attainment = 1 - abs(effect_level - 0.62) +toxicity_proxy = toxicity_level + 0.20 * organ_stress + 0.12 * interaction_load +measurement_need = max(toxicity_proxy, underdose_proxy) +``` + +## Training And Post-Training + +The training stack is deliberately staged: + +1. Build structured data, scenarios, retrieval records, SFT examples, and GRPO + prompts. +2. Run SFT with TRL to teach the model the candidate-id format and obvious + clinical priors. +3. Run GRPO with environment-backed reward, where sampled candidate completions + are executed in PolyGuardEnv and scored by the verifier/reward router. +4. Track sampled generations, reward components, primary reward channels, + legality, anti-cheat events, and training curves. +5. Run policy-stack ablations and baseline comparisons. +6. Merge or export adapters safely. +7. Validate post-save inference from the saved artifact, not from an in-memory + training object. +8. Generate reports, charts, action traces, and final artifact manifests. 
+ +The relevant training source files are: + +- [scripts/train_sft_trl.py](polyguard-rl/scripts/train_sft_trl.py) +- [scripts/train_grpo_trl.py](polyguard-rl/scripts/train_grpo_trl.py) +- [app/training/sft_trl.py](polyguard-rl/app/training/sft_trl.py) +- [app/training/grpo_trl.py](polyguard-rl/app/training/grpo_trl.py) +- [app/training/reward_functions.py](polyguard-rl/app/training/reward_functions.py) +- [app/training/openenv_wrapper.py](polyguard-rl/app/training/openenv_wrapper.py) +- [app/hf_space/training_runner.py](polyguard-rl/app/hf_space/training_runner.py) + +The one-run notebook is +[polyguard-rl/PolyGuard_SFT_GRPO_One_Run_Runner.ipynb](polyguard-rl/PolyGuard_SFT_GRPO_One_Run_Runner.ipynb). +It is the accessible Colab/HF workflow for building data, running checks, +launching training, pulling reports, generating charts, validating inference, +activating a model, deploying the product Space, and running acceptance checks. + +The modular notebook series is: + +- [01_data_building.ipynb](polyguard-rl/notebooks/01_data_building.ipynb) +- [02_knowledge_graph.ipynb](polyguard-rl/notebooks/02_knowledge_graph.ipynb) +- [03_risk_models.ipynb](polyguard-rl/notebooks/03_risk_models.ipynb) +- [04_environment_validation.ipynb](polyguard-rl/notebooks/04_environment_validation.ipynb) +- [05_sft_debug.ipynb](polyguard-rl/notebooks/05_sft_debug.ipynb) +- [06_grpo_debug.ipynb](polyguard-rl/notebooks/06_grpo_debug.ipynb) +- [07_policy_analysis.ipynb](polyguard-rl/notebooks/07_policy_analysis.ipynb) +- [08_dosing_analysis.ipynb](polyguard-rl/notebooks/08_dosing_analysis.ipynb) +- [09_training_loop.ipynb](polyguard-rl/notebooks/09_training_loop.ipynb) + +For exact local and remote execution details, use +[docs/training.md](polyguard-rl/docs/training.md) and +[docs/submission_artifacts.md](polyguard-rl/docs/submission_artifacts.md). +Those docs contain operational notes; this README keeps the blog story focused +on architecture, data, evaluation, and evidence. 
+ +## Training Curves And Model Results + +The final curated evidence lives in +[polyguard-rl/docs/results/final_submission_evidence/](polyguard-rl/docs/results/final_submission_evidence/). +It replaces earlier smoke-run charts and older 0.5B/1.5B-only views. + +### SFT Loss Across Qwen Runs + +![SFT loss curves across Qwen runs](polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/sft_loss_curves_all_models.png) + +The SFT curves, post-save valid rates, and token-accuracy histories together +show that the models learned the candidate-id output contract rather than only +producing unconstrained prose. The visible curves drop from roughly `3.0-3.6` +initial loss to low final loss across all three Qwen sizes. + +![Qwen 3B SFT training loss](polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_sft_training_loss.png) + +The tracked per-model summaries are: + +| Run | Model | Epochs | Final step | Runtime | Key SFT metrics | +| --- | --- | ---: | ---: | ---: | --- | +| `qwen-qwen2-5-0-5b-instruct` | `Qwen/Qwen2.5-0.5B-Instruct` | 2 | 2,000 | `234.6302s` | loss `3.0856 -> 0.0626`, best `0.0057`, train loss `0.1923`, token accuracy `0.9717`, valid rate `1.0`, avg env reward `0.726`, latency `1.839s` | +| `qwen-qwen2-5-1-5b-instruct` | `Qwen/Qwen2.5-1.5B-Instruct` | 2 | 4,000 | `483.7085s` | loss `2.9686 -> 0.0681`, best `0.0009`, train loss `0.1152`, token accuracy `0.9726`, valid rate `1.0`, avg env reward `0.726`, latency `2.158s` | +| `qwen-qwen2-5-3b-instruct` | `Qwen/Qwen2.5-3B-Instruct` | 2 | 2,000 | `715.2908s` | loss `3.5687 -> 0.054`, best `0.0022`, train loss `0.1569`, token accuracy `0.9750`, SFT avg env reward `0.781`, SFT latency `2.863s` | + +Each SFT run used `2,000` examples. The 0.5B and 3B runs recorded `2,001` +history rows including the final trainer summary; the 1.5B run recorded `4,001` +history rows because its batch configuration produced `4,000` final steps. 
+ +### GRPO Reward Curve + +![Qwen 3B GRPO reward curve](polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_reward_curve.png) + +![Qwen 3B GRPO training loss](polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_loss_curve.png) + +The complete GRPO evidence is available for Qwen 3B: + +- Backend: `trl_transformers` +- Model: `Qwen/Qwen2.5-3B-Instruct` +- Records: `2000` +- Epochs: `1.0` +- Final step: `2000` +- Runtime: `6873.9375s` (`1.91h`) +- Reward samples: `4000` +- GRPO average reward: `0.767` +- GRPO reward history: min `0.376`, max `0.880`, last `0.812`, average `0.76685` +- GRPO train loss: `0.000002665` +- Post-save GRPO valid rate: `1.0` +- Post-save GRPO average environment reward: `0.726` +- Post-save GRPO average latency: `3.681s` +- Artifact path recorded in the report: `checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_adapter` + +The source reports are: + +- [reports/grpo_trl_run.json](polyguard-rl/docs/results/final_submission_evidence/reports/grpo_trl_run.json) +- [reports/postsave_inference_grpo.json](polyguard-rl/docs/results/final_submission_evidence/reports/postsave_inference_grpo.json) +- [reports/submission_summary.json](polyguard-rl/docs/results/final_submission_evidence/reports/submission_summary.json) + +### SFT vs GRPO By Model + +![SFT vs GRPO verifier reward by model](polyguard-rl/docs/results/final_submission_evidence/charts/curated/model_comparison/sft_vs_grpo_reward_by_model.png) + +This chart is intentionally transparent about artifact availability. Qwen 0.5B +and 1.5B have SFT reports/histories and post-save SFT evidence in the repo, but +their adapter directories were not present in the local/final artifact mirrors +at packaging time. Qwen 3B has the complete SFT plus GRPO artifact set. 
+ +The packaged manifest records Qwen 3B as complete with `125` checkpoint files +(`433,208,536` bytes), `11` SFT adapter files (`30,655,905` bytes), `11` GRPO +adapter files (`30,656,841` bytes), and `9` report files (`5,930,214` bytes). +Qwen 0.5B and 1.5B are retained as report/post-save evidence only. + +The manifest records this explicitly: +[docs/results/final_submission_evidence/manifest.json](polyguard-rl/docs/results/final_submission_evidence/manifest.json). + +### Product Pipeline vs Basic LLM Proxy + +![Basic LLM vs full PolyGuard pipeline](polyguard-rl/docs/results/final_submission_evidence/charts/curated/product_over_basic_llm/basic_llm_vs_full_pipeline_reward.png) + +Matched-seed evaluation compares a basic LLM-style first-legal proxy, an +SFT-style safety ranker, and the full PolyGuard orchestrated pipeline. The same +PolyGuard verifier/reward system judges all three. + +| Policy | Episodes | Avg reward | Legality rate | Failure/exploit rate | Candidate diversity | +| --- | ---: | ---: | ---: | ---: | ---: | +| Basic LLM proxy | 8 | `0.762` | `1.0` | `0.25` | 1 | +| SFT policy proxy | 8 | `0.818` | `1.0` | `0.0` | 2 | +| Full PolyGuard pipeline | 8 | `0.805` | `1.0` | `0.0` | 2 | + +The full pipeline improves average verifier reward over the basic LLM proxy by +`+0.043` while reducing visible failure/exploit rate from `0.25` to `0.0`. + +![Reward delta by matched seed](polyguard-rl/docs/results/final_submission_evidence/charts/curated/product_over_basic_llm/reward_delta_by_seed.png) + +Two matched seeds expose the core failure mode: the basic policy repeatedly +kept a regimen despite the hidden `warfarin_like` + `nsaid_like` DDI holdout, +triggering `holdout_ddi_not_addressed`. The full pipeline selected safer dose +or hold candidates and avoided those failure reasons. + +Source: +[reports/basic_llm_vs_polyguard_report.json](polyguard-rl/docs/results/final_submission_evidence/reports/basic_llm_vs_polyguard_report.json). 
+ +### Reward Components And Channels + +![Reward component bars](polyguard-rl/docs/results/final_submission_evidence/charts/curated/reward_and_safety/reward_component_bars.png) + +![Primary reward channel bars](polyguard-rl/docs/results/final_submission_evidence/charts/curated/reward_and_safety/primary_reward_channel_bars.png) + +The reward charts are as important as the scalar reward curve. They show whether +the model is improving by becoming safer and more process-faithful or merely +exploiting one easy component. The reports log the full 13-component reward +vector and the four primary channels for GRPO and evaluation runs. + +For Qwen 3B GRPO, the tracked average primary channels are: + +| Channel | Average | +| --- | ---: | +| `safety_legality` | `0.816` | +| `clinical_improvement` | `0.609` | +| `dosing_quality` | `0.543` | +| `process_integrity` | `0.875` | + +### Post-Save Inference + +![Inference validity and reward](polyguard-rl/docs/results/final_submission_evidence/charts/curated/inference/inference_validity_reward.png) + +Post-save inference is a separate check from training. The exported/activated +artifact is loaded and asked to choose candidate ids on held-out prompt samples. The +Qwen 3B GRPO adapter path produced: + +- `model_source: adapter` +- `samples: 5` +- `valid_rate: 1.0` +- `avg_env_reward: 0.726` +- `avg_latency_seconds: 3.681` + +This is why the README treats post-training as more than a training log: the +saved artifact must still produce parseable candidate ids and executable +environment actions. + +One caveat matters: `valid_rate: 1.0` means the output was parseable and +executable as a candidate selection. In the five-sample Qwen 3B post-save GRPO +report, four valid samples still terminated with `exploit_detection`. That is +retained as safety evidence, because PolyGuard's job is to expose suspicious or +loop-like behavior instead of hiding it behind a clean parse metric. 
+ +## Agentic Evaluation + +Evaluation is not just one benchmark number. The evaluation stack under +[app/evaluation/](polyguard-rl/app/evaluation/) includes: + +- Offline policy evaluation. +- Safety evaluation. +- Dosing evaluation. +- Robustness under missing labs, noisy dose info, conflicting medications, + alias noise, hidden duplicate therapy, wrong candidate ids, stale evidence, + and delayed adverse event manifestation. +- Calibration and abstention evaluation. +- Process fidelity and invalid-action tracking. +- Subgroup summaries for renal compromise, hepatic compromise, and frailty. +- Explainability grounding. +- Baseline comparison. +- Policy ablations. +- Failure mining and action traces. + +The tracked benchmark report records: + +| Metric family | Result | +| --- | --- | +| Offline avg reward | `0.772833` | +| Offline legal rate | `1.0` | +| Severe violation rate | `0.0` | +| Illegal step rate | `0.0` | +| Dosing target attainment | `0.75` | +| Dosing toxicity avoidance | `1.0` | +| Missing-labs safety rate | `0.666667` | +| Noisy-dose, conflicting-meds, alias-noise, hidden-duplicate, wrong-candidate-id, stale-evidence, delayed-ADE safety/resilience | `1.0` | +| Calibration ECE proxy | `0.08625` | +| Process fidelity | `0.92` | +| Explainability grounding | `0.8` | + +Source: +[docs/results/benchmark_report.json](polyguard-rl/docs/results/benchmark_report.json). + +The improvement gate compares baseline and candidate reports: + +| Gate dimension | Delta | +| --- | ---: | +| Average reward | `+0.025833` | +| Legality rate | `0.0` non-regression | +| Success rate | `0.0` non-regression | +| Process fidelity | `+0.92` | +| Timeout rate | `0.0` non-regression | +| Failure visibility | `0.0` non-regression | + +Source: +[docs/results/improvement_report.json](polyguard-rl/docs/results/improvement_report.json). 
+ +### Policy Ablation Results + +| Stack | Avg reward | Legality | Visible failure rate | Exploit detections | Interpretation | +| --- | ---: | ---: | ---: | ---: | --- | +| `bandit_only` | `0.779625` | `1.0` | `0.0625` | 2 | Strong deterministic shortlist behavior with a low visible failure rate | +| `llm_only` | `0.772391` | `1.0` | `0.3043` | 7 | Legal, but more loop-like failure behavior | +| `llm+bandit` | `0.764739` | `1.0` | `0.3043` | 7 | Current combined stack needs tighter exploration/control in these ablation settings | + +![Policy ablation reward](polyguard-rl/docs/results/final_submission_evidence/charts/curated/policy_ablation/policy_ablation_reward.png) + +The point of these ablations is not to claim every combined policy is always +better. The point is that PolyGuard can localize behavior: legality remains +high, while failure mining shows whether a stack is looping, over-reviewing, +or selecting non-improving candidates. + +Source: +[reports/policy_ablation_report.json](polyguard-rl/docs/results/final_submission_evidence/reports/policy_ablation_report.json). + +## OpenEnv And Product Surfaces + +The OpenEnv package is compact: + +```yaml +spec_version: 1 +name: polyguard-openenv +runtime: fastapi +app: app.env.fastapi_app:app +port: 8100 +``` + +The OpenEnv runtime exposes: + +- `POST /reset` +- `POST /step` +- `GET /state` +- `GET /metadata` +- `GET /schema` +- `POST /mcp` +- `GET /health` +- `GET /ws` +- Backward-compatible `/env/*` routes + +The product API in [app/api/routes.py](polyguard-rl/app/api/routes.py) wraps the +environment, orchestrator, policy runtime, evaluation, evidence search, cases, +metrics, and medication-alternative tooling. Useful product-facing endpoints +include `/env/reset`, `/env/step_candidate`, `/agents/orchestrate`, +`/policy/infer`, `/policy/model_status`, `/eval/run_policy`, +`/metrics/training`, `/evidence/query`, and `/tools/medication_alternatives`. 
+ +![Deployment topology](polyguard-rl/docs/assets/diagrams/deployment_topology.png) + +## Operations And Deployment + +The repository keeps deployment and artifact operations explicit: + +| Surface | Files | +| --- | --- | +| Local/container runtime | [Dockerfile](polyguard-rl/Dockerfile), [Dockerfile.space](polyguard-rl/Dockerfile.space), [docker-compose.yml](polyguard-rl/docker-compose.yml), [requirements.txt](polyguard-rl/requirements.txt), [requirements-space.txt](polyguard-rl/requirements-space.txt) | +| Product Space/API deployment | [scripts/deploy_space.sh](polyguard-rl/scripts/deploy_space.sh), [scripts/deploy_space_api.py](polyguard-rl/scripts/deploy_space_api.py), [docs/deployment.md](polyguard-rl/docs/deployment.md) | +| Training and evidence Spaces | [scripts/deploy_training_space.py](polyguard-rl/scripts/deploy_training_space.py), [scripts/monitor_training_space_status.py](polyguard-rl/scripts/monitor_training_space_status.py), [app/hf_space/training_runner.py](polyguard-rl/app/hf_space/training_runner.py), [app/hf_space/evidence_runner.py](polyguard-rl/app/hf_space/evidence_runner.py) | +| Artifact packaging and activation | [scripts/deploy_final_artifact_space.py](polyguard-rl/scripts/deploy_final_artifact_space.py), [scripts/package_active_model_bundle.py](polyguard-rl/scripts/package_active_model_bundle.py), [scripts/install_hf_active_bundle.py](polyguard-rl/scripts/install_hf_active_bundle.py), [checkpoints/active/active_model_manifest.json](polyguard-rl/checkpoints/active/active_model_manifest.json) | +| Submission validation | [scripts/acceptance_gate.py](polyguard-rl/scripts/acceptance_gate.py), [scripts/validate_submission_links.py](polyguard-rl/scripts/validate_submission_links.py), [docs/submission_checklist.md](polyguard-rl/docs/submission_checklist.md), [docs/submission_artifacts.md](polyguard-rl/docs/submission_artifacts.md) | + +The important operational distinction is that local smoke artifacts, remote +training-space logs, final 
artifact packaging, and active-model installation are +separate stages. The final README claims are tied to the curated evidence +bundle, not to whichever intermediate output directory happens to exist in a +developer checkout. + +## UI Workbench + +The UI is a React 18 + Vite + TypeScript workbench under +[app/ui/frontend/](polyguard-rl/app/ui/frontend/). It is not the environment +itself; it is an operator surface over the API and OpenEnv runtime. + +[Live workbench Space](https://huggingface.co/spaces/TheJackBright/polyguard-openenv-workbench) + +![Frontend runtime surface](polyguard-rl/docs/assets/diagrams/frontend_runtime_surface.png) + +The main views cover: + +- Patient workbench. +- Episode replay. +- Policy comparison and policy lab. +- Precision dosing. +- Training monitor. +- Safety inspector. +- Candidate actions. +- Reward panel. +- Episode trace. +- Alternative medication search through `/tools/medication_alternatives`. + +The Patient Workbench shows the active model chip, current scenario, candidate +set, agent-vs-environment flow, reward breakdown, and action trace without +requiring the reader to inspect raw JSON. The UI is intentionally a workbench, +not a polished clinical application. + +### UI Sequence + +These screenshots are included in the repo under `polyguard-rl/docs/UI Images/`. +The image links below use URL-encoded paths so they render correctly when the +README is viewed on GitHub or inside the Hugging Face Space. + +1. The workbench opens with model truth, live episode context, scenario status, + candidate count, and reward state. + +![PolyGuard workbench overview](polyguard-rl/docs/UI%20Images/1.jpeg) + +2. The episode panel makes the patient, task, difficulty, sub-environment, risk + delta, and candidate-action console visible without reading raw JSON. + +![Episode overview and candidate console](polyguard-rl/docs/UI%20Images/2.jpeg) + +3. 
Candidate selection is paired with reward-channel feedback, current + medications, and blocked/available action visibility. + +![Candidate actions and reward channels](polyguard-rl/docs/UI%20Images/3.jpeg) + +4. After an action, the workbench exposes history, warnings, decision payload, + grounded facts, explanation, evidence, and event logs. + +![Action history, decision payload, and evidence](polyguard-rl/docs/UI%20Images/4.jpeg) + +5. The alternatives tool surfaces medication substitutions from the current + regimen and links out to source labels. + +![Medication alternatives tool](polyguard-rl/docs/UI%20Images/5.jpeg) + +## [UI Walkthrough Video](https://drive.google.com/file/d/1YOzad5gvx-tSmGzJNuBgokBF4-dX2T2H/view?usp=sharing) + +This walkthrough shows the deployed workbench surface, including the live model +chip, episode context, candidate actions, reward panels, and evidence-oriented +patient review flow. + +## [Agent In Action: Action Button Demo](https://drive.google.com/file/d/1eHk1v0OYJRrLWVO97ZclN05MYHxmNnmc/view?usp=sharing) + +This demo focuses on what the action button does: selecting a candidate, +submitting it through the environment, producing a verifier-scored transition, +and exposing the resulting reward, action history, warnings, and explanation. + +## [World Model Tool: Tavily And OpenFDA Alternative Suggestions](https://drive.google.com/file/d/1GaUyyaXaBCHjhHFbpkprojNt5pLNAoYi/view?usp=sharing) + +This tool demo shows the world-model support path for alternative medication +suggestions, using Tavily and the OpenFDA government database to retrieve +candidate alternatives and side-effect evidence for safer review. + +## Execution Path For Readers + +For a fresh reviewer, the intended path is: + +1. Read the artifact index: + [polyguard-rl/docs/submission_artifacts.md](polyguard-rl/docs/submission_artifacts.md). +2. 
Inspect the final curated evidence: + [polyguard-rl/docs/results/final_submission_evidence/README.md](polyguard-rl/docs/results/final_submission_evidence/README.md). +3. Open the one-run notebook: + [PolyGuard_SFT_GRPO_One_Run_Runner.ipynb](polyguard-rl/PolyGuard_SFT_GRPO_One_Run_Runner.ipynb). +4. For local smoke work, follow [docs/training.md](polyguard-rl/docs/training.md) + and the local scripts: + [scripts/run_env_local.sh](polyguard-rl/scripts/run_env_local.sh), + [scripts/run_api_local.sh](polyguard-rl/scripts/run_api_local.sh), and + [scripts/run_ui_local.sh](polyguard-rl/scripts/run_ui_local.sh). +5. For full training/reproduction, use the notebook or training docs rather + than copying private artifact commands out of old drafts. +6. For final public artifacts, use the final artifact Space: + [adithya9903/polyguard-openenv-final-artifacts](https://huggingface.co/spaces/adithya9903/polyguard-openenv-final-artifacts). + +## Evidence And Artifact Inventory + +Important evidence paths: + +- Final overview: + [docs/results/final_submission_evidence/README.md](polyguard-rl/docs/results/final_submission_evidence/README.md) +- Artifact manifest: + [docs/results/final_submission_evidence/manifest.json](polyguard-rl/docs/results/final_submission_evidence/manifest.json) +- Three-model summary: + [docs/results/final_submission_evidence/reports/submission_summary.json](polyguard-rl/docs/results/final_submission_evidence/reports/submission_summary.json) +- Qwen 3B GRPO report: + [docs/results/final_submission_evidence/reports/grpo_trl_run.json](polyguard-rl/docs/results/final_submission_evidence/reports/grpo_trl_run.json) +- Post-save GRPO inference: + [docs/results/final_submission_evidence/reports/postsave_inference_grpo.json](polyguard-rl/docs/results/final_submission_evidence/reports/postsave_inference_grpo.json) +- Basic LLM vs PolyGuard: + 
[docs/results/final_submission_evidence/reports/basic_llm_vs_polyguard_report.json](polyguard-rl/docs/results/final_submission_evidence/reports/basic_llm_vs_polyguard_report.json) +- Policy ablation: + [docs/results/final_submission_evidence/reports/policy_ablation_report.json](polyguard-rl/docs/results/final_submission_evidence/reports/policy_ablation_report.json) +- Action traces: + [docs/results/final_submission_evidence/reports/action_traces.jsonl](polyguard-rl/docs/results/final_submission_evidence/reports/action_traces.jsonl) +- Curated charts: + [docs/results/final_submission_evidence/charts/curated/README.md](polyguard-rl/docs/results/final_submission_evidence/charts/curated/README.md) + +Important tests: + +| Category | Tests | +| --- | --- | +| Environment contract | [tests/test_openenv_contract.py](polyguard-rl/tests/test_openenv_contract.py), [tests/test_env_reset.py](polyguard-rl/tests/test_env_reset.py), [tests/test_env_step.py](polyguard-rl/tests/test_env_step.py), [tests/test_env_step_flow.py](polyguard-rl/tests/test_env_step_flow.py), [tests/test_future_subenvs.py](polyguard-rl/tests/test_future_subenvs.py) | +| Reward and safety | [tests/test_reward_functions.py](polyguard-rl/tests/test_reward_functions.py), [tests/test_reward_range.py](polyguard-rl/tests/test_reward_range.py), [tests/test_reward_channels.py](polyguard-rl/tests/test_reward_channels.py), [tests/test_anti_cheat.py](polyguard-rl/tests/test_anti_cheat.py), [tests/test_constraints.py](polyguard-rl/tests/test_constraints.py), [tests/test_timeout_logic.py](polyguard-rl/tests/test_timeout_logic.py) | +| Policy and runtime | [tests/test_agents.py](polyguard-rl/tests/test_agents.py), [tests/test_contextual_bandit.py](polyguard-rl/tests/test_contextual_bandit.py), [tests/test_policy_schema.py](polyguard-rl/tests/test_policy_schema.py), [tests/test_provider_runtime.py](polyguard-rl/tests/test_provider_runtime.py), 
[tests/test_postsave_inference.py](polyguard-rl/tests/test_postsave_inference.py), [tests/test_checkpoint_integrity.py](polyguard-rl/tests/test_checkpoint_integrity.py) | +| API and product tooling | [tests/test_api.py](polyguard-rl/tests/test_api.py), [tests/test_medication_alternatives.py](polyguard-rl/tests/test_medication_alternatives.py), [tests/test_remote_env.py](polyguard-rl/tests/test_remote_env.py) | +| Data and evidence | [tests/test_parser.py](polyguard-rl/tests/test_parser.py), [tests/test_dataops_parser.py](polyguard-rl/tests/test_dataops_parser.py), [tests/test_graph_infer.py](polyguard-rl/tests/test_graph_infer.py), [tests/test_submission_evidence.py](polyguard-rl/tests/test_submission_evidence.py) | +| Submission, notebook, and HF flow | [tests/test_acceptance_gate.py](polyguard-rl/tests/test_acceptance_gate.py), [tests/test_runner_notebook.py](polyguard-rl/tests/test_runner_notebook.py), [tests/test_hf_training_sweep.py](polyguard-rl/tests/test_hf_training_sweep.py) | + +Additional architecture diagrams: + +- [System architecture](polyguard-rl/docs/assets/diagrams/system_architecture.png) +- [Runtime step flow](polyguard-rl/docs/assets/diagrams/runtime_step_flow.png) +- [Data and training pipeline](polyguard-rl/docs/assets/diagrams/data_training_pipeline.png) +- [Multi-agent orchestration](polyguard-rl/docs/assets/diagrams/multi_agent_orchestration.png) +- [Reward decomposition](polyguard-rl/docs/assets/diagrams/reward_decomposition.png) +- [Episode state machine](polyguard-rl/docs/assets/diagrams/episode_state_machine.png) +- [Evidence generation flow](polyguard-rl/docs/assets/diagrams/evidence_generation_flow.png) +- [Deployment topology](polyguard-rl/docs/assets/diagrams/deployment_topology.png) +- [Frontend runtime surface](polyguard-rl/docs/assets/diagrams/frontend_runtime_surface.png) + +## Limitations + +PolyGuard is a simulator and research environment. 
Its current data substrate is +compact and intentionally inspectable, not a production clinical knowledge base. +The final evidence set is strongest for Qwen 3B because that run has complete +SFT, GRPO, post-save GRPO, policy-ablation, adapter, and checkpoint evidence. +Qwen 0.5B and 1.5B have SFT reports/histories and post-save SFT evidence, but +their adapter directories are marked `reports_only_or_partial` in the final +manifest. + +The reward model is hand-designed and auditable; that is a feature for this +OpenEnv setting, but it also means reward-channel design should be stress-tested +as the data grows. The current ablations show that contextual bandits are useful +and inspectable, while the `llm+bandit` combined stack needs more tuning to +avoid loop-like failure behavior in some settings. + +The right conclusion is not "this is a clinical decision system." The right +conclusion is that constrained environment feedback, verifier-backed rewards, +agentic evaluation, and explicit failure mining are a better substrate for +safety-critical medication-policy learning than free-form prompt responses. + +## References + +- Alexandre Larouche, Audrey Durand, Richard Khoury, Caroline Sirois. + [Neural Bandits for Data Mining: Searching for Dangerous Polypharmacy](https://arxiv.org/abs/2212.05190). + arXiv:2212.05190. +- World Health Organization. + [Medication Without Harm](https://www.who.int/initiatives/medication-without-harm). +- CDC. + [FastStats: Medication Safety Data](https://www.cdc.gov/medication-safety/data-research/facts-stats/index.html). +- Shehab N, Lovegrove MC, Geller AI, et al. + [US Emergency Department Visits for Outpatient Adverse Drug Events, 2013-2014](https://jamanetwork.com/journals/jama/fullarticle/2585977). + JAMA. 2016;316(20):2115-2125. +- AHRQ / NCBI Bookshelf. + [Deprescribing To Reduce Medication Harms in Older Adults](https://www.ncbi.nlm.nih.gov/books/NBK600387/). +- American Geriatrics Society. 
+ [2023 updated AGS Beers Criteria for potentially inappropriate medication use in older adults](https://pmc.ncbi.nlm.nih.gov/articles/PMC12478568/). +- O'Mahony et al. + [STOPP/START criteria for potentially inappropriate prescribing in older people: version 3](https://pmc.ncbi.nlm.nih.gov/articles/PMC10447584/). + +## License + +The project package declares an MIT license in +[polyguard-rl/pyproject.toml](polyguard-rl/pyproject.toml). See +[polyguard-rl/LICENSE](polyguard-rl/LICENSE) for the license text. diff --git a/docs/assets/diagrams/episode_state_machine.png b/docs/assets/diagrams/episode_state_machine.png deleted file mode 100644 index 97406a57fb13e680d03aafc14dfcfe9c6e958a8e..0000000000000000000000000000000000000000 Binary files a/docs/assets/diagrams/episode_state_machine.png and /dev/null differ diff --git a/docs/assets/diagrams/evidence_generation_flow.png b/docs/assets/diagrams/evidence_generation_flow.png deleted file mode 100644 index 0d1724979458a7b91e14c10bba7eb3f478fbca71..0000000000000000000000000000000000000000 Binary files a/docs/assets/diagrams/evidence_generation_flow.png and /dev/null differ diff --git a/docs/assets/diagrams/runtime_step_flow.png b/docs/assets/diagrams/runtime_step_flow.png deleted file mode 100644 index f89c490bd560c58628efbc871be48d6391071e2b..0000000000000000000000000000000000000000 Binary files a/docs/assets/diagrams/runtime_step_flow.png and /dev/null differ diff --git a/docs/results/anti_cheat_failure_rates.png b/docs/results/anti_cheat_failure_rates.png deleted file mode 100644 index d427bcf89e3f4752273406d156b28047a6018b1d..0000000000000000000000000000000000000000 Binary files a/docs/results/anti_cheat_failure_rates.png and /dev/null differ diff --git a/docs/results/avg_process_fidelity.png b/docs/results/avg_process_fidelity.png deleted file mode 100644 index ef562e57a51bcaaec4664c89eb4d2c0c439e6231..0000000000000000000000000000000000000000 Binary files a/docs/results/avg_process_fidelity.png and /dev/null differ 
diff --git a/docs/results/avg_reward.png b/docs/results/avg_reward.png deleted file mode 100644 index edb2fa8c25074d88c90bce5c243af90dcb28e1c6..0000000000000000000000000000000000000000 Binary files a/docs/results/avg_reward.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/anti_cheat_failure_rates.png b/docs/results/final_submission_evidence/charts/all/anti_cheat_failure_rates.png deleted file mode 100644 index d427bcf89e3f4752273406d156b28047a6018b1d..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/anti_cheat_failure_rates.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/avg_reward.png b/docs/results/final_submission_evidence/charts/all/avg_reward.png deleted file mode 100644 index edb2fa8c25074d88c90bce5c243af90dcb28e1c6..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/avg_reward.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_latency.png b/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_latency.png deleted file mode 100644 index 02e20931b6ef796b3f1a0a9818ca0035bcb7b8a3..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_latency.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_legality.png b/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_legality.png deleted file mode 100644 index 180ef4bb099a8b7c254db02e1281cd8e308bf058..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_legality.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_reward.png 
b/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_reward.png deleted file mode 100644 index 630724370ea5b0c19b60ae41173f4c835d37accb..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_reward.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_reward_delta_by_seed.png deleted file mode 100644 index 636dcbb7a4d53f984f1cf1ef549bf581e6792604..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_reward_delta_by_seed.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/grpo_reward_curves.png b/docs/results/final_submission_evidence/charts/all/grpo_reward_curves.png deleted file mode 100644 index e65d51f9fa5b56301ea2a14915aaf2b240f1e5ea..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/grpo_reward_curves.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/inference_latency_validity.png b/docs/results/final_submission_evidence/charts/all/inference_latency_validity.png deleted file mode 100644 index 1037053ea236e314bff051771b9a686a294aa9a4..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/inference_latency_validity.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/inference_validity_reward.png b/docs/results/final_submission_evidence/charts/all/inference_validity_reward.png deleted file mode 100644 index e8dce9f4126e6e140650f1b0f29ad45975c93bc4..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/inference_validity_reward.png and /dev/null differ 
diff --git a/docs/results/final_submission_evidence/charts/all/legality_rate.png b/docs/results/final_submission_evidence/charts/all/legality_rate.png deleted file mode 100644 index b4c1e418b0262902ad1c9ad4818f4d9b22a152d0..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/legality_rate.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/policy_ablation_avg_reward.png b/docs/results/final_submission_evidence/charts/all/policy_ablation_avg_reward.png deleted file mode 100644 index 4baa16a56f2615342fadaaf8b08b3b6247f9824f..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/policy_ablation_avg_reward.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/policy_ablation_exploit_detection.png b/docs/results/final_submission_evidence/charts/all/policy_ablation_exploit_detection.png deleted file mode 100644 index 9cd4e59749283b799fd201f4891e317e5114bffe..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/policy_ablation_exploit_detection.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/policy_ablation_legality.png b/docs/results/final_submission_evidence/charts/all/policy_ablation_legality.png deleted file mode 100644 index 0d394038c07f85a7d92077d553ae570bfba07caf..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/policy_ablation_legality.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/policy_stack_avg_reward.png b/docs/results/final_submission_evidence/charts/all/policy_stack_avg_reward.png deleted file mode 100644 index b28dc57ac180e83b38194b17251e3cf3a5a941da..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/policy_stack_avg_reward.png and /dev/null 
differ diff --git a/docs/results/final_submission_evidence/charts/all/primary_reward_channel_bars.png b/docs/results/final_submission_evidence/charts/all/primary_reward_channel_bars.png deleted file mode 100644 index 2b33f8c40f985870bbf6ad986307cf9988ae229d..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/primary_reward_channel_bars.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_learning_rate.png b/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_learning_rate.png deleted file mode 100644 index fd2177cf3dc3a560ce5ecbd35643d74afdfb5e74..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_learning_rate.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png b/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png deleted file mode 100644 index 7536c6c7a9bf801667d66b1ef90d596a4babc2a1..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_training_loss.png b/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_training_loss.png deleted file mode 100644 index c02c8607fe3391354ab2842bd8a4b915dca9acfa..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_training_loss.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_final_sft_train_loss.png b/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_final_sft_train_loss.png deleted file 
mode 100644 index bb59d81635691028de9facebc81176101aa2c96c..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_final_sft_train_loss.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_postsave_latency.png b/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_postsave_latency.png deleted file mode 100644 index e153b1f095989dc4cf90174ea8b134f5d56199c5..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_postsave_latency.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_postsave_reward.png b/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_postsave_reward.png deleted file mode 100644 index bdf750941a51d0bb5f814bc40c4d38971e77c6a7..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_postsave_reward.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_remote_completed_stage_durations.png deleted file mode 100644 index acc838c0f6d0a4df5e224e9bbc255bc66bb4a321..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_remote_completed_stage_durations.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_sft_runtime.png b/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_sft_runtime.png deleted file mode 100644 index ecdef2a719de99be652196bcb0df57a243ae7cbe..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_sft_runtime.png and /dev/null differ diff --git 
a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_learning_rate.png b/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_learning_rate.png deleted file mode 100644 index 222b8f99d80c4b446a091c0cdaa298ba6bbde41d..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_learning_rate.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_token_accuracy.png b/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_token_accuracy.png deleted file mode 100644 index f000cd04d336995480104589dee2d11c19316c5a..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_token_accuracy.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_training_loss.png b/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_training_loss.png deleted file mode 100644 index b225367050c41c65547905cd4bc2e71f3cf386d2..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_training_loss.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/docs/results/final_submission_evidence/charts/all/qwen_0_5b_vs_1_5b_sft_loss_comparison.png deleted file mode 100644 index 486c32ae421f42e7c511b810ed0540ad43351e0c..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_vs_1_5b_sft_loss_comparison.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/docs/results/final_submission_evidence/charts/all/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png deleted file mode 100644 index 880bf409233e709dd4a37fe94f36935af77afc53..0000000000000000000000000000000000000000 
Binary files a/docs/results/final_submission_evidence/charts/all/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_learning_rate.png b/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_learning_rate.png deleted file mode 100644 index 86065d55a1123ffbbc66c590400e0876a4dd6625..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_learning_rate.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_token_accuracy.png b/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_token_accuracy.png deleted file mode 100644 index 333d48c0b38669090a62004e648ccd3c481d7f2f..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_token_accuracy.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_training_loss.png b/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_training_loss.png deleted file mode 100644 index d82b239d3c372b9ff6e6c38cb3807f2a92da29c2..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_training_loss.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen_model_grpo_reward.png b/docs/results/final_submission_evidence/charts/all/qwen_model_grpo_reward.png deleted file mode 100644 index ba56fd46b8319c7079ee914ec0058e4fe5c78fc9..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen_model_grpo_reward.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen_model_sft_loss.png b/docs/results/final_submission_evidence/charts/all/qwen_model_sft_loss.png deleted file mode 100644 index 
1704e1874b29e3940d039859473ab6c6976b910e..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen_model_sft_loss.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/qwen_model_sft_reward.png b/docs/results/final_submission_evidence/charts/all/qwen_model_sft_reward.png deleted file mode 100644 index c5462417c93e3527d7224d806ef80b153051050a..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/qwen_model_sft_reward.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/reward_component_bars.png b/docs/results/final_submission_evidence/charts/all/reward_component_bars.png deleted file mode 100644 index 850ed462c7e58b7ad2f4ab88cae557f95d1b689e..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/reward_component_bars.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/sft_loss_curves.png b/docs/results/final_submission_evidence/charts/all/sft_loss_curves.png deleted file mode 100644 index 60710fb94d95eba319e3426b4166a62877fe08cc..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/sft_loss_curves.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/sft_validity_reward.png b/docs/results/final_submission_evidence/charts/all/sft_validity_reward.png deleted file mode 100644 index db8560c0d68a0878ab4d91ea1d27ae77276e20ec..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/sft_validity_reward.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/sft_vs_grpo_reward.png b/docs/results/final_submission_evidence/charts/all/sft_vs_grpo_reward.png deleted file mode 100644 index 
0938d1b65b686f5a79f614601f7b434963e79094..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/sft_vs_grpo_reward.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/all/train_holdout_gap.png b/docs/results/final_submission_evidence/charts/all/train_holdout_gap.png deleted file mode 100644 index 3fbf53ce81e3f27087a3db7baecdfc37f81a74fc..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/all/train_holdout_gap.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/curated/inference/inference_latency_validity.png b/docs/results/final_submission_evidence/charts/curated/inference/inference_latency_validity.png deleted file mode 100644 index 1037053ea236e314bff051771b9a686a294aa9a4..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/curated/inference/inference_latency_validity.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/curated/inference/inference_validity_reward.png b/docs/results/final_submission_evidence/charts/curated/inference/inference_validity_reward.png deleted file mode 100644 index e8dce9f4126e6e140650f1b0f29ad45975c93bc4..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/curated/inference/inference_validity_reward.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/curated/model_comparison/qwen_model_grpo_reward.png b/docs/results/final_submission_evidence/charts/curated/model_comparison/qwen_model_grpo_reward.png deleted file mode 100644 index ba56fd46b8319c7079ee914ec0058e4fe5c78fc9..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/curated/model_comparison/qwen_model_grpo_reward.png and /dev/null differ diff --git 
a/docs/results/final_submission_evidence/charts/curated/model_comparison/qwen_model_sft_reward.png b/docs/results/final_submission_evidence/charts/curated/model_comparison/qwen_model_sft_reward.png deleted file mode 100644 index c5462417c93e3527d7224d806ef80b153051050a..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/curated/model_comparison/qwen_model_sft_reward.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/curated/model_comparison/sft_loss_by_qwen_size.png b/docs/results/final_submission_evidence/charts/curated/model_comparison/sft_loss_by_qwen_size.png deleted file mode 100644 index 4a687c2fae59bd3b623d905397c68b87b7ad1815..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/curated/model_comparison/sft_loss_by_qwen_size.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/curated/model_comparison/sft_vs_grpo_reward_by_model.png b/docs/results/final_submission_evidence/charts/curated/model_comparison/sft_vs_grpo_reward_by_model.png deleted file mode 100644 index e009a28787920140b8dabb013fad290e869d7ccb..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/curated/model_comparison/sft_vs_grpo_reward_by_model.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/curated/policy_ablation/policy_ablation_legality.png b/docs/results/final_submission_evidence/charts/curated/policy_ablation/policy_ablation_legality.png deleted file mode 100644 index 0d394038c07f85a7d92077d553ae570bfba07caf..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/curated/policy_ablation/policy_ablation_legality.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/curated/policy_ablation/policy_ablation_reward.png 
b/docs/results/final_submission_evidence/charts/curated/policy_ablation/policy_ablation_reward.png deleted file mode 100644 index 4baa16a56f2615342fadaaf8b08b3b6247f9824f..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/curated/policy_ablation/policy_ablation_reward.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/curated/product_over_basic_llm/basic_llm_vs_full_pipeline_reward.png b/docs/results/final_submission_evidence/charts/curated/product_over_basic_llm/basic_llm_vs_full_pipeline_reward.png deleted file mode 100644 index 630724370ea5b0c19b60ae41173f4c835d37accb..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/curated/product_over_basic_llm/basic_llm_vs_full_pipeline_reward.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/curated/product_over_basic_llm/reward_delta_by_seed.png b/docs/results/final_submission_evidence/charts/curated/product_over_basic_llm/reward_delta_by_seed.png deleted file mode 100644 index 636dcbb7a4d53f984f1cf1ef549bf581e6792604..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/curated/product_over_basic_llm/reward_delta_by_seed.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/curated/reward_and_safety/anti_cheat_failure_rates.png b/docs/results/final_submission_evidence/charts/curated/reward_and_safety/anti_cheat_failure_rates.png deleted file mode 100644 index d427bcf89e3f4752273406d156b28047a6018b1d..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/curated/reward_and_safety/anti_cheat_failure_rates.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/curated/reward_and_safety/primary_reward_channel_bars.png 
b/docs/results/final_submission_evidence/charts/curated/reward_and_safety/primary_reward_channel_bars.png deleted file mode 100644 index 2b33f8c40f985870bbf6ad986307cf9988ae229d..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/curated/reward_and_safety/primary_reward_channel_bars.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/curated/reward_and_safety/train_holdout_gap.png b/docs/results/final_submission_evidence/charts/curated/reward_and_safety/train_holdout_gap.png deleted file mode 100644 index 3fbf53ce81e3f27087a3db7baecdfc37f81a74fc..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/curated/reward_and_safety/train_holdout_gap.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_kl_curve.png b/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_kl_curve.png deleted file mode 100644 index df0388d2bc37ae0594eba5b44b3ec4b952011a6f..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_kl_curve.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_loss_curve.png b/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_loss_curve.png deleted file mode 100644 index ba43528fd6f127d7d99280c7ac5d87c4dfaaa02d..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_loss_curve.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_sft_training_loss.png b/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_sft_training_loss.png deleted file mode 100644 index c02c8607fe3391354ab2842bd8a4b915dca9acfa..0000000000000000000000000000000000000000 Binary 
files a/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_sft_training_loss.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/curated/training/sft_loss_curves_all_models.png b/docs/results/final_submission_evidence/charts/curated/training/sft_loss_curves_all_models.png deleted file mode 100644 index 60710fb94d95eba319e3426b4166a62877fe08cc..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/curated/training/sft_loss_curves_all_models.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/frontpage/00_sft_vs_grpo_reward_by_model.png b/docs/results/final_submission_evidence/charts/frontpage/00_sft_vs_grpo_reward_by_model.png deleted file mode 100644 index e009a28787920140b8dabb013fad290e869d7ccb..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/frontpage/00_sft_vs_grpo_reward_by_model.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/frontpage/01_basic_llm_vs_full_pipeline_reward.png b/docs/results/final_submission_evidence/charts/frontpage/01_basic_llm_vs_full_pipeline_reward.png deleted file mode 100644 index 630724370ea5b0c19b60ae41173f4c835d37accb..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/frontpage/01_basic_llm_vs_full_pipeline_reward.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/frontpage/02_reward_delta_by_seed.png b/docs/results/final_submission_evidence/charts/frontpage/02_reward_delta_by_seed.png deleted file mode 100644 index 636dcbb7a4d53f984f1cf1ef549bf581e6792604..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/frontpage/02_reward_delta_by_seed.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/frontpage/03_policy_ablation_reward.png 
b/docs/results/final_submission_evidence/charts/frontpage/03_policy_ablation_reward.png deleted file mode 100644 index 4baa16a56f2615342fadaaf8b08b3b6247f9824f..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/frontpage/03_policy_ablation_reward.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/frontpage/05_train_holdout_gap.png b/docs/results/final_submission_evidence/charts/frontpage/05_train_holdout_gap.png deleted file mode 100644 index 3fbf53ce81e3f27087a3db7baecdfc37f81a74fc..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/frontpage/05_train_holdout_gap.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/frontpage/06_inference_latency_validity.png b/docs/results/final_submission_evidence/charts/frontpage/06_inference_latency_validity.png deleted file mode 100644 index 1037053ea236e314bff051771b9a686a294aa9a4..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/frontpage/06_inference_latency_validity.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/frontpage/07_sft_vs_grpo_reward.png b/docs/results/final_submission_evidence/charts/frontpage/07_sft_vs_grpo_reward.png deleted file mode 100644 index 0938d1b65b686f5a79f614601f7b434963e79094..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/frontpage/07_sft_vs_grpo_reward.png and /dev/null differ diff --git a/docs/results/final_submission_evidence/charts/frontpage/08_sft_loss_by_model.png b/docs/results/final_submission_evidence/charts/frontpage/08_sft_loss_by_model.png deleted file mode 100644 index 4a687c2fae59bd3b623d905397c68b87b7ad1815..0000000000000000000000000000000000000000 Binary files a/docs/results/final_submission_evidence/charts/frontpage/08_sft_loss_by_model.png and /dev/null differ diff --git 
a/docs/results/grpo_reward_curves.png b/docs/results/grpo_reward_curves.png deleted file mode 100644 index e65d51f9fa5b56301ea2a14915aaf2b240f1e5ea..0000000000000000000000000000000000000000 Binary files a/docs/results/grpo_reward_curves.png and /dev/null differ diff --git a/docs/results/grpo_training_cycle/avg_process_fidelity.png b/docs/results/grpo_training_cycle/avg_process_fidelity.png deleted file mode 100644 index ef562e57a51bcaaec4664c89eb4d2c0c439e6231..0000000000000000000000000000000000000000 Binary files a/docs/results/grpo_training_cycle/avg_process_fidelity.png and /dev/null differ diff --git a/docs/results/grpo_training_cycle/avg_reward.png b/docs/results/grpo_training_cycle/avg_reward.png deleted file mode 100644 index edb2fa8c25074d88c90bce5c243af90dcb28e1c6..0000000000000000000000000000000000000000 Binary files a/docs/results/grpo_training_cycle/avg_reward.png and /dev/null differ diff --git a/docs/results/grpo_training_cycle/legality_rate.png b/docs/results/grpo_training_cycle/legality_rate.png deleted file mode 100644 index b4c1e418b0262902ad1c9ad4818f4d9b22a152d0..0000000000000000000000000000000000000000 Binary files a/docs/results/grpo_training_cycle/legality_rate.png and /dev/null differ diff --git a/docs/results/grpo_training_cycle/policy_stack_avg_reward.png b/docs/results/grpo_training_cycle/policy_stack_avg_reward.png deleted file mode 100644 index b28dc57ac180e83b38194b17251e3cf3a5a941da..0000000000000000000000000000000000000000 Binary files a/docs/results/grpo_training_cycle/policy_stack_avg_reward.png and /dev/null differ diff --git a/docs/results/grpo_training_cycle/success_rate.png b/docs/results/grpo_training_cycle/success_rate.png deleted file mode 100644 index b918ae36817cfb351bb924de05a638e1ee4c73c2..0000000000000000000000000000000000000000 Binary files a/docs/results/grpo_training_cycle/success_rate.png and /dev/null differ diff --git a/docs/results/inference_latency_validity.png b/docs/results/inference_latency_validity.png 
deleted file mode 100644 index 1037053ea236e314bff051771b9a686a294aa9a4..0000000000000000000000000000000000000000 Binary files a/docs/results/inference_latency_validity.png and /dev/null differ diff --git a/docs/results/inference_validity_reward.png b/docs/results/inference_validity_reward.png deleted file mode 100644 index e8dce9f4126e6e140650f1b0f29ad45975c93bc4..0000000000000000000000000000000000000000 Binary files a/docs/results/inference_validity_reward.png and /dev/null differ diff --git a/docs/results/legality_rate.png b/docs/results/legality_rate.png deleted file mode 100644 index b4c1e418b0262902ad1c9ad4818f4d9b22a152d0..0000000000000000000000000000000000000000 Binary files a/docs/results/legality_rate.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/grpo_training/grpo_reward_curves.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/grpo_training/grpo_reward_curves.png deleted file mode 100644 index e65d51f9fa5b56301ea2a14915aaf2b240f1e5ea..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/grpo_training/grpo_reward_curves.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/inference/inference_latency_validity.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/inference/inference_latency_validity.png deleted file mode 100644 index 1037053ea236e314bff051771b9a686a294aa9a4..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/inference/inference_latency_validity.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_grpo_reward.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_grpo_reward.png deleted file mode 100644 index 
ba56fd46b8319c7079ee914ec0058e4fe5c78fc9..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_grpo_reward.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_sft_loss.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_sft_loss.png deleted file mode 100644 index 1704e1874b29e3940d039859473ab6c6976b910e..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_sft_loss.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_sft_reward.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_sft_reward.png deleted file mode 100644 index c5462417c93e3527d7224d806ef80b153051050a..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_sft_reward.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/overfit_checks/train_holdout_gap.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/overfit_checks/train_holdout_gap.png deleted file mode 100644 index 3fbf53ce81e3f27087a3db7baecdfc37f81a74fc..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/overfit_checks/train_holdout_gap.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_ablation_avg_reward.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_ablation_avg_reward.png deleted file mode 100644 index 4baa16a56f2615342fadaaf8b08b3b6247f9824f..0000000000000000000000000000000000000000 Binary 
files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_ablation_avg_reward.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_ablation_legality.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_ablation_legality.png deleted file mode 100644 index 0d394038c07f85a7d92077d553ae570bfba07caf..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_ablation_legality.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_stack_avg_reward.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_stack_avg_reward.png deleted file mode 100644 index b28dc57ac180e83b38194b17251e3cf3a5a941da..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_stack_avg_reward.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_legality.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_legality.png deleted file mode 100644 index 180ef4bb099a8b7c254db02e1281cd8e308bf058..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_legality.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_reward.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_reward.png deleted file mode 100644 index 
630724370ea5b0c19b60ae41173f4c835d37accb..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_reward.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_reward_delta_by_seed.png deleted file mode 100644 index 636dcbb7a4d53f984f1cf1ef549bf581e6792604..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_reward_delta_by_seed.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/reward_function/primary_reward_channel_bars.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/reward_function/primary_reward_channel_bars.png deleted file mode 100644 index 2b33f8c40f985870bbf6ad986307cf9988ae229d..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/reward_function/primary_reward_channel_bars.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/safeguards/anti_cheat_failure_rates.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/safeguards/anti_cheat_failure_rates.png deleted file mode 100644 index d427bcf89e3f4752273406d156b28047a6018b1d..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/safeguards/anti_cheat_failure_rates.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/sft_vs_grpo/sft_vs_grpo_reward.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/sft_vs_grpo/sft_vs_grpo_reward.png 
deleted file mode 100644 index 0938d1b65b686f5a79f614601f7b434963e79094..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/sft_vs_grpo/sft_vs_grpo_reward.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_accuracy/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_accuracy/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png deleted file mode 100644 index 1536e4dfd6347ca64b03d084d313338b0c1b17ae..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_accuracy/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_0_5b_sft_training_loss.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_0_5b_sft_training_loss.png deleted file mode 100644 index b225367050c41c65547905cd4bc2e71f3cf386d2..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_0_5b_sft_training_loss.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_0_5b_vs_1_5b_sft_loss_comparison.png deleted file mode 100644 index 549ab73213108e91d56f76aa8bd4c69353075013..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_0_5b_vs_1_5b_sft_loss_comparison.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_1_5b_sft_training_loss.png 
b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_1_5b_sft_training_loss.png deleted file mode 100644 index d82b239d3c372b9ff6e6c38cb3807f2a92da29c2..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_1_5b_sft_training_loss.png and /dev/null differ diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_runtime/qwen_0_5b_1_5b_sft_runtime.png b/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_runtime/qwen_0_5b_1_5b_sft_runtime.png deleted file mode 100644 index 0b755a7340b17cfe28c46132a56d46c0be69893e..0000000000000000000000000000000000000000 Binary files a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_runtime/qwen_0_5b_1_5b_sft_runtime.png and /dev/null differ diff --git a/docs/results/policy_stack_avg_reward.png b/docs/results/policy_stack_avg_reward.png deleted file mode 100644 index b28dc57ac180e83b38194b17251e3cf3a5a941da..0000000000000000000000000000000000000000 Binary files a/docs/results/policy_stack_avg_reward.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_latency.png b/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_latency.png deleted file mode 100644 index c507a99ebf9c1eda100ef16e24048ff56068532a..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_latency.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_legality.png b/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_legality.png deleted file mode 100644 index 180ef4bb099a8b7c254db02e1281cd8e308bf058..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_legality.png 
and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward.png b/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward.png deleted file mode 100644 index 630724370ea5b0c19b60ae41173f4c835d37accb..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png deleted file mode 100644 index 636dcbb7a4d53f984f1cf1ef549bf581e6792604..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/policy_ablation_avg_reward.png b/docs/results/qwen_completed_runs/charts/generated/policy_ablation_avg_reward.png deleted file mode 100644 index 4baa16a56f2615342fadaaf8b08b3b6247f9824f..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/policy_ablation_avg_reward.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/policy_ablation_exploit_detection.png b/docs/results/qwen_completed_runs/charts/generated/policy_ablation_exploit_detection.png deleted file mode 100644 index 9cd4e59749283b799fd201f4891e317e5114bffe..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/policy_ablation_exploit_detection.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/policy_ablation_legality.png b/docs/results/qwen_completed_runs/charts/generated/policy_ablation_legality.png deleted file mode 100644 index 
0d394038c07f85a7d92077d553ae570bfba07caf..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/policy_ablation_legality.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/primary_reward_channel_bars.png b/docs/results/qwen_completed_runs/charts/generated/primary_reward_channel_bars.png deleted file mode 100644 index 2b33f8c40f985870bbf6ad986307cf9988ae229d..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/primary_reward_channel_bars.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png deleted file mode 100644 index faad3cf8e80a1a89048880cb1d9ad9caef5d77a8..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_latency.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_latency.png deleted file mode 100644 index 850bbc6cdb174041ac0bf912ea9e61943594d5eb..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_latency.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_reward.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_reward.png deleted file mode 100644 index 7efa5b5b6f9151b696747c77bfb565226b1c9e57..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_reward.png and /dev/null differ diff --git 
a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png deleted file mode 100644 index 8f9fdbd4ed1b3bbafe2d8d11c0abd602fa354888..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_sft_runtime.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_sft_runtime.png deleted file mode 100644 index 0b755a7340b17cfe28c46132a56d46c0be69893e..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_sft_runtime.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_learning_rate.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_learning_rate.png deleted file mode 100644 index 222b8f99d80c4b446a091c0cdaa298ba6bbde41d..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_learning_rate.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_token_accuracy.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_token_accuracy.png deleted file mode 100644 index f000cd04d336995480104589dee2d11c19316c5a..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_token_accuracy.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_training_loss.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_training_loss.png deleted file mode 100644 index b225367050c41c65547905cd4bc2e71f3cf386d2..0000000000000000000000000000000000000000 Binary files 
a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_training_loss.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png deleted file mode 100644 index 549ab73213108e91d56f76aa8bd4c69353075013..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png deleted file mode 100644 index 1536e4dfd6347ca64b03d084d313338b0c1b17ae..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_learning_rate.png b/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_learning_rate.png deleted file mode 100644 index 86065d55a1123ffbbc66c590400e0876a4dd6625..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_learning_rate.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_token_accuracy.png b/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_token_accuracy.png deleted file mode 100644 index 333d48c0b38669090a62004e648ccd3c481d7f2f..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_token_accuracy.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_training_loss.png 
b/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_training_loss.png deleted file mode 100644 index d82b239d3c372b9ff6e6c38cb3807f2a92da29c2..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_training_loss.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/anti_cheat_failure_rates.png b/docs/results/qwen_completed_runs/charts/local_available_combined/anti_cheat_failure_rates.png deleted file mode 100644 index d427bcf89e3f4752273406d156b28047a6018b1d..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/local_available_combined/anti_cheat_failure_rates.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/avg_reward.png b/docs/results/qwen_completed_runs/charts/local_available_combined/avg_reward.png deleted file mode 100644 index edb2fa8c25074d88c90bce5c243af90dcb28e1c6..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/local_available_combined/avg_reward.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/grpo_reward_curves.png b/docs/results/qwen_completed_runs/charts/local_available_combined/grpo_reward_curves.png deleted file mode 100644 index e65d51f9fa5b56301ea2a14915aaf2b240f1e5ea..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/local_available_combined/grpo_reward_curves.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/inference_latency_validity.png b/docs/results/qwen_completed_runs/charts/local_available_combined/inference_latency_validity.png deleted file mode 100644 index 1037053ea236e314bff051771b9a686a294aa9a4..0000000000000000000000000000000000000000 Binary files 
a/docs/results/qwen_completed_runs/charts/local_available_combined/inference_latency_validity.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/inference_validity_reward.png b/docs/results/qwen_completed_runs/charts/local_available_combined/inference_validity_reward.png deleted file mode 100644 index e8dce9f4126e6e140650f1b0f29ad45975c93bc4..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/local_available_combined/inference_validity_reward.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/legality_rate.png b/docs/results/qwen_completed_runs/charts/local_available_combined/legality_rate.png deleted file mode 100644 index b4c1e418b0262902ad1c9ad4818f4d9b22a152d0..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/local_available_combined/legality_rate.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/policy_stack_avg_reward.png b/docs/results/qwen_completed_runs/charts/local_available_combined/policy_stack_avg_reward.png deleted file mode 100644 index b28dc57ac180e83b38194b17251e3cf3a5a941da..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/local_available_combined/policy_stack_avg_reward.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_grpo_reward.png b/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_grpo_reward.png deleted file mode 100644 index ba56fd46b8319c7079ee914ec0058e4fe5c78fc9..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_grpo_reward.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_loss.png 
b/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_loss.png deleted file mode 100644 index 1704e1874b29e3940d039859473ab6c6976b910e..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_loss.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_reward.png b/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_reward.png deleted file mode 100644 index c5462417c93e3527d7224d806ef80b153051050a..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_reward.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/reward_component_bars.png b/docs/results/qwen_completed_runs/charts/local_available_combined/reward_component_bars.png deleted file mode 100644 index 850ed462c7e58b7ad2f4ab88cae557f95d1b689e..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/local_available_combined/reward_component_bars.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/sft_loss_curves.png b/docs/results/qwen_completed_runs/charts/local_available_combined/sft_loss_curves.png deleted file mode 100644 index 60710fb94d95eba319e3426b4166a62877fe08cc..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/local_available_combined/sft_loss_curves.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/sft_validity_reward.png b/docs/results/qwen_completed_runs/charts/local_available_combined/sft_validity_reward.png deleted file mode 100644 index db8560c0d68a0878ab4d91ea1d27ae77276e20ec..0000000000000000000000000000000000000000 Binary files 
a/docs/results/qwen_completed_runs/charts/local_available_combined/sft_validity_reward.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/sft_vs_grpo_reward.png b/docs/results/qwen_completed_runs/charts/local_available_combined/sft_vs_grpo_reward.png deleted file mode 100644 index 0938d1b65b686f5a79f614601f7b434963e79094..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/local_available_combined/sft_vs_grpo_reward.png and /dev/null differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/train_holdout_gap.png b/docs/results/qwen_completed_runs/charts/local_available_combined/train_holdout_gap.png deleted file mode 100644 index 3fbf53ce81e3f27087a3db7baecdfc37f81a74fc..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_completed_runs/charts/local_available_combined/train_holdout_gap.png and /dev/null differ diff --git a/docs/results/qwen_model_grpo_reward.png b/docs/results/qwen_model_grpo_reward.png deleted file mode 100644 index ba56fd46b8319c7079ee914ec0058e4fe5c78fc9..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_model_grpo_reward.png and /dev/null differ diff --git a/docs/results/qwen_model_sft_loss.png b/docs/results/qwen_model_sft_loss.png deleted file mode 100644 index 1704e1874b29e3940d039859473ab6c6976b910e..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_model_sft_loss.png and /dev/null differ diff --git a/docs/results/qwen_model_sft_reward.png b/docs/results/qwen_model_sft_reward.png deleted file mode 100644 index c5462417c93e3527d7224d806ef80b153051050a..0000000000000000000000000000000000000000 Binary files a/docs/results/qwen_model_sft_reward.png and /dev/null differ diff --git a/docs/results/reward_component_bars.png b/docs/results/reward_component_bars.png deleted file mode 100644 index 
850ed462c7e58b7ad2f4ab88cae557f95d1b689e..0000000000000000000000000000000000000000 Binary files a/docs/results/reward_component_bars.png and /dev/null differ diff --git a/docs/results/sft_loss_curves.png b/docs/results/sft_loss_curves.png deleted file mode 100644 index 60710fb94d95eba319e3426b4166a62877fe08cc..0000000000000000000000000000000000000000 Binary files a/docs/results/sft_loss_curves.png and /dev/null differ diff --git a/docs/results/sft_validity_reward.png b/docs/results/sft_validity_reward.png deleted file mode 100644 index db8560c0d68a0878ab4d91ea1d27ae77276e20ec..0000000000000000000000000000000000000000 Binary files a/docs/results/sft_validity_reward.png and /dev/null differ diff --git a/docs/results/sft_vs_grpo_reward.png b/docs/results/sft_vs_grpo_reward.png deleted file mode 100644 index 0938d1b65b686f5a79f614601f7b434963e79094..0000000000000000000000000000000000000000 Binary files a/docs/results/sft_vs_grpo_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_latency.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_latency.png deleted file mode 100644 index c507a99ebf9c1eda100ef16e24048ff56068532a..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_latency.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_legality.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_legality.png deleted file mode 100644 index 180ef4bb099a8b7c254db02e1281cd8e308bf058..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_legality.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward.png 
b/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward.png deleted file mode 100644 index 630724370ea5b0c19b60ae41173f4c835d37accb..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png deleted file mode 100644 index 636dcbb7a4d53f984f1cf1ef549bf581e6792604..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_avg_reward.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_avg_reward.png deleted file mode 100644 index 4baa16a56f2615342fadaaf8b08b3b6247f9824f..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_avg_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_exploit_detection.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_exploit_detection.png deleted file mode 100644 index 9cd4e59749283b799fd201f4891e317e5114bffe..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_exploit_detection.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_legality.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_legality.png deleted file mode 100644 index 0d394038c07f85a7d92077d553ae570bfba07caf..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_legality.png and 
/dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/primary_reward_channel_bars.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/primary_reward_channel_bars.png deleted file mode 100644 index 2b33f8c40f985870bbf6ad986307cf9988ae229d..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/primary_reward_channel_bars.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_final_sft_train_loss.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_final_sft_train_loss.png deleted file mode 100644 index faad3cf8e80a1a89048880cb1d9ad9caef5d77a8..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_final_sft_train_loss.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_latency.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_latency.png deleted file mode 100644 index 850bbc6cdb174041ac0bf912ea9e61943594d5eb..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_latency.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_reward.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_reward.png deleted file mode 100644 index 7efa5b5b6f9151b696747c77bfb565226b1c9e57..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_remote_completed_stage_durations.png deleted file mode 100644 index 
8f9fdbd4ed1b3bbafe2d8d11c0abd602fa354888..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_remote_completed_stage_durations.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_sft_runtime.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_sft_runtime.png deleted file mode 100644 index 0b755a7340b17cfe28c46132a56d46c0be69893e..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_sft_runtime.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_learning_rate.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_learning_rate.png deleted file mode 100644 index 222b8f99d80c4b446a091c0cdaa298ba6bbde41d..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_learning_rate.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_token_accuracy.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_token_accuracy.png deleted file mode 100644 index f000cd04d336995480104589dee2d11c19316c5a..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_token_accuracy.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_training_loss.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_training_loss.png deleted file mode 100644 index b225367050c41c65547905cd4bc2e71f3cf386d2..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_training_loss.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png 
b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png deleted file mode 100644 index 549ab73213108e91d56f76aa8bd4c69353075013..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png deleted file mode 100644 index 1536e4dfd6347ca64b03d084d313338b0c1b17ae..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_learning_rate.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_learning_rate.png deleted file mode 100644 index 86065d55a1123ffbbc66c590400e0876a4dd6625..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_learning_rate.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_token_accuracy.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_token_accuracy.png deleted file mode 100644 index 333d48c0b38669090a62004e648ccd3c481d7f2f..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_token_accuracy.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_training_loss.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_training_loss.png deleted file mode 100644 index d82b239d3c372b9ff6e6c38cb3807f2a92da29c2..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_training_loss.png and 
/dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_latency.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_latency.png deleted file mode 100644 index 22e5354ec7a37fe69cbbc1d7470164ead83ad14b..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_latency.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_legality.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_legality.png deleted file mode 100644 index 180ef4bb099a8b7c254db02e1281cd8e308bf058..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_legality.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward.png deleted file mode 100644 index 630724370ea5b0c19b60ae41173f4c835d37accb..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png deleted file mode 100644 index 636dcbb7a4d53f984f1cf1ef549bf581e6792604..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_avg_reward.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_avg_reward.png deleted file mode 100644 
index 4baa16a56f2615342fadaaf8b08b3b6247f9824f..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_avg_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_exploit_detection.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_exploit_detection.png deleted file mode 100644 index 9cd4e59749283b799fd201f4891e317e5114bffe..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_exploit_detection.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_legality.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_legality.png deleted file mode 100644 index 0d394038c07f85a7d92077d553ae570bfba07caf..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_legality.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/primary_reward_channel_bars.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/primary_reward_channel_bars.png deleted file mode 100644 index 2b33f8c40f985870bbf6ad986307cf9988ae229d..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/primary_reward_channel_bars.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_learning_rate.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_learning_rate.png deleted file mode 100644 index fd2177cf3dc3a560ce5ecbd35643d74afdfb5e74..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_learning_rate.png and /dev/null differ diff --git 
a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png deleted file mode 100644 index 7536c6c7a9bf801667d66b1ef90d596a4babc2a1..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_training_loss.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_training_loss.png deleted file mode 100644 index c02c8607fe3391354ab2842bd8a4b915dca9acfa..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_training_loss.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_final_sft_train_loss.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_final_sft_train_loss.png deleted file mode 100644 index bb59d81635691028de9facebc81176101aa2c96c..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_final_sft_train_loss.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_latency.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_latency.png deleted file mode 100644 index e402f82d70d8172b87407953f1c7489f5adae266..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_latency.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_reward.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_reward.png deleted file mode 100644 index 
1270598b557f9896c48ba0267bb6ceb96982d792..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_remote_completed_stage_durations.png deleted file mode 100644 index acc838c0f6d0a4df5e224e9bbc255bc66bb4a321..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_remote_completed_stage_durations.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_sft_runtime.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_sft_runtime.png deleted file mode 100644 index ecdef2a719de99be652196bcb0df57a243ae7cbe..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_sft_runtime.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_learning_rate.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_learning_rate.png deleted file mode 100644 index 222b8f99d80c4b446a091c0cdaa298ba6bbde41d..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_learning_rate.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_token_accuracy.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_token_accuracy.png deleted file mode 100644 index f000cd04d336995480104589dee2d11c19316c5a..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_token_accuracy.png and /dev/null differ diff --git 
a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_training_loss.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_training_loss.png deleted file mode 100644 index b225367050c41c65547905cd4bc2e71f3cf386d2..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_training_loss.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png deleted file mode 100644 index 486c32ae421f42e7c511b810ed0540ad43351e0c..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png deleted file mode 100644 index 880bf409233e709dd4a37fe94f36935af77afc53..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_learning_rate.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_learning_rate.png deleted file mode 100644 index 86065d55a1123ffbbc66c590400e0876a4dd6625..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_learning_rate.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_token_accuracy.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_token_accuracy.png deleted file mode 100644 index 
333d48c0b38669090a62004e648ccd3c481d7f2f..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_token_accuracy.png and /dev/null differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_training_loss.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_training_loss.png deleted file mode 100644 index d82b239d3c372b9ff6e6c38cb3807f2a92da29c2..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_training_loss.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_latency.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_latency.png deleted file mode 100644 index c507a99ebf9c1eda100ef16e24048ff56068532a..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_latency.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_legality.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_legality.png deleted file mode 100644 index 180ef4bb099a8b7c254db02e1281cd8e308bf058..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_legality.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward.png deleted file mode 100644 index 630724370ea5b0c19b60ae41173f4c835d37accb..0000000000000000000000000000000000000000 Binary files 
a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png deleted file mode 100644 index 636dcbb7a4d53f984f1cf1ef549bf581e6792604..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_avg_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_avg_reward.png deleted file mode 100644 index 4baa16a56f2615342fadaaf8b08b3b6247f9824f..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_avg_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_exploit_detection.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_exploit_detection.png deleted file mode 100644 index 9cd4e59749283b799fd201f4891e317e5114bffe..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_exploit_detection.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_legality.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_legality.png deleted file mode 100644 index 0d394038c07f85a7d92077d553ae570bfba07caf..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_legality.png and 
/dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/primary_reward_channel_bars.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/primary_reward_channel_bars.png deleted file mode 100644 index 2b33f8c40f985870bbf6ad986307cf9988ae229d..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/primary_reward_channel_bars.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png deleted file mode 100644 index faad3cf8e80a1a89048880cb1d9ad9caef5d77a8..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png deleted file mode 100644 index 850bbc6cdb174041ac0bf912ea9e61943594d5eb..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png deleted file mode 100644 index 7efa5b5b6f9151b696747c77bfb565226b1c9e57..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png and /dev/null differ diff --git 
a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png deleted file mode 100644 index 8f9fdbd4ed1b3bbafe2d8d11c0abd602fa354888..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png deleted file mode 100644 index 0b755a7340b17cfe28c46132a56d46c0be69893e..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_learning_rate.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_learning_rate.png deleted file mode 100644 index 222b8f99d80c4b446a091c0cdaa298ba6bbde41d..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_learning_rate.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_token_accuracy.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_token_accuracy.png deleted file mode 100644 index f000cd04d336995480104589dee2d11c19316c5a..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_token_accuracy.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_training_loss.png 
b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_training_loss.png deleted file mode 100644 index b225367050c41c65547905cd4bc2e71f3cf386d2..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_training_loss.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png deleted file mode 100644 index 549ab73213108e91d56f76aa8bd4c69353075013..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png deleted file mode 100644 index 1536e4dfd6347ca64b03d084d313338b0c1b17ae..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_learning_rate.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_learning_rate.png deleted file mode 100644 index 86065d55a1123ffbbc66c590400e0876a4dd6625..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_learning_rate.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_token_accuracy.png 
b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_token_accuracy.png deleted file mode 100644 index 333d48c0b38669090a62004e648ccd3c481d7f2f..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_token_accuracy.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_training_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_training_loss.png deleted file mode 100644 index d82b239d3c372b9ff6e6c38cb3807f2a92da29c2..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_training_loss.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/anti_cheat_failure_rates.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/anti_cheat_failure_rates.png deleted file mode 100644 index d427bcf89e3f4752273406d156b28047a6018b1d..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/anti_cheat_failure_rates.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/avg_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/avg_reward.png deleted file mode 100644 index edb2fa8c25074d88c90bce5c243af90dcb28e1c6..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/avg_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/grpo_reward_curves.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/grpo_reward_curves.png deleted file mode 100644 index 
e65d51f9fa5b56301ea2a14915aaf2b240f1e5ea..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/grpo_reward_curves.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/inference_latency_validity.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/inference_latency_validity.png deleted file mode 100644 index 1037053ea236e314bff051771b9a686a294aa9a4..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/inference_latency_validity.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/inference_validity_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/inference_validity_reward.png deleted file mode 100644 index e8dce9f4126e6e140650f1b0f29ad45975c93bc4..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/inference_validity_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/legality_rate.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/legality_rate.png deleted file mode 100644 index b4c1e418b0262902ad1c9ad4818f4d9b22a152d0..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/legality_rate.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/policy_stack_avg_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/policy_stack_avg_reward.png deleted file mode 100644 index 
b28dc57ac180e83b38194b17251e3cf3a5a941da..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/policy_stack_avg_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_grpo_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_grpo_reward.png deleted file mode 100644 index ba56fd46b8319c7079ee914ec0058e4fe5c78fc9..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_grpo_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_loss.png deleted file mode 100644 index 1704e1874b29e3940d039859473ab6c6976b910e..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_loss.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_reward.png deleted file mode 100644 index c5462417c93e3527d7224d806ef80b153051050a..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/reward_component_bars.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/reward_component_bars.png deleted file mode 100644 index 
850ed462c7e58b7ad2f4ab88cae557f95d1b689e..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/reward_component_bars.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_loss_curves.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_loss_curves.png deleted file mode 100644 index 60710fb94d95eba319e3426b4166a62877fe08cc..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_loss_curves.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_validity_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_validity_reward.png deleted file mode 100644 index db8560c0d68a0878ab4d91ea1d27ae77276e20ec..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_validity_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_vs_grpo_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_vs_grpo_reward.png deleted file mode 100644 index 0938d1b65b686f5a79f614601f7b434963e79094..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_vs_grpo_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/train_holdout_gap.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/train_holdout_gap.png deleted file mode 100644 index 3fbf53ce81e3f27087a3db7baecdfc37f81a74fc..0000000000000000000000000000000000000000 Binary files 
a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/train_holdout_gap.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_latency.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_latency.png deleted file mode 100644 index 02e20931b6ef796b3f1a0a9818ca0035bcb7b8a3..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_latency.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_legality.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_legality.png deleted file mode 100644 index 180ef4bb099a8b7c254db02e1281cd8e308bf058..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_legality.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward.png deleted file mode 100644 index 630724370ea5b0c19b60ae41173f4c835d37accb..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png deleted file mode 100644 index 636dcbb7a4d53f984f1cf1ef549bf581e6792604..0000000000000000000000000000000000000000 Binary files 
a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_avg_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_avg_reward.png deleted file mode 100644 index 4baa16a56f2615342fadaaf8b08b3b6247f9824f..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_avg_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_exploit_detection.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_exploit_detection.png deleted file mode 100644 index 9cd4e59749283b799fd201f4891e317e5114bffe..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_exploit_detection.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_legality.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_legality.png deleted file mode 100644 index 0d394038c07f85a7d92077d553ae570bfba07caf..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_legality.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/primary_reward_channel_bars.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/primary_reward_channel_bars.png deleted file mode 100644 index 2b33f8c40f985870bbf6ad986307cf9988ae229d..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/primary_reward_channel_bars.png and 
/dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_learning_rate.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_learning_rate.png deleted file mode 100644 index fd2177cf3dc3a560ce5ecbd35643d74afdfb5e74..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_learning_rate.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png deleted file mode 100644 index 7536c6c7a9bf801667d66b1ef90d596a4babc2a1..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_training_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_training_loss.png deleted file mode 100644 index c02c8607fe3391354ab2842bd8a4b915dca9acfa..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_training_loss.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png deleted file mode 100644 index bb59d81635691028de9facebc81176101aa2c96c..0000000000000000000000000000000000000000 Binary files 
a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png deleted file mode 100644 index e153b1f095989dc4cf90174ea8b134f5d56199c5..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png deleted file mode 100644 index bdf750941a51d0bb5f814bc40c4d38971e77c6a7..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png deleted file mode 100644 index acc838c0f6d0a4df5e224e9bbc255bc66bb4a321..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png deleted file mode 100644 index ecdef2a719de99be652196bcb0df57a243ae7cbe..0000000000000000000000000000000000000000 Binary files 
a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_learning_rate.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_learning_rate.png deleted file mode 100644 index 222b8f99d80c4b446a091c0cdaa298ba6bbde41d..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_learning_rate.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_token_accuracy.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_token_accuracy.png deleted file mode 100644 index f000cd04d336995480104589dee2d11c19316c5a..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_token_accuracy.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_training_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_training_loss.png deleted file mode 100644 index b225367050c41c65547905cd4bc2e71f3cf386d2..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_training_loss.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png deleted file mode 100644 index 486c32ae421f42e7c511b810ed0540ad43351e0c..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png and 
/dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png deleted file mode 100644 index 880bf409233e709dd4a37fe94f36935af77afc53..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_learning_rate.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_learning_rate.png deleted file mode 100644 index 86065d55a1123ffbbc66c590400e0876a4dd6625..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_learning_rate.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_token_accuracy.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_token_accuracy.png deleted file mode 100644 index 333d48c0b38669090a62004e648ccd3c481d7f2f..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_token_accuracy.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_training_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_training_loss.png deleted file mode 100644 index d82b239d3c372b9ff6e6c38cb3807f2a92da29c2..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_training_loss.png and /dev/null differ diff --git 
a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/anti_cheat_failure_rates.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/anti_cheat_failure_rates.png deleted file mode 100644 index d427bcf89e3f4752273406d156b28047a6018b1d..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/anti_cheat_failure_rates.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/avg_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/avg_reward.png deleted file mode 100644 index edb2fa8c25074d88c90bce5c243af90dcb28e1c6..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/avg_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/grpo_reward_curves.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/grpo_reward_curves.png deleted file mode 100644 index e65d51f9fa5b56301ea2a14915aaf2b240f1e5ea..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/grpo_reward_curves.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/inference_latency_validity.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/inference_latency_validity.png deleted file mode 100644 index 1037053ea236e314bff051771b9a686a294aa9a4..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/inference_latency_validity.png and /dev/null differ diff --git 
a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/inference_validity_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/inference_validity_reward.png deleted file mode 100644 index e8dce9f4126e6e140650f1b0f29ad45975c93bc4..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/inference_validity_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/legality_rate.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/legality_rate.png deleted file mode 100644 index b4c1e418b0262902ad1c9ad4818f4d9b22a152d0..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/legality_rate.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/policy_stack_avg_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/policy_stack_avg_reward.png deleted file mode 100644 index b28dc57ac180e83b38194b17251e3cf3a5a941da..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/policy_stack_avg_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_grpo_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_grpo_reward.png deleted file mode 100644 index ba56fd46b8319c7079ee914ec0058e4fe5c78fc9..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_grpo_reward.png and /dev/null differ diff --git 
a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_loss.png deleted file mode 100644 index 1704e1874b29e3940d039859473ab6c6976b910e..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_loss.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_reward.png deleted file mode 100644 index c5462417c93e3527d7224d806ef80b153051050a..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/reward_component_bars.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/reward_component_bars.png deleted file mode 100644 index 850ed462c7e58b7ad2f4ab88cae557f95d1b689e..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/reward_component_bars.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_loss_curves.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_loss_curves.png deleted file mode 100644 index 60710fb94d95eba319e3426b4166a62877fe08cc..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_loss_curves.png and /dev/null differ diff --git 
a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_validity_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_validity_reward.png deleted file mode 100644 index db8560c0d68a0878ab4d91ea1d27ae77276e20ec..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_validity_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_vs_grpo_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_vs_grpo_reward.png deleted file mode 100644 index 0938d1b65b686f5a79f614601f7b434963e79094..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_vs_grpo_reward.png and /dev/null differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/train_holdout_gap.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/train_holdout_gap.png deleted file mode 100644 index 3fbf53ce81e3f27087a3db7baecdfc37f81a74fc..0000000000000000000000000000000000000000 Binary files a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/train_holdout_gap.png and /dev/null differ diff --git a/docs/results/success_rate.png b/docs/results/success_rate.png deleted file mode 100644 index b918ae36817cfb351bb924de05a638e1ee4c73c2..0000000000000000000000000000000000000000 Binary files a/docs/results/success_rate.png and /dev/null differ diff --git a/docs/results/train_holdout_gap.png b/docs/results/train_holdout_gap.png deleted file mode 100644 index 3fbf53ce81e3f27087a3db7baecdfc37f81a74fc..0000000000000000000000000000000000000000 Binary files a/docs/results/train_holdout_gap.png and /dev/null differ diff --git a/.dockerignore 
b/polyguard-rl/.dockerignore similarity index 100% rename from .dockerignore rename to polyguard-rl/.dockerignore diff --git a/.env.example b/polyguard-rl/.env.example similarity index 100% rename from .env.example rename to polyguard-rl/.env.example diff --git a/.gitignore b/polyguard-rl/.gitignore similarity index 100% rename from .gitignore rename to polyguard-rl/.gitignore diff --git a/polyguard-rl/Dockerfile b/polyguard-rl/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..9ce64c71f25ee6068c4c1dc973c42411e5f5c316 --- /dev/null +++ b/polyguard-rl/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3.11-slim + +WORKDIR /app +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +EXPOSE 8100 8200 + +CMD ["python", "-m", "server.app", "--host", "0.0.0.0", "--port", "8100"] diff --git a/Dockerfile.space b/polyguard-rl/Dockerfile.space similarity index 100% rename from Dockerfile.space rename to polyguard-rl/Dockerfile.space diff --git a/LICENSE b/polyguard-rl/LICENSE similarity index 100% rename from LICENSE rename to polyguard-rl/LICENSE diff --git a/Makefile b/polyguard-rl/Makefile similarity index 100% rename from Makefile rename to polyguard-rl/Makefile diff --git a/PolyGuard_SFT_GRPO_One_Run_Runner.ipynb b/polyguard-rl/PolyGuard_SFT_GRPO_One_Run_Runner.ipynb similarity index 100% rename from PolyGuard_SFT_GRPO_One_Run_Runner.ipynb rename to polyguard-rl/PolyGuard_SFT_GRPO_One_Run_Runner.ipynb diff --git a/polyguard-rl/README.md b/polyguard-rl/README.md new file mode 100644 index 0000000000000000000000000000000000000000..961f3fa9885320a533dbde69d5430f5f7177b6ae --- /dev/null +++ b/polyguard-rl/README.md @@ -0,0 +1,59 @@ +# PolyGuard (OpenEnv implementation package) + +Run all CLI commands from this directory (`cd polyguard-rl`). The repository root [`README.md`](../README.md) carries the same submission narrative with paths adjusted for viewers landing on the GitHub repo home page. 
+ +## Submission Links + +- GitHub Repo URL: [https://github.com/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK](https://github.com/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK) +- HF Space URL: [https://huggingface.co/spaces/TheJackBright/polyguard-openenv-workbench](https://huggingface.co/spaces/TheJackBright/polyguard-openenv-workbench) +- Colab Notebook URL: [https://colab.research.google.com/github/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK/blob/master/polyguard-rl/PolyGuard_SFT_GRPO_One_Run_Runner.ipynb](https://colab.research.google.com/github/Vishwa-docs/Meta_Pytorch_OpenEnv_Scaler_VK/blob/master/polyguard-rl/PolyGuard_SFT_GRPO_One_Run_Runner.ipynb) (see also `notebooks/09_training_loop.ipynb` for a modular training walkthrough) +- YouTube Video URL: not used for this submission; the repository root README is the story artifact. +- Story artifact: the repository root [`README.md`](../README.md) is the final blog-style narrative and evidence map. + +## Shared Environment, Logs, And Scripts + +The required environment files, training logs, and training scripts are shared +in the repo and indexed in [Submission Artifact Index](docs/submission_artifacts.md). + +- Environment/runtime: `openenv.yaml`, `pyproject.toml`, `uv.lock`, `requirements*.txt`, `Dockerfile*`, `app/env/`, `server/app.py`, and `app/hf_space/Dockerfile`. +- Training scripts/notebooks: `PolyGuard_SFT_GRPO_One_Run_Runner.ipynb`, `notebooks/09_training_loop.ipynb`, `scripts/train_sft_trl.py`, `scripts/train_grpo_trl.py`, `scripts/deploy_training_space.py`, `app/hf_space/training_runner.py`, and `app/training/`. +- Training logs/results: `docs/results/final_submission_evidence/reports/`, `docs/results/sweeps/`, `docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/`, and `docs/results/qwen_completed_runs/reports/`. 
+- Final downloadable artifact Space: [https://huggingface.co/spaces/adithya9903/polyguard-openenv-final-artifacts](https://huggingface.co/spaces/adithya9903/polyguard-openenv-final-artifacts). + +## Problem Statement + +Polypharmacy decisions are long-horizon, partially observable, and safety-critical. PolyGuard is a research environment where an LLM agent selects constrained clinical actions, receives verifier-backed reward, and improves via SFT + GRPO—not generic open-ended chat fine-tuning. + +## Environment + +`PolyGuardEnv` exposes OpenEnv-style HTTP/WebSocket endpoints (`/reset`, `/step`, `/state`, `/metadata`, `/schema`, `/mcp`, `/health`, `/ws`). Sub-environments include DDI, bandit mining, regimen risk, precision dosing, longitudinal deprescribing, web-search missing data, alternative suggestion, and new-drug decomposition. See `openenv.yaml`, `app/env/env_core.py`, `app/env/fastapi_app.py`, and `docs/environment_design.md`. + +## Agent Capabilities + +Medication reconciliation, evidence retrieval, graph safety, dosing guardrails, candidate generation, supervisor routing, planner/critic stack, explanations, and contextual bandit ranking for ablations (`app/agents/`, `docs/agents.md`). + +## Tasks + +DDI risk reduction, safe adds/substitutions, regimen optimization, taper/deprescribing sequences, precision dosing, missing-data recovery, and new-drug decomposition (`data/scenarios/`, `app/env/catalog.py`). + +## Reward Model / Evaluation Logic + +Thirteen verifier-backed reward components roll up into four primary channels (`safety_legality`, `clinical_improvement`, `dosing_quality`, `process_integrity`), clamped to `[0.001, 0.999]`, with anti-cheat and timeout logic (`app/env/reward_router.py`, `app/env/anti_cheat.py`, `docs/reward_design.md`). 
+ +## Training And Post-Training Strategy + +Build corpora (`scripts/bootstrap_data.py`, `scripts/build_training_corpus.py`), SFT with TRL (`scripts/train_sft_trl.py`), GRPO with environment reward (`scripts/train_grpo_trl.py`), merge adapters (`scripts/merge_adapters_safe.py`), validate inference (`scripts/test_inference_postsave.py`), evaluate and plot (`scripts/evaluate_*.py`, `docs/results/`). Optional HF GPU training uses `scripts/deploy_training_space.py`; public review should start with the repository root [`README.md`](../README.md), then `docs/training.md` for implementation notes. + +## Documentation index + +- [Architecture](docs/architecture.md) +- [Environment](docs/environment_design.md) +- [Rewards](docs/reward_design.md) +- [Training](docs/training.md) +- [Evaluation](docs/evaluation.md) +- [Deployment](docs/deployment.md) +- [Datasets](docs/datasets.md) +- [Participant guide traceability](docs/participant_guide_traceability.md) +- [Idea doc vs implementation](docs/idea_document_traceability.md) +- [Submission artifact index](docs/submission_artifacts.md) +- [**Space UI demo script**](docs/DEMO_RECORDING_SCRIPT.md) diff --git a/__init__.py b/polyguard-rl/__init__.py similarity index 100% rename from __init__.py rename to polyguard-rl/__init__.py diff --git a/app/__init__.py b/polyguard-rl/app/__init__.py similarity index 100% rename from app/__init__.py rename to polyguard-rl/app/__init__.py diff --git a/app/agents/__init__.py b/polyguard-rl/app/agents/__init__.py similarity index 100% rename from app/agents/__init__.py rename to polyguard-rl/app/agents/__init__.py diff --git a/app/agents/candidate_agent.py b/polyguard-rl/app/agents/candidate_agent.py similarity index 100% rename from app/agents/candidate_agent.py rename to polyguard-rl/app/agents/candidate_agent.py diff --git a/app/agents/critic_agent.py b/polyguard-rl/app/agents/critic_agent.py similarity index 100% rename from app/agents/critic_agent.py rename to 
polyguard-rl/app/agents/critic_agent.py diff --git a/app/agents/critic_safety_agent.py b/polyguard-rl/app/agents/critic_safety_agent.py similarity index 100% rename from app/agents/critic_safety_agent.py rename to polyguard-rl/app/agents/critic_safety_agent.py diff --git a/app/agents/dosing_agent.py b/polyguard-rl/app/agents/dosing_agent.py similarity index 100% rename from app/agents/dosing_agent.py rename to polyguard-rl/app/agents/dosing_agent.py diff --git a/app/agents/evidence_agent.py b/polyguard-rl/app/agents/evidence_agent.py similarity index 100% rename from app/agents/evidence_agent.py rename to polyguard-rl/app/agents/evidence_agent.py diff --git a/app/agents/explainer_agent.py b/polyguard-rl/app/agents/explainer_agent.py similarity index 100% rename from app/agents/explainer_agent.py rename to polyguard-rl/app/agents/explainer_agent.py diff --git a/app/agents/graph_agent.py b/polyguard-rl/app/agents/graph_agent.py similarity index 100% rename from app/agents/graph_agent.py rename to polyguard-rl/app/agents/graph_agent.py diff --git a/app/agents/graph_safety_agent.py b/polyguard-rl/app/agents/graph_safety_agent.py similarity index 100% rename from app/agents/graph_safety_agent.py rename to polyguard-rl/app/agents/graph_safety_agent.py diff --git a/app/agents/medrec_agent.py b/polyguard-rl/app/agents/medrec_agent.py similarity index 100% rename from app/agents/medrec_agent.py rename to polyguard-rl/app/agents/medrec_agent.py diff --git a/app/agents/orchestrator.py b/polyguard-rl/app/agents/orchestrator.py similarity index 100% rename from app/agents/orchestrator.py rename to polyguard-rl/app/agents/orchestrator.py diff --git a/app/agents/planner_agent.py b/polyguard-rl/app/agents/planner_agent.py similarity index 100% rename from app/agents/planner_agent.py rename to polyguard-rl/app/agents/planner_agent.py diff --git a/app/agents/supervisor_agent.py b/polyguard-rl/app/agents/supervisor_agent.py similarity index 100% rename from 
app/agents/supervisor_agent.py rename to polyguard-rl/app/agents/supervisor_agent.py diff --git a/app/api/__init__.py b/polyguard-rl/app/api/__init__.py similarity index 100% rename from app/api/__init__.py rename to polyguard-rl/app/api/__init__.py diff --git a/app/api/__main__.py b/polyguard-rl/app/api/__main__.py similarity index 100% rename from app/api/__main__.py rename to polyguard-rl/app/api/__main__.py diff --git a/app/api/dependencies.py b/polyguard-rl/app/api/dependencies.py similarity index 100% rename from app/api/dependencies.py rename to polyguard-rl/app/api/dependencies.py diff --git a/app/api/main.py b/polyguard-rl/app/api/main.py similarity index 100% rename from app/api/main.py rename to polyguard-rl/app/api/main.py diff --git a/app/api/routes.py b/polyguard-rl/app/api/routes.py similarity index 100% rename from app/api/routes.py rename to polyguard-rl/app/api/routes.py diff --git a/app/api/schemas.py b/polyguard-rl/app/api/schemas.py similarity index 100% rename from app/api/schemas.py rename to polyguard-rl/app/api/schemas.py diff --git a/app/api/service.py b/polyguard-rl/app/api/service.py similarity index 100% rename from app/api/service.py rename to polyguard-rl/app/api/service.py diff --git a/app/common/config.py b/polyguard-rl/app/common/config.py similarity index 100% rename from app/common/config.py rename to polyguard-rl/app/common/config.py diff --git a/app/common/constants.py b/polyguard-rl/app/common/constants.py similarity index 100% rename from app/common/constants.py rename to polyguard-rl/app/common/constants.py diff --git a/app/common/enums.py b/polyguard-rl/app/common/enums.py similarity index 100% rename from app/common/enums.py rename to polyguard-rl/app/common/enums.py diff --git a/app/common/exceptions.py b/polyguard-rl/app/common/exceptions.py similarity index 100% rename from app/common/exceptions.py rename to polyguard-rl/app/common/exceptions.py diff --git a/app/common/json_utils.py 
b/polyguard-rl/app/common/json_utils.py similarity index 100% rename from app/common/json_utils.py rename to polyguard-rl/app/common/json_utils.py diff --git a/app/common/logging_utils.py b/polyguard-rl/app/common/logging_utils.py similarity index 100% rename from app/common/logging_utils.py rename to polyguard-rl/app/common/logging_utils.py diff --git a/app/common/normalization.py b/polyguard-rl/app/common/normalization.py similarity index 100% rename from app/common/normalization.py rename to polyguard-rl/app/common/normalization.py diff --git a/app/common/seeding.py b/polyguard-rl/app/common/seeding.py similarity index 100% rename from app/common/seeding.py rename to polyguard-rl/app/common/seeding.py diff --git a/app/common/types.py b/polyguard-rl/app/common/types.py similarity index 100% rename from app/common/types.py rename to polyguard-rl/app/common/types.py diff --git a/app/dataops/__init__.py b/polyguard-rl/app/dataops/__init__.py similarity index 100% rename from app/dataops/__init__.py rename to polyguard-rl/app/dataops/__init__.py diff --git a/app/dataops/ddi_api.py b/polyguard-rl/app/dataops/ddi_api.py similarity index 100% rename from app/dataops/ddi_api.py rename to polyguard-rl/app/dataops/ddi_api.py diff --git a/app/dataops/normalizer.py b/polyguard-rl/app/dataops/normalizer.py similarity index 100% rename from app/dataops/normalizer.py rename to polyguard-rl/app/dataops/normalizer.py diff --git a/app/dataops/package_loader.py b/polyguard-rl/app/dataops/package_loader.py similarity index 100% rename from app/dataops/package_loader.py rename to polyguard-rl/app/dataops/package_loader.py diff --git a/app/dataops/parser.py b/polyguard-rl/app/dataops/parser.py similarity index 100% rename from app/dataops/parser.py rename to polyguard-rl/app/dataops/parser.py diff --git a/app/dataops/provenance.py b/polyguard-rl/app/dataops/provenance.py similarity index 100% rename from app/dataops/provenance.py rename to polyguard-rl/app/dataops/provenance.py diff 
--git a/app/dataops/scraper.py b/polyguard-rl/app/dataops/scraper.py similarity index 100% rename from app/dataops/scraper.py rename to polyguard-rl/app/dataops/scraper.py diff --git a/app/dataops/source_manager.py b/polyguard-rl/app/dataops/source_manager.py similarity index 100% rename from app/dataops/source_manager.py rename to polyguard-rl/app/dataops/source_manager.py diff --git a/app/dataops/synthetic_mix.py b/polyguard-rl/app/dataops/synthetic_mix.py similarity index 100% rename from app/dataops/synthetic_mix.py rename to polyguard-rl/app/dataops/synthetic_mix.py diff --git a/app/dataops/web_agent.py b/polyguard-rl/app/dataops/web_agent.py similarity index 100% rename from app/dataops/web_agent.py rename to polyguard-rl/app/dataops/web_agent.py diff --git a/app/dataops/web_fallback.py b/polyguard-rl/app/dataops/web_fallback.py similarity index 100% rename from app/dataops/web_fallback.py rename to polyguard-rl/app/dataops/web_fallback.py diff --git a/app/env/__init__.py b/polyguard-rl/app/env/__init__.py similarity index 100% rename from app/env/__init__.py rename to polyguard-rl/app/env/__init__.py diff --git a/app/env/actions.py b/polyguard-rl/app/env/actions.py similarity index 100% rename from app/env/actions.py rename to polyguard-rl/app/env/actions.py diff --git a/app/env/anti_cheat.py b/polyguard-rl/app/env/anti_cheat.py similarity index 100% rename from app/env/anti_cheat.py rename to polyguard-rl/app/env/anti_cheat.py diff --git a/app/env/catalog.py b/polyguard-rl/app/env/catalog.py similarity index 100% rename from app/env/catalog.py rename to polyguard-rl/app/env/catalog.py diff --git a/app/env/client.py b/polyguard-rl/app/env/client.py similarity index 100% rename from app/env/client.py rename to polyguard-rl/app/env/client.py diff --git a/app/env/curriculum.py b/polyguard-rl/app/env/curriculum.py similarity index 100% rename from app/env/curriculum.py rename to polyguard-rl/app/env/curriculum.py diff --git a/app/env/env_core.py 
b/polyguard-rl/app/env/env_core.py similarity index 100% rename from app/env/env_core.py rename to polyguard-rl/app/env/env_core.py diff --git a/app/env/environment_a.py b/polyguard-rl/app/env/environment_a.py similarity index 100% rename from app/env/environment_a.py rename to polyguard-rl/app/env/environment_a.py diff --git a/app/env/environment_b.py b/polyguard-rl/app/env/environment_b.py similarity index 100% rename from app/env/environment_b.py rename to polyguard-rl/app/env/environment_b.py diff --git a/app/env/environment_c.py b/polyguard-rl/app/env/environment_c.py similarity index 100% rename from app/env/environment_c.py rename to polyguard-rl/app/env/environment_c.py diff --git a/app/env/environment_d.py b/polyguard-rl/app/env/environment_d.py similarity index 100% rename from app/env/environment_d.py rename to polyguard-rl/app/env/environment_d.py diff --git a/app/env/fastapi_app.py b/polyguard-rl/app/env/fastapi_app.py similarity index 100% rename from app/env/fastapi_app.py rename to polyguard-rl/app/env/fastapi_app.py diff --git a/app/env/observations.py b/polyguard-rl/app/env/observations.py similarity index 100% rename from app/env/observations.py rename to polyguard-rl/app/env/observations.py diff --git a/app/env/replay.py b/polyguard-rl/app/env/replay.py similarity index 100% rename from app/env/replay.py rename to polyguard-rl/app/env/replay.py diff --git a/app/env/reward_router.py b/polyguard-rl/app/env/reward_router.py similarity index 100% rename from app/env/reward_router.py rename to polyguard-rl/app/env/reward_router.py diff --git a/app/env/reward_scaling.py b/polyguard-rl/app/env/reward_scaling.py similarity index 100% rename from app/env/reward_scaling.py rename to polyguard-rl/app/env/reward_scaling.py diff --git a/app/env/scenario_loader.py b/polyguard-rl/app/env/scenario_loader.py similarity index 100% rename from app/env/scenario_loader.py rename to polyguard-rl/app/env/scenario_loader.py diff --git a/app/env/state.py 
b/polyguard-rl/app/env/state.py similarity index 100% rename from app/env/state.py rename to polyguard-rl/app/env/state.py diff --git a/app/env/termination.py b/polyguard-rl/app/env/termination.py similarity index 100% rename from app/env/termination.py rename to polyguard-rl/app/env/termination.py diff --git a/app/env/transition.py b/polyguard-rl/app/env/transition.py similarity index 100% rename from app/env/transition.py rename to polyguard-rl/app/env/transition.py diff --git a/app/env/verifier.py b/polyguard-rl/app/env/verifier.py similarity index 100% rename from app/env/verifier.py rename to polyguard-rl/app/env/verifier.py diff --git a/app/evaluation/__init__.py b/polyguard-rl/app/evaluation/__init__.py similarity index 100% rename from app/evaluation/__init__.py rename to polyguard-rl/app/evaluation/__init__.py diff --git a/app/evaluation/abstention_eval.py b/polyguard-rl/app/evaluation/abstention_eval.py similarity index 100% rename from app/evaluation/abstention_eval.py rename to polyguard-rl/app/evaluation/abstention_eval.py diff --git a/app/evaluation/benchmark_report.py b/polyguard-rl/app/evaluation/benchmark_report.py similarity index 100% rename from app/evaluation/benchmark_report.py rename to polyguard-rl/app/evaluation/benchmark_report.py diff --git a/app/evaluation/calibration_eval.py b/polyguard-rl/app/evaluation/calibration_eval.py similarity index 100% rename from app/evaluation/calibration_eval.py rename to polyguard-rl/app/evaluation/calibration_eval.py diff --git a/app/evaluation/dosing_eval.py b/polyguard-rl/app/evaluation/dosing_eval.py similarity index 100% rename from app/evaluation/dosing_eval.py rename to polyguard-rl/app/evaluation/dosing_eval.py diff --git a/app/evaluation/explainability_eval.py b/polyguard-rl/app/evaluation/explainability_eval.py similarity index 100% rename from app/evaluation/explainability_eval.py rename to polyguard-rl/app/evaluation/explainability_eval.py diff --git a/app/evaluation/offline_policy_eval.py 
b/polyguard-rl/app/evaluation/offline_policy_eval.py similarity index 100% rename from app/evaluation/offline_policy_eval.py rename to polyguard-rl/app/evaluation/offline_policy_eval.py diff --git a/app/evaluation/plotting.py b/polyguard-rl/app/evaluation/plotting.py similarity index 100% rename from app/evaluation/plotting.py rename to polyguard-rl/app/evaluation/plotting.py diff --git a/app/evaluation/process_eval.py b/polyguard-rl/app/evaluation/process_eval.py similarity index 100% rename from app/evaluation/process_eval.py rename to polyguard-rl/app/evaluation/process_eval.py diff --git a/app/evaluation/robustness_eval.py b/polyguard-rl/app/evaluation/robustness_eval.py similarity index 100% rename from app/evaluation/robustness_eval.py rename to polyguard-rl/app/evaluation/robustness_eval.py diff --git a/app/evaluation/run_all.py b/polyguard-rl/app/evaluation/run_all.py similarity index 100% rename from app/evaluation/run_all.py rename to polyguard-rl/app/evaluation/run_all.py diff --git a/app/evaluation/safety_eval.py b/polyguard-rl/app/evaluation/safety_eval.py similarity index 100% rename from app/evaluation/safety_eval.py rename to polyguard-rl/app/evaluation/safety_eval.py diff --git a/app/evaluation/simulator_rollouts.py b/polyguard-rl/app/evaluation/simulator_rollouts.py similarity index 100% rename from app/evaluation/simulator_rollouts.py rename to polyguard-rl/app/evaluation/simulator_rollouts.py diff --git a/app/evaluation/subgroup_eval.py b/polyguard-rl/app/evaluation/subgroup_eval.py similarity index 100% rename from app/evaluation/subgroup_eval.py rename to polyguard-rl/app/evaluation/subgroup_eval.py diff --git a/app/hf_space/Dockerfile b/polyguard-rl/app/hf_space/Dockerfile similarity index 100% rename from app/hf_space/Dockerfile rename to polyguard-rl/app/hf_space/Dockerfile diff --git a/app/hf_space/__init__.py b/polyguard-rl/app/hf_space/__init__.py similarity index 100% rename from app/hf_space/__init__.py rename to 
polyguard-rl/app/hf_space/__init__.py diff --git a/app/hf_space/evidence_runner.py b/polyguard-rl/app/hf_space/evidence_runner.py similarity index 100% rename from app/hf_space/evidence_runner.py rename to polyguard-rl/app/hf_space/evidence_runner.py diff --git a/app/hf_space/training_runner.py b/polyguard-rl/app/hf_space/training_runner.py similarity index 100% rename from app/hf_space/training_runner.py rename to polyguard-rl/app/hf_space/training_runner.py diff --git a/app/knowledge/__init__.py b/polyguard-rl/app/knowledge/__init__.py similarity index 100% rename from app/knowledge/__init__.py rename to polyguard-rl/app/knowledge/__init__.py diff --git a/app/knowledge/burden_scores.py b/polyguard-rl/app/knowledge/burden_scores.py similarity index 100% rename from app/knowledge/burden_scores.py rename to polyguard-rl/app/knowledge/burden_scores.py diff --git a/app/knowledge/ddi_knowledge.py b/polyguard-rl/app/knowledge/ddi_knowledge.py similarity index 100% rename from app/knowledge/ddi_knowledge.py rename to polyguard-rl/app/knowledge/ddi_knowledge.py diff --git a/app/knowledge/drug_catalog.py b/polyguard-rl/app/knowledge/drug_catalog.py similarity index 100% rename from app/knowledge/drug_catalog.py rename to polyguard-rl/app/knowledge/drug_catalog.py diff --git a/app/knowledge/duplicate_therapy_rules.py b/polyguard-rl/app/knowledge/duplicate_therapy_rules.py similarity index 100% rename from app/knowledge/duplicate_therapy_rules.py rename to polyguard-rl/app/knowledge/duplicate_therapy_rules.py diff --git a/app/knowledge/evidence_retriever.py b/polyguard-rl/app/knowledge/evidence_retriever.py similarity index 100% rename from app/knowledge/evidence_retriever.py rename to polyguard-rl/app/knowledge/evidence_retriever.py diff --git a/app/knowledge/guideline_fragments.py b/polyguard-rl/app/knowledge/guideline_fragments.py similarity index 100% rename from app/knowledge/guideline_fragments.py rename to polyguard-rl/app/knowledge/guideline_fragments.py diff --git 
a/app/knowledge/hepatic_rules.py b/polyguard-rl/app/knowledge/hepatic_rules.py similarity index 100% rename from app/knowledge/hepatic_rules.py rename to polyguard-rl/app/knowledge/hepatic_rules.py diff --git a/app/knowledge/literature_index.py b/polyguard-rl/app/knowledge/literature_index.py similarity index 100% rename from app/knowledge/literature_index.py rename to polyguard-rl/app/knowledge/literature_index.py diff --git a/app/knowledge/renal_rules.py b/polyguard-rl/app/knowledge/renal_rules.py similarity index 100% rename from app/knowledge/renal_rules.py rename to polyguard-rl/app/knowledge/renal_rules.py diff --git a/app/knowledge/side_effect_ontology.py b/polyguard-rl/app/knowledge/side_effect_ontology.py similarity index 100% rename from app/knowledge/side_effect_ontology.py rename to polyguard-rl/app/knowledge/side_effect_ontology.py diff --git a/app/knowledge/substitution_rules.py b/polyguard-rl/app/knowledge/substitution_rules.py similarity index 100% rename from app/knowledge/substitution_rules.py rename to polyguard-rl/app/knowledge/substitution_rules.py diff --git a/app/knowledge/taper_rules.py b/polyguard-rl/app/knowledge/taper_rules.py similarity index 100% rename from app/knowledge/taper_rules.py rename to polyguard-rl/app/knowledge/taper_rules.py diff --git a/app/models/__init__.py b/polyguard-rl/app/models/__init__.py similarity index 100% rename from app/models/__init__.py rename to polyguard-rl/app/models/__init__.py diff --git a/app/models/baselines/__init__.py b/polyguard-rl/app/models/baselines/__init__.py similarity index 100% rename from app/models/baselines/__init__.py rename to polyguard-rl/app/models/baselines/__init__.py diff --git a/app/models/baselines/beam_search_planner.py b/polyguard-rl/app/models/baselines/beam_search_planner.py similarity index 100% rename from app/models/baselines/beam_search_planner.py rename to polyguard-rl/app/models/baselines/beam_search_planner.py diff --git a/app/models/baselines/contextual_bandit.py 
b/polyguard-rl/app/models/baselines/contextual_bandit.py similarity index 100% rename from app/models/baselines/contextual_bandit.py rename to polyguard-rl/app/models/baselines/contextual_bandit.py diff --git a/app/models/baselines/contextual_bandit_policy.py b/polyguard-rl/app/models/baselines/contextual_bandit_policy.py similarity index 100% rename from app/models/baselines/contextual_bandit_policy.py rename to polyguard-rl/app/models/baselines/contextual_bandit_policy.py diff --git a/app/models/baselines/greedy_regimen.py b/polyguard-rl/app/models/baselines/greedy_regimen.py similarity index 100% rename from app/models/baselines/greedy_regimen.py rename to polyguard-rl/app/models/baselines/greedy_regimen.py diff --git a/app/models/baselines/imitation.py b/polyguard-rl/app/models/baselines/imitation.py similarity index 100% rename from app/models/baselines/imitation.py rename to polyguard-rl/app/models/baselines/imitation.py diff --git a/app/models/baselines/no_change.py b/polyguard-rl/app/models/baselines/no_change.py similarity index 100% rename from app/models/baselines/no_change.py rename to polyguard-rl/app/models/baselines/no_change.py diff --git a/app/models/baselines/rules_only.py b/polyguard-rl/app/models/baselines/rules_only.py similarity index 100% rename from app/models/baselines/rules_only.py rename to polyguard-rl/app/models/baselines/rules_only.py diff --git a/app/models/dosing/__init__.py b/polyguard-rl/app/models/dosing/__init__.py similarity index 100% rename from app/models/dosing/__init__.py rename to polyguard-rl/app/models/dosing/__init__.py diff --git a/app/models/dosing/dose_policy_features.py b/polyguard-rl/app/models/dosing/dose_policy_features.py similarity index 100% rename from app/models/dosing/dose_policy_features.py rename to polyguard-rl/app/models/dosing/dose_policy_features.py diff --git a/app/models/dosing/infer.py b/polyguard-rl/app/models/dosing/infer.py similarity index 100% rename from app/models/dosing/infer.py rename to 
polyguard-rl/app/models/dosing/infer.py diff --git a/app/models/dosing/pkpd_state.py b/polyguard-rl/app/models/dosing/pkpd_state.py similarity index 100% rename from app/models/dosing/pkpd_state.py rename to polyguard-rl/app/models/dosing/pkpd_state.py diff --git a/app/models/dosing/surrogate_pkpd.py b/polyguard-rl/app/models/dosing/surrogate_pkpd.py similarity index 100% rename from app/models/dosing/surrogate_pkpd.py rename to polyguard-rl/app/models/dosing/surrogate_pkpd.py diff --git a/app/models/dosing/train.py b/polyguard-rl/app/models/dosing/train.py similarity index 100% rename from app/models/dosing/train.py rename to polyguard-rl/app/models/dosing/train.py diff --git a/app/models/dosing/train_supervised.py b/polyguard-rl/app/models/dosing/train_supervised.py similarity index 100% rename from app/models/dosing/train_supervised.py rename to polyguard-rl/app/models/dosing/train_supervised.py diff --git a/app/models/graph/__init__.py b/polyguard-rl/app/models/graph/__init__.py similarity index 100% rename from app/models/graph/__init__.py rename to polyguard-rl/app/models/graph/__init__.py diff --git a/app/models/graph/dataset.py b/polyguard-rl/app/models/graph/dataset.py similarity index 100% rename from app/models/graph/dataset.py rename to polyguard-rl/app/models/graph/dataset.py diff --git a/app/models/graph/hetero_encoder.py b/polyguard-rl/app/models/graph/hetero_encoder.py similarity index 100% rename from app/models/graph/hetero_encoder.py rename to polyguard-rl/app/models/graph/hetero_encoder.py diff --git a/app/models/graph/infer.py b/polyguard-rl/app/models/graph/infer.py similarity index 100% rename from app/models/graph/infer.py rename to polyguard-rl/app/models/graph/infer.py diff --git a/app/models/graph/pairwise_ddi_head.py b/polyguard-rl/app/models/graph/pairwise_ddi_head.py similarity index 100% rename from app/models/graph/pairwise_ddi_head.py rename to polyguard-rl/app/models/graph/pairwise_ddi_head.py diff --git 
a/app/models/graph/regimen_embedder.py b/polyguard-rl/app/models/graph/regimen_embedder.py similarity index 100% rename from app/models/graph/regimen_embedder.py rename to polyguard-rl/app/models/graph/regimen_embedder.py diff --git a/app/models/graph/severe_alert_head.py b/polyguard-rl/app/models/graph/severe_alert_head.py similarity index 100% rename from app/models/graph/severe_alert_head.py rename to polyguard-rl/app/models/graph/severe_alert_head.py diff --git a/app/models/graph/side_effect_head.py b/polyguard-rl/app/models/graph/side_effect_head.py similarity index 100% rename from app/models/graph/side_effect_head.py rename to polyguard-rl/app/models/graph/side_effect_head.py diff --git a/app/models/graph/train.py b/polyguard-rl/app/models/graph/train.py similarity index 100% rename from app/models/graph/train.py rename to polyguard-rl/app/models/graph/train.py diff --git a/app/models/policy/__init__.py b/polyguard-rl/app/models/policy/__init__.py similarity index 100% rename from app/models/policy/__init__.py rename to polyguard-rl/app/models/policy/__init__.py diff --git a/app/models/policy/abstention.py b/polyguard-rl/app/models/policy/abstention.py similarity index 100% rename from app/models/policy/abstention.py rename to polyguard-rl/app/models/policy/abstention.py diff --git a/app/models/policy/active_model.py b/polyguard-rl/app/models/policy/active_model.py similarity index 100% rename from app/models/policy/active_model.py rename to polyguard-rl/app/models/policy/active_model.py diff --git a/app/models/policy/candidate_builder.py b/polyguard-rl/app/models/policy/candidate_builder.py similarity index 100% rename from app/models/policy/candidate_builder.py rename to polyguard-rl/app/models/policy/candidate_builder.py diff --git a/app/models/policy/output_schema.py b/polyguard-rl/app/models/policy/output_schema.py similarity index 100% rename from app/models/policy/output_schema.py rename to polyguard-rl/app/models/policy/output_schema.py diff --git 
a/app/models/policy/parser.py b/polyguard-rl/app/models/policy/parser.py similarity index 100% rename from app/models/policy/parser.py rename to polyguard-rl/app/models/policy/parser.py diff --git a/app/models/policy/policy_io.py b/polyguard-rl/app/models/policy/policy_io.py similarity index 100% rename from app/models/policy/policy_io.py rename to polyguard-rl/app/models/policy/policy_io.py diff --git a/app/models/policy/prompt_templates.py b/polyguard-rl/app/models/policy/prompt_templates.py similarity index 100% rename from app/models/policy/prompt_templates.py rename to polyguard-rl/app/models/policy/prompt_templates.py diff --git a/app/models/policy/provider_runtime.py b/polyguard-rl/app/models/policy/provider_runtime.py similarity index 100% rename from app/models/policy/provider_runtime.py rename to polyguard-rl/app/models/policy/provider_runtime.py diff --git a/app/models/policy/repair.py b/polyguard-rl/app/models/policy/repair.py similarity index 100% rename from app/models/policy/repair.py rename to polyguard-rl/app/models/policy/repair.py diff --git a/app/models/policy/safety_ranker.py b/polyguard-rl/app/models/policy/safety_ranker.py similarity index 100% rename from app/models/policy/safety_ranker.py rename to polyguard-rl/app/models/policy/safety_ranker.py diff --git a/app/models/policy/uncertainty.py b/polyguard-rl/app/models/policy/uncertainty.py similarity index 100% rename from app/models/policy/uncertainty.py rename to polyguard-rl/app/models/policy/uncertainty.py diff --git a/app/models/retrieval/__init__.py b/polyguard-rl/app/models/retrieval/__init__.py similarity index 100% rename from app/models/retrieval/__init__.py rename to polyguard-rl/app/models/retrieval/__init__.py diff --git a/app/models/retrieval/chunker.py b/polyguard-rl/app/models/retrieval/chunker.py similarity index 100% rename from app/models/retrieval/chunker.py rename to polyguard-rl/app/models/retrieval/chunker.py diff --git a/app/models/retrieval/embedder.py 
b/polyguard-rl/app/models/retrieval/embedder.py similarity index 100% rename from app/models/retrieval/embedder.py rename to polyguard-rl/app/models/retrieval/embedder.py diff --git a/app/models/retrieval/index.py b/polyguard-rl/app/models/retrieval/index.py similarity index 100% rename from app/models/retrieval/index.py rename to polyguard-rl/app/models/retrieval/index.py diff --git a/app/models/retrieval/reranker.py b/polyguard-rl/app/models/retrieval/reranker.py similarity index 100% rename from app/models/retrieval/reranker.py rename to polyguard-rl/app/models/retrieval/reranker.py diff --git a/app/models/retrieval/retriever.py b/polyguard-rl/app/models/retrieval/retriever.py similarity index 100% rename from app/models/retrieval/retriever.py rename to polyguard-rl/app/models/retrieval/retriever.py diff --git a/app/models/tabular/__init__.py b/polyguard-rl/app/models/tabular/__init__.py similarity index 100% rename from app/models/tabular/__init__.py rename to polyguard-rl/app/models/tabular/__init__.py diff --git a/app/models/tabular/calibration.py b/polyguard-rl/app/models/tabular/calibration.py similarity index 100% rename from app/models/tabular/calibration.py rename to polyguard-rl/app/models/tabular/calibration.py diff --git a/app/models/tabular/features.py b/polyguard-rl/app/models/tabular/features.py similarity index 100% rename from app/models/tabular/features.py rename to polyguard-rl/app/models/tabular/features.py diff --git a/app/models/tabular/infer.py b/polyguard-rl/app/models/tabular/infer.py similarity index 100% rename from app/models/tabular/infer.py rename to polyguard-rl/app/models/tabular/infer.py diff --git a/app/models/tabular/risk_heads.py b/polyguard-rl/app/models/tabular/risk_heads.py similarity index 100% rename from app/models/tabular/risk_heads.py rename to polyguard-rl/app/models/tabular/risk_heads.py diff --git a/app/models/tabular/train.py b/polyguard-rl/app/models/tabular/train.py similarity index 100% rename from 
app/models/tabular/train.py rename to polyguard-rl/app/models/tabular/train.py diff --git a/app/simulator/__init__.py b/polyguard-rl/app/simulator/__init__.py similarity index 100% rename from app/simulator/__init__.py rename to polyguard-rl/app/simulator/__init__.py diff --git a/app/simulator/ade_event_model.py b/polyguard-rl/app/simulator/ade_event_model.py similarity index 100% rename from app/simulator/ade_event_model.py rename to polyguard-rl/app/simulator/ade_event_model.py diff --git a/app/simulator/adherence_dynamics.py b/polyguard-rl/app/simulator/adherence_dynamics.py similarity index 100% rename from app/simulator/adherence_dynamics.py rename to polyguard-rl/app/simulator/adherence_dynamics.py diff --git a/app/simulator/burden_model.py b/polyguard-rl/app/simulator/burden_model.py similarity index 100% rename from app/simulator/burden_model.py rename to polyguard-rl/app/simulator/burden_model.py diff --git a/app/simulator/ddi_event_model.py b/polyguard-rl/app/simulator/ddi_event_model.py similarity index 100% rename from app/simulator/ddi_event_model.py rename to polyguard-rl/app/simulator/ddi_event_model.py diff --git a/app/simulator/disease_dynamics.py b/polyguard-rl/app/simulator/disease_dynamics.py similarity index 100% rename from app/simulator/disease_dynamics.py rename to polyguard-rl/app/simulator/disease_dynamics.py diff --git a/app/simulator/dose_response.py b/polyguard-rl/app/simulator/dose_response.py similarity index 100% rename from app/simulator/dose_response.py rename to polyguard-rl/app/simulator/dose_response.py diff --git a/app/simulator/lab_dynamics.py b/polyguard-rl/app/simulator/lab_dynamics.py similarity index 100% rename from app/simulator/lab_dynamics.py rename to polyguard-rl/app/simulator/lab_dynamics.py diff --git a/app/simulator/latent_confounders.py b/polyguard-rl/app/simulator/latent_confounders.py similarity index 100% rename from app/simulator/latent_confounders.py rename to polyguard-rl/app/simulator/latent_confounders.py 
diff --git a/app/simulator/medication_effects.py b/polyguard-rl/app/simulator/medication_effects.py similarity index 100% rename from app/simulator/medication_effects.py rename to polyguard-rl/app/simulator/medication_effects.py diff --git a/app/simulator/patient_generator.py b/polyguard-rl/app/simulator/patient_generator.py similarity index 100% rename from app/simulator/patient_generator.py rename to polyguard-rl/app/simulator/patient_generator.py diff --git a/app/simulator/scenario_generator.py b/polyguard-rl/app/simulator/scenario_generator.py similarity index 100% rename from app/simulator/scenario_generator.py rename to polyguard-rl/app/simulator/scenario_generator.py diff --git a/app/simulator/uncertainty_model.py b/polyguard-rl/app/simulator/uncertainty_model.py similarity index 100% rename from app/simulator/uncertainty_model.py rename to polyguard-rl/app/simulator/uncertainty_model.py diff --git a/app/simulator/utilization_risk.py b/polyguard-rl/app/simulator/utilization_risk.py similarity index 100% rename from app/simulator/utilization_risk.py rename to polyguard-rl/app/simulator/utilization_risk.py diff --git a/app/tools/__init__.py b/polyguard-rl/app/tools/__init__.py similarity index 100% rename from app/tools/__init__.py rename to polyguard-rl/app/tools/__init__.py diff --git a/app/tools/medication_alternatives.py b/polyguard-rl/app/tools/medication_alternatives.py similarity index 100% rename from app/tools/medication_alternatives.py rename to polyguard-rl/app/tools/medication_alternatives.py diff --git a/app/training/__init__.py b/polyguard-rl/app/training/__init__.py similarity index 100% rename from app/training/__init__.py rename to polyguard-rl/app/training/__init__.py diff --git a/app/training/callbacks.py b/polyguard-rl/app/training/callbacks.py similarity index 100% rename from app/training/callbacks.py rename to polyguard-rl/app/training/callbacks.py diff --git a/app/training/checkpointing.py b/polyguard-rl/app/training/checkpointing.py 
similarity index 100% rename from app/training/checkpointing.py rename to polyguard-rl/app/training/checkpointing.py diff --git a/app/training/dosing_grpo.py b/polyguard-rl/app/training/dosing_grpo.py similarity index 100% rename from app/training/dosing_grpo.py rename to polyguard-rl/app/training/dosing_grpo.py diff --git a/app/training/generation.py b/polyguard-rl/app/training/generation.py similarity index 100% rename from app/training/generation.py rename to polyguard-rl/app/training/generation.py diff --git a/app/training/grpo_dosing.py b/polyguard-rl/app/training/grpo_dosing.py similarity index 100% rename from app/training/grpo_dosing.py rename to polyguard-rl/app/training/grpo_dosing.py diff --git a/app/training/grpo_experiment.py b/polyguard-rl/app/training/grpo_experiment.py similarity index 100% rename from app/training/grpo_experiment.py rename to polyguard-rl/app/training/grpo_experiment.py diff --git a/app/training/grpo_planner.py b/polyguard-rl/app/training/grpo_planner.py similarity index 100% rename from app/training/grpo_planner.py rename to polyguard-rl/app/training/grpo_planner.py diff --git a/app/training/grpo_supervisor.py b/polyguard-rl/app/training/grpo_supervisor.py similarity index 100% rename from app/training/grpo_supervisor.py rename to polyguard-rl/app/training/grpo_supervisor.py diff --git a/app/training/grpo_trl.py b/polyguard-rl/app/training/grpo_trl.py similarity index 100% rename from app/training/grpo_trl.py rename to polyguard-rl/app/training/grpo_trl.py diff --git a/app/training/lora_utils.py b/polyguard-rl/app/training/lora_utils.py similarity index 100% rename from app/training/lora_utils.py rename to polyguard-rl/app/training/lora_utils.py diff --git a/app/training/metrics.py b/polyguard-rl/app/training/metrics.py similarity index 100% rename from app/training/metrics.py rename to polyguard-rl/app/training/metrics.py diff --git a/app/training/model_registry.py b/polyguard-rl/app/training/model_registry.py similarity index 
100% rename from app/training/model_registry.py rename to polyguard-rl/app/training/model_registry.py diff --git a/app/training/openenv_wrapper.py b/polyguard-rl/app/training/openenv_wrapper.py similarity index 100% rename from app/training/openenv_wrapper.py rename to polyguard-rl/app/training/openenv_wrapper.py diff --git a/app/training/planner_grpo.py b/polyguard-rl/app/training/planner_grpo.py similarity index 100% rename from app/training/planner_grpo.py rename to polyguard-rl/app/training/planner_grpo.py diff --git a/app/training/process_feedback.py b/polyguard-rl/app/training/process_feedback.py similarity index 100% rename from app/training/process_feedback.py rename to polyguard-rl/app/training/process_feedback.py diff --git a/app/training/replay_buffer.py b/polyguard-rl/app/training/replay_buffer.py similarity index 100% rename from app/training/replay_buffer.py rename to polyguard-rl/app/training/replay_buffer.py diff --git a/app/training/reward_functions.py b/polyguard-rl/app/training/reward_functions.py similarity index 100% rename from app/training/reward_functions.py rename to polyguard-rl/app/training/reward_functions.py diff --git a/app/training/rl_dataset.py b/polyguard-rl/app/training/rl_dataset.py similarity index 100% rename from app/training/rl_dataset.py rename to polyguard-rl/app/training/rl_dataset.py diff --git a/app/training/sft_dataset.py b/polyguard-rl/app/training/sft_dataset.py similarity index 100% rename from app/training/sft_dataset.py rename to polyguard-rl/app/training/sft_dataset.py diff --git a/app/training/sft_train.py b/polyguard-rl/app/training/sft_train.py similarity index 100% rename from app/training/sft_train.py rename to polyguard-rl/app/training/sft_train.py diff --git a/app/training/sft_trl.py b/polyguard-rl/app/training/sft_trl.py similarity index 100% rename from app/training/sft_trl.py rename to polyguard-rl/app/training/sft_trl.py diff --git a/app/training/supervisor_grpo.py 
b/polyguard-rl/app/training/supervisor_grpo.py similarity index 100% rename from app/training/supervisor_grpo.py rename to polyguard-rl/app/training/supervisor_grpo.py diff --git a/app/training/unsloth_loader.py b/polyguard-rl/app/training/unsloth_loader.py similarity index 100% rename from app/training/unsloth_loader.py rename to polyguard-rl/app/training/unsloth_loader.py diff --git a/app/ui/backend.py b/polyguard-rl/app/ui/backend.py similarity index 100% rename from app/ui/backend.py rename to polyguard-rl/app/ui/backend.py diff --git a/app/ui/frontend/index.html b/polyguard-rl/app/ui/frontend/index.html similarity index 100% rename from app/ui/frontend/index.html rename to polyguard-rl/app/ui/frontend/index.html diff --git a/app/ui/frontend/package-lock.json b/polyguard-rl/app/ui/frontend/package-lock.json similarity index 100% rename from app/ui/frontend/package-lock.json rename to polyguard-rl/app/ui/frontend/package-lock.json diff --git a/app/ui/frontend/package.json b/polyguard-rl/app/ui/frontend/package.json similarity index 100% rename from app/ui/frontend/package.json rename to polyguard-rl/app/ui/frontend/package.json diff --git a/app/ui/frontend/public/blackhole.webm b/polyguard-rl/app/ui/frontend/public/blackhole.webm similarity index 100% rename from app/ui/frontend/public/blackhole.webm rename to polyguard-rl/app/ui/frontend/public/blackhole.webm diff --git a/app/ui/frontend/src/App.tsx b/polyguard-rl/app/ui/frontend/src/App.tsx similarity index 100% rename from app/ui/frontend/src/App.tsx rename to polyguard-rl/app/ui/frontend/src/App.tsx diff --git a/app/ui/frontend/src/components/AlternativeMedicineSearch.tsx b/polyguard-rl/app/ui/frontend/src/components/AlternativeMedicineSearch.tsx similarity index 100% rename from app/ui/frontend/src/components/AlternativeMedicineSearch.tsx rename to polyguard-rl/app/ui/frontend/src/components/AlternativeMedicineSearch.tsx diff --git a/app/ui/frontend/src/components/CandidateActions.tsx 
b/polyguard-rl/app/ui/frontend/src/components/CandidateActions.tsx similarity index 100% rename from app/ui/frontend/src/components/CandidateActions.tsx rename to polyguard-rl/app/ui/frontend/src/components/CandidateActions.tsx diff --git a/app/ui/frontend/src/components/ConstraintWarnings.tsx b/polyguard-rl/app/ui/frontend/src/components/ConstraintWarnings.tsx similarity index 100% rename from app/ui/frontend/src/components/ConstraintWarnings.tsx rename to polyguard-rl/app/ui/frontend/src/components/ConstraintWarnings.tsx diff --git a/app/ui/frontend/src/components/DecisionPanel.tsx b/polyguard-rl/app/ui/frontend/src/components/DecisionPanel.tsx similarity index 100% rename from app/ui/frontend/src/components/DecisionPanel.tsx rename to polyguard-rl/app/ui/frontend/src/components/DecisionPanel.tsx diff --git a/app/ui/frontend/src/components/DosingPanel.tsx b/polyguard-rl/app/ui/frontend/src/components/DosingPanel.tsx similarity index 100% rename from app/ui/frontend/src/components/DosingPanel.tsx rename to polyguard-rl/app/ui/frontend/src/components/DosingPanel.tsx diff --git a/app/ui/frontend/src/components/EpisodeTrace.tsx b/polyguard-rl/app/ui/frontend/src/components/EpisodeTrace.tsx similarity index 100% rename from app/ui/frontend/src/components/EpisodeTrace.tsx rename to polyguard-rl/app/ui/frontend/src/components/EpisodeTrace.tsx diff --git a/app/ui/frontend/src/components/EvidenceDrawer.tsx b/polyguard-rl/app/ui/frontend/src/components/EvidenceDrawer.tsx similarity index 100% rename from app/ui/frontend/src/components/EvidenceDrawer.tsx rename to polyguard-rl/app/ui/frontend/src/components/EvidenceDrawer.tsx diff --git a/app/ui/frontend/src/components/ExplanationPanel.tsx b/polyguard-rl/app/ui/frontend/src/components/ExplanationPanel.tsx similarity index 100% rename from app/ui/frontend/src/components/ExplanationPanel.tsx rename to polyguard-rl/app/ui/frontend/src/components/ExplanationPanel.tsx diff --git 
a/app/ui/frontend/src/components/MedicationTable.tsx b/polyguard-rl/app/ui/frontend/src/components/MedicationTable.tsx similarity index 100% rename from app/ui/frontend/src/components/MedicationTable.tsx rename to polyguard-rl/app/ui/frontend/src/components/MedicationTable.tsx diff --git a/app/ui/frontend/src/components/MetaverseBackdrop.tsx b/polyguard-rl/app/ui/frontend/src/components/MetaverseBackdrop.tsx similarity index 100% rename from app/ui/frontend/src/components/MetaverseBackdrop.tsx rename to polyguard-rl/app/ui/frontend/src/components/MetaverseBackdrop.tsx diff --git a/app/ui/frontend/src/components/PatientSummaryCard.tsx b/polyguard-rl/app/ui/frontend/src/components/PatientSummaryCard.tsx similarity index 100% rename from app/ui/frontend/src/components/PatientSummaryCard.tsx rename to polyguard-rl/app/ui/frontend/src/components/PatientSummaryCard.tsx diff --git a/app/ui/frontend/src/components/RewardPanel.tsx b/polyguard-rl/app/ui/frontend/src/components/RewardPanel.tsx similarity index 100% rename from app/ui/frontend/src/components/RewardPanel.tsx rename to polyguard-rl/app/ui/frontend/src/components/RewardPanel.tsx diff --git a/app/ui/frontend/src/components/RiskDeltaPanel.tsx b/polyguard-rl/app/ui/frontend/src/components/RiskDeltaPanel.tsx similarity index 100% rename from app/ui/frontend/src/components/RiskDeltaPanel.tsx rename to polyguard-rl/app/ui/frontend/src/components/RiskDeltaPanel.tsx diff --git a/app/ui/frontend/src/components/ScenarioSelector.tsx b/polyguard-rl/app/ui/frontend/src/components/ScenarioSelector.tsx similarity index 100% rename from app/ui/frontend/src/components/ScenarioSelector.tsx rename to polyguard-rl/app/ui/frontend/src/components/ScenarioSelector.tsx diff --git a/app/ui/frontend/src/components/TrainingCharts.tsx b/polyguard-rl/app/ui/frontend/src/components/TrainingCharts.tsx similarity index 100% rename from app/ui/frontend/src/components/TrainingCharts.tsx rename to 
polyguard-rl/app/ui/frontend/src/components/TrainingCharts.tsx diff --git a/app/ui/frontend/src/lib/api.ts b/polyguard-rl/app/ui/frontend/src/lib/api.ts similarity index 100% rename from app/ui/frontend/src/lib/api.ts rename to polyguard-rl/app/ui/frontend/src/lib/api.ts diff --git a/app/ui/frontend/src/lib/constants.ts b/polyguard-rl/app/ui/frontend/src/lib/constants.ts similarity index 100% rename from app/ui/frontend/src/lib/constants.ts rename to polyguard-rl/app/ui/frontend/src/lib/constants.ts diff --git a/app/ui/frontend/src/lib/types.ts b/polyguard-rl/app/ui/frontend/src/lib/types.ts similarity index 100% rename from app/ui/frontend/src/lib/types.ts rename to polyguard-rl/app/ui/frontend/src/lib/types.ts diff --git a/app/ui/frontend/src/main.tsx b/polyguard-rl/app/ui/frontend/src/main.tsx similarity index 100% rename from app/ui/frontend/src/main.tsx rename to polyguard-rl/app/ui/frontend/src/main.tsx diff --git a/app/ui/frontend/src/pages/Dashboard.tsx b/polyguard-rl/app/ui/frontend/src/pages/Dashboard.tsx similarity index 100% rename from app/ui/frontend/src/pages/Dashboard.tsx rename to polyguard-rl/app/ui/frontend/src/pages/Dashboard.tsx diff --git a/app/ui/frontend/src/pages/EpisodeReplay.tsx b/polyguard-rl/app/ui/frontend/src/pages/EpisodeReplay.tsx similarity index 100% rename from app/ui/frontend/src/pages/EpisodeReplay.tsx rename to polyguard-rl/app/ui/frontend/src/pages/EpisodeReplay.tsx diff --git a/app/ui/frontend/src/pages/Home.tsx b/polyguard-rl/app/ui/frontend/src/pages/Home.tsx similarity index 100% rename from app/ui/frontend/src/pages/Home.tsx rename to polyguard-rl/app/ui/frontend/src/pages/Home.tsx diff --git a/app/ui/frontend/src/pages/PatientWorkbench.tsx b/polyguard-rl/app/ui/frontend/src/pages/PatientWorkbench.tsx similarity index 100% rename from app/ui/frontend/src/pages/PatientWorkbench.tsx rename to polyguard-rl/app/ui/frontend/src/pages/PatientWorkbench.tsx diff --git a/app/ui/frontend/src/pages/PolicyCompare.tsx 
b/polyguard-rl/app/ui/frontend/src/pages/PolicyCompare.tsx similarity index 100% rename from app/ui/frontend/src/pages/PolicyCompare.tsx rename to polyguard-rl/app/ui/frontend/src/pages/PolicyCompare.tsx diff --git a/app/ui/frontend/src/pages/PolicyLab.tsx b/polyguard-rl/app/ui/frontend/src/pages/PolicyLab.tsx similarity index 100% rename from app/ui/frontend/src/pages/PolicyLab.tsx rename to polyguard-rl/app/ui/frontend/src/pages/PolicyLab.tsx diff --git a/app/ui/frontend/src/pages/PrecisionDosing.tsx b/polyguard-rl/app/ui/frontend/src/pages/PrecisionDosing.tsx similarity index 100% rename from app/ui/frontend/src/pages/PrecisionDosing.tsx rename to polyguard-rl/app/ui/frontend/src/pages/PrecisionDosing.tsx diff --git a/app/ui/frontend/src/pages/SafetyInspector.tsx b/polyguard-rl/app/ui/frontend/src/pages/SafetyInspector.tsx similarity index 100% rename from app/ui/frontend/src/pages/SafetyInspector.tsx rename to polyguard-rl/app/ui/frontend/src/pages/SafetyInspector.tsx diff --git a/app/ui/frontend/src/pages/TrainingMonitor.tsx b/polyguard-rl/app/ui/frontend/src/pages/TrainingMonitor.tsx similarity index 100% rename from app/ui/frontend/src/pages/TrainingMonitor.tsx rename to polyguard-rl/app/ui/frontend/src/pages/TrainingMonitor.tsx diff --git a/app/ui/frontend/src/styles/theme.css b/polyguard-rl/app/ui/frontend/src/styles/theme.css similarity index 100% rename from app/ui/frontend/src/styles/theme.css rename to polyguard-rl/app/ui/frontend/src/styles/theme.css diff --git a/app/ui/frontend/src/vite-env.d.ts b/polyguard-rl/app/ui/frontend/src/vite-env.d.ts similarity index 100% rename from app/ui/frontend/src/vite-env.d.ts rename to polyguard-rl/app/ui/frontend/src/vite-env.d.ts diff --git a/app/ui/frontend/tsconfig.json b/polyguard-rl/app/ui/frontend/tsconfig.json similarity index 100% rename from app/ui/frontend/tsconfig.json rename to polyguard-rl/app/ui/frontend/tsconfig.json diff --git a/app/ui/frontend/vite.config.ts 
b/polyguard-rl/app/ui/frontend/vite.config.ts similarity index 100% rename from app/ui/frontend/vite.config.ts rename to polyguard-rl/app/ui/frontend/vite.config.ts diff --git a/polyguard-rl/checkpoints/README.md b/polyguard-rl/checkpoints/README.md new file mode 100644 index 0000000000000000000000000000000000000000..48d47157fd553341cc1f84e40b113699a28a08c2 --- /dev/null +++ b/polyguard-rl/checkpoints/README.md @@ -0,0 +1,23 @@ +# Local checkpoints (not in Git) + +Trained weights live here so clones stay small. After cloning, install the published bundle: + +```bash +cd polyguard-rl +python scripts/install_hf_active_bundle.py +``` + +That creates **`active/`** with: + +| Path | Contents | +|------|----------| +| `active/active_model_manifest.json` | Which artifact to load (GRPO vs merged vs SFT) | +| `active/grpo_adapter/` | PEFT GRPO adapter (+ tokenizer files) | +| `active/merged/` | Full merged Qwen 0.5B weights (~1 GB) | +| `active/sft_adapter/` | SFT LoRA fallback | + +A Hub cache copy may also appear under `.hf_bundles/` (safe to delete after a successful install). + +Enable in `.env`: `POLYGUARD_ENABLE_ACTIVE_MODEL=true` and `POLYGUARD_HF_MODEL=Qwen/Qwen2.5-0.5B-Instruct` (base for the adapter path). + +**If this folder looks empty in the editor:** run the install command above; then confirm with `ls active/`. 
diff --git a/client.py b/polyguard-rl/client.py similarity index 100% rename from client.py rename to polyguard-rl/client.py diff --git a/configs/agents.yaml b/polyguard-rl/configs/agents.yaml similarity index 100% rename from configs/agents.yaml rename to polyguard-rl/configs/agents.yaml diff --git a/configs/api.yaml b/polyguard-rl/configs/api.yaml similarity index 100% rename from configs/api.yaml rename to polyguard-rl/configs/api.yaml diff --git a/configs/base.yaml b/polyguard-rl/configs/base.yaml similarity index 100% rename from configs/base.yaml rename to polyguard-rl/configs/base.yaml diff --git a/configs/curriculum.yaml b/polyguard-rl/configs/curriculum.yaml similarity index 100% rename from configs/curriculum.yaml rename to polyguard-rl/configs/curriculum.yaml diff --git a/configs/data.yaml b/polyguard-rl/configs/data.yaml similarity index 100% rename from configs/data.yaml rename to polyguard-rl/configs/data.yaml diff --git a/configs/deployment.yaml b/polyguard-rl/configs/deployment.yaml similarity index 100% rename from configs/deployment.yaml rename to polyguard-rl/configs/deployment.yaml diff --git a/configs/env_easy.yaml b/polyguard-rl/configs/env_easy.yaml similarity index 100% rename from configs/env_easy.yaml rename to polyguard-rl/configs/env_easy.yaml diff --git a/configs/env_hard.yaml b/polyguard-rl/configs/env_hard.yaml similarity index 100% rename from configs/env_hard.yaml rename to polyguard-rl/configs/env_hard.yaml diff --git a/configs/env_medium.yaml b/polyguard-rl/configs/env_medium.yaml similarity index 100% rename from configs/env_medium.yaml rename to polyguard-rl/configs/env_medium.yaml diff --git a/configs/eval.yaml b/polyguard-rl/configs/eval.yaml similarity index 100% rename from configs/eval.yaml rename to polyguard-rl/configs/eval.yaml diff --git a/configs/graph_model.yaml b/polyguard-rl/configs/graph_model.yaml similarity index 100% rename from configs/graph_model.yaml rename to polyguard-rl/configs/graph_model.yaml diff --git 
a/configs/grpo.yaml b/polyguard-rl/configs/grpo.yaml similarity index 100% rename from configs/grpo.yaml rename to polyguard-rl/configs/grpo.yaml diff --git a/configs/models.yaml b/polyguard-rl/configs/models.yaml similarity index 100% rename from configs/models.yaml rename to polyguard-rl/configs/models.yaml diff --git a/configs/paths.yaml b/polyguard-rl/configs/paths.yaml similarity index 100% rename from configs/paths.yaml rename to polyguard-rl/configs/paths.yaml diff --git a/configs/providers.yaml b/polyguard-rl/configs/providers.yaml similarity index 100% rename from configs/providers.yaml rename to polyguard-rl/configs/providers.yaml diff --git a/configs/qlora.yaml b/polyguard-rl/configs/qlora.yaml similarity index 100% rename from configs/qlora.yaml rename to polyguard-rl/configs/qlora.yaml diff --git a/configs/rewards.yaml b/polyguard-rl/configs/rewards.yaml similarity index 100% rename from configs/rewards.yaml rename to polyguard-rl/configs/rewards.yaml diff --git a/configs/risk_model.yaml b/polyguard-rl/configs/risk_model.yaml similarity index 100% rename from configs/risk_model.yaml rename to polyguard-rl/configs/risk_model.yaml diff --git a/configs/sft.yaml b/polyguard-rl/configs/sft.yaml similarity index 100% rename from configs/sft.yaml rename to polyguard-rl/configs/sft.yaml diff --git a/configs/ui.yaml b/polyguard-rl/configs/ui.yaml similarity index 100% rename from configs/ui.yaml rename to polyguard-rl/configs/ui.yaml diff --git a/data/raw/knowledge/drug_knowledge.json b/polyguard-rl/data/raw/knowledge/drug_knowledge.json similarity index 100% rename from data/raw/knowledge/drug_knowledge.json rename to polyguard-rl/data/raw/knowledge/drug_knowledge.json diff --git a/data/scenarios/easy/easy_0000.json b/polyguard-rl/data/scenarios/easy/easy_0000.json similarity index 100% rename from data/scenarios/easy/easy_0000.json rename to polyguard-rl/data/scenarios/easy/easy_0000.json diff --git a/data/scenarios/easy/easy_0001.json 
b/polyguard-rl/data/scenarios/easy/easy_0001.json similarity index 100% rename from data/scenarios/easy/easy_0001.json rename to polyguard-rl/data/scenarios/easy/easy_0001.json diff --git a/data/scenarios/easy/easy_0002.json b/polyguard-rl/data/scenarios/easy/easy_0002.json similarity index 100% rename from data/scenarios/easy/easy_0002.json rename to polyguard-rl/data/scenarios/easy/easy_0002.json diff --git a/data/scenarios/easy/easy_0003.json b/polyguard-rl/data/scenarios/easy/easy_0003.json similarity index 100% rename from data/scenarios/easy/easy_0003.json rename to polyguard-rl/data/scenarios/easy/easy_0003.json diff --git a/data/scenarios/easy/easy_0004.json b/polyguard-rl/data/scenarios/easy/easy_0004.json similarity index 100% rename from data/scenarios/easy/easy_0004.json rename to polyguard-rl/data/scenarios/easy/easy_0004.json diff --git a/data/scenarios/easy/easy_0005.json b/polyguard-rl/data/scenarios/easy/easy_0005.json similarity index 100% rename from data/scenarios/easy/easy_0005.json rename to polyguard-rl/data/scenarios/easy/easy_0005.json diff --git a/data/scenarios/easy/easy_0006.json b/polyguard-rl/data/scenarios/easy/easy_0006.json similarity index 100% rename from data/scenarios/easy/easy_0006.json rename to polyguard-rl/data/scenarios/easy/easy_0006.json diff --git a/data/scenarios/easy/easy_0007.json b/polyguard-rl/data/scenarios/easy/easy_0007.json similarity index 100% rename from data/scenarios/easy/easy_0007.json rename to polyguard-rl/data/scenarios/easy/easy_0007.json diff --git a/data/scenarios/easy/easy_0008.json b/polyguard-rl/data/scenarios/easy/easy_0008.json similarity index 100% rename from data/scenarios/easy/easy_0008.json rename to polyguard-rl/data/scenarios/easy/easy_0008.json diff --git a/data/scenarios/easy/easy_0009.json b/polyguard-rl/data/scenarios/easy/easy_0009.json similarity index 100% rename from data/scenarios/easy/easy_0009.json rename to polyguard-rl/data/scenarios/easy/easy_0009.json diff --git 
a/data/scenarios/easy/easy_0010.json b/polyguard-rl/data/scenarios/easy/easy_0010.json similarity index 100% rename from data/scenarios/easy/easy_0010.json rename to polyguard-rl/data/scenarios/easy/easy_0010.json diff --git a/data/scenarios/easy/easy_0011.json b/polyguard-rl/data/scenarios/easy/easy_0011.json similarity index 100% rename from data/scenarios/easy/easy_0011.json rename to polyguard-rl/data/scenarios/easy/easy_0011.json diff --git a/data/scenarios/easy/easy_0012.json b/polyguard-rl/data/scenarios/easy/easy_0012.json similarity index 100% rename from data/scenarios/easy/easy_0012.json rename to polyguard-rl/data/scenarios/easy/easy_0012.json diff --git a/data/scenarios/easy/easy_0013.json b/polyguard-rl/data/scenarios/easy/easy_0013.json similarity index 100% rename from data/scenarios/easy/easy_0013.json rename to polyguard-rl/data/scenarios/easy/easy_0013.json diff --git a/data/scenarios/easy/easy_0014.json b/polyguard-rl/data/scenarios/easy/easy_0014.json similarity index 100% rename from data/scenarios/easy/easy_0014.json rename to polyguard-rl/data/scenarios/easy/easy_0014.json diff --git a/data/scenarios/easy/easy_0015.json b/polyguard-rl/data/scenarios/easy/easy_0015.json similarity index 100% rename from data/scenarios/easy/easy_0015.json rename to polyguard-rl/data/scenarios/easy/easy_0015.json diff --git a/data/scenarios/easy/easy_0016.json b/polyguard-rl/data/scenarios/easy/easy_0016.json similarity index 100% rename from data/scenarios/easy/easy_0016.json rename to polyguard-rl/data/scenarios/easy/easy_0016.json diff --git a/data/scenarios/easy/easy_0017.json b/polyguard-rl/data/scenarios/easy/easy_0017.json similarity index 100% rename from data/scenarios/easy/easy_0017.json rename to polyguard-rl/data/scenarios/easy/easy_0017.json diff --git a/data/scenarios/easy/easy_0018.json b/polyguard-rl/data/scenarios/easy/easy_0018.json similarity index 100% rename from data/scenarios/easy/easy_0018.json rename to 
polyguard-rl/data/scenarios/easy/easy_0018.json diff --git a/data/scenarios/easy/easy_0019.json b/polyguard-rl/data/scenarios/easy/easy_0019.json similarity index 100% rename from data/scenarios/easy/easy_0019.json rename to polyguard-rl/data/scenarios/easy/easy_0019.json diff --git a/data/scenarios/easy/easy_0020.json b/polyguard-rl/data/scenarios/easy/easy_0020.json similarity index 100% rename from data/scenarios/easy/easy_0020.json rename to polyguard-rl/data/scenarios/easy/easy_0020.json diff --git a/data/scenarios/easy/easy_0021.json b/polyguard-rl/data/scenarios/easy/easy_0021.json similarity index 100% rename from data/scenarios/easy/easy_0021.json rename to polyguard-rl/data/scenarios/easy/easy_0021.json diff --git a/data/scenarios/easy/easy_0022.json b/polyguard-rl/data/scenarios/easy/easy_0022.json similarity index 100% rename from data/scenarios/easy/easy_0022.json rename to polyguard-rl/data/scenarios/easy/easy_0022.json diff --git a/data/scenarios/easy/easy_0023.json b/polyguard-rl/data/scenarios/easy/easy_0023.json similarity index 100% rename from data/scenarios/easy/easy_0023.json rename to polyguard-rl/data/scenarios/easy/easy_0023.json diff --git a/data/scenarios/easy/easy_0024.json b/polyguard-rl/data/scenarios/easy/easy_0024.json similarity index 100% rename from data/scenarios/easy/easy_0024.json rename to polyguard-rl/data/scenarios/easy/easy_0024.json diff --git a/data/scenarios/easy/easy_0025.json b/polyguard-rl/data/scenarios/easy/easy_0025.json similarity index 100% rename from data/scenarios/easy/easy_0025.json rename to polyguard-rl/data/scenarios/easy/easy_0025.json diff --git a/data/scenarios/easy/easy_0026.json b/polyguard-rl/data/scenarios/easy/easy_0026.json similarity index 100% rename from data/scenarios/easy/easy_0026.json rename to polyguard-rl/data/scenarios/easy/easy_0026.json diff --git a/data/scenarios/easy/easy_0027.json b/polyguard-rl/data/scenarios/easy/easy_0027.json similarity index 100% rename from 
data/scenarios/easy/easy_0027.json rename to polyguard-rl/data/scenarios/easy/easy_0027.json diff --git a/data/scenarios/easy/easy_0028.json b/polyguard-rl/data/scenarios/easy/easy_0028.json similarity index 100% rename from data/scenarios/easy/easy_0028.json rename to polyguard-rl/data/scenarios/easy/easy_0028.json diff --git a/data/scenarios/easy/easy_0029.json b/polyguard-rl/data/scenarios/easy/easy_0029.json similarity index 100% rename from data/scenarios/easy/easy_0029.json rename to polyguard-rl/data/scenarios/easy/easy_0029.json diff --git a/data/scenarios/easy/easy_0030.json b/polyguard-rl/data/scenarios/easy/easy_0030.json similarity index 100% rename from data/scenarios/easy/easy_0030.json rename to polyguard-rl/data/scenarios/easy/easy_0030.json diff --git a/data/scenarios/easy/easy_0031.json b/polyguard-rl/data/scenarios/easy/easy_0031.json similarity index 100% rename from data/scenarios/easy/easy_0031.json rename to polyguard-rl/data/scenarios/easy/easy_0031.json diff --git a/data/scenarios/easy/easy_0032.json b/polyguard-rl/data/scenarios/easy/easy_0032.json similarity index 100% rename from data/scenarios/easy/easy_0032.json rename to polyguard-rl/data/scenarios/easy/easy_0032.json diff --git a/data/scenarios/easy/easy_0033.json b/polyguard-rl/data/scenarios/easy/easy_0033.json similarity index 100% rename from data/scenarios/easy/easy_0033.json rename to polyguard-rl/data/scenarios/easy/easy_0033.json diff --git a/data/scenarios/easy/easy_0034.json b/polyguard-rl/data/scenarios/easy/easy_0034.json similarity index 100% rename from data/scenarios/easy/easy_0034.json rename to polyguard-rl/data/scenarios/easy/easy_0034.json diff --git a/data/scenarios/easy/easy_0035.json b/polyguard-rl/data/scenarios/easy/easy_0035.json similarity index 100% rename from data/scenarios/easy/easy_0035.json rename to polyguard-rl/data/scenarios/easy/easy_0035.json diff --git a/data/scenarios/easy/easy_0036.json b/polyguard-rl/data/scenarios/easy/easy_0036.json 
similarity index 100% rename from data/scenarios/easy/easy_0036.json rename to polyguard-rl/data/scenarios/easy/easy_0036.json diff --git a/data/scenarios/easy/easy_0037.json b/polyguard-rl/data/scenarios/easy/easy_0037.json similarity index 100% rename from data/scenarios/easy/easy_0037.json rename to polyguard-rl/data/scenarios/easy/easy_0037.json diff --git a/data/scenarios/easy/easy_0038.json b/polyguard-rl/data/scenarios/easy/easy_0038.json similarity index 100% rename from data/scenarios/easy/easy_0038.json rename to polyguard-rl/data/scenarios/easy/easy_0038.json diff --git a/data/scenarios/easy/easy_0039.json b/polyguard-rl/data/scenarios/easy/easy_0039.json similarity index 100% rename from data/scenarios/easy/easy_0039.json rename to polyguard-rl/data/scenarios/easy/easy_0039.json diff --git a/data/scenarios/easy/easy_0040.json b/polyguard-rl/data/scenarios/easy/easy_0040.json similarity index 100% rename from data/scenarios/easy/easy_0040.json rename to polyguard-rl/data/scenarios/easy/easy_0040.json diff --git a/data/scenarios/easy/easy_0041.json b/polyguard-rl/data/scenarios/easy/easy_0041.json similarity index 100% rename from data/scenarios/easy/easy_0041.json rename to polyguard-rl/data/scenarios/easy/easy_0041.json diff --git a/data/scenarios/easy/easy_0042.json b/polyguard-rl/data/scenarios/easy/easy_0042.json similarity index 100% rename from data/scenarios/easy/easy_0042.json rename to polyguard-rl/data/scenarios/easy/easy_0042.json diff --git a/data/scenarios/easy/easy_0043.json b/polyguard-rl/data/scenarios/easy/easy_0043.json similarity index 100% rename from data/scenarios/easy/easy_0043.json rename to polyguard-rl/data/scenarios/easy/easy_0043.json diff --git a/data/scenarios/easy/easy_0044.json b/polyguard-rl/data/scenarios/easy/easy_0044.json similarity index 100% rename from data/scenarios/easy/easy_0044.json rename to polyguard-rl/data/scenarios/easy/easy_0044.json diff --git a/data/scenarios/easy/easy_0045.json 
b/polyguard-rl/data/scenarios/easy/easy_0045.json similarity index 100% rename from data/scenarios/easy/easy_0045.json rename to polyguard-rl/data/scenarios/easy/easy_0045.json diff --git a/data/scenarios/easy/easy_0046.json b/polyguard-rl/data/scenarios/easy/easy_0046.json similarity index 100% rename from data/scenarios/easy/easy_0046.json rename to polyguard-rl/data/scenarios/easy/easy_0046.json diff --git a/data/scenarios/easy/easy_0047.json b/polyguard-rl/data/scenarios/easy/easy_0047.json similarity index 100% rename from data/scenarios/easy/easy_0047.json rename to polyguard-rl/data/scenarios/easy/easy_0047.json diff --git a/data/scenarios/easy/easy_0048.json b/polyguard-rl/data/scenarios/easy/easy_0048.json similarity index 100% rename from data/scenarios/easy/easy_0048.json rename to polyguard-rl/data/scenarios/easy/easy_0048.json diff --git a/data/scenarios/easy/easy_0049.json b/polyguard-rl/data/scenarios/easy/easy_0049.json similarity index 100% rename from data/scenarios/easy/easy_0049.json rename to polyguard-rl/data/scenarios/easy/easy_0049.json diff --git a/data/scenarios/easy/easy_0050.json b/polyguard-rl/data/scenarios/easy/easy_0050.json similarity index 100% rename from data/scenarios/easy/easy_0050.json rename to polyguard-rl/data/scenarios/easy/easy_0050.json diff --git a/data/scenarios/easy/easy_0051.json b/polyguard-rl/data/scenarios/easy/easy_0051.json similarity index 100% rename from data/scenarios/easy/easy_0051.json rename to polyguard-rl/data/scenarios/easy/easy_0051.json diff --git a/data/scenarios/easy/easy_0052.json b/polyguard-rl/data/scenarios/easy/easy_0052.json similarity index 100% rename from data/scenarios/easy/easy_0052.json rename to polyguard-rl/data/scenarios/easy/easy_0052.json diff --git a/data/scenarios/easy/easy_0053.json b/polyguard-rl/data/scenarios/easy/easy_0053.json similarity index 100% rename from data/scenarios/easy/easy_0053.json rename to polyguard-rl/data/scenarios/easy/easy_0053.json diff --git 
a/data/scenarios/easy/easy_0054.json b/polyguard-rl/data/scenarios/easy/easy_0054.json similarity index 100% rename from data/scenarios/easy/easy_0054.json rename to polyguard-rl/data/scenarios/easy/easy_0054.json diff --git a/data/scenarios/easy/easy_0055.json b/polyguard-rl/data/scenarios/easy/easy_0055.json similarity index 100% rename from data/scenarios/easy/easy_0055.json rename to polyguard-rl/data/scenarios/easy/easy_0055.json diff --git a/data/scenarios/easy/easy_0056.json b/polyguard-rl/data/scenarios/easy/easy_0056.json similarity index 100% rename from data/scenarios/easy/easy_0056.json rename to polyguard-rl/data/scenarios/easy/easy_0056.json diff --git a/data/scenarios/easy/easy_0057.json b/polyguard-rl/data/scenarios/easy/easy_0057.json similarity index 100% rename from data/scenarios/easy/easy_0057.json rename to polyguard-rl/data/scenarios/easy/easy_0057.json diff --git a/data/scenarios/easy/easy_0058.json b/polyguard-rl/data/scenarios/easy/easy_0058.json similarity index 100% rename from data/scenarios/easy/easy_0058.json rename to polyguard-rl/data/scenarios/easy/easy_0058.json diff --git a/data/scenarios/easy/easy_0059.json b/polyguard-rl/data/scenarios/easy/easy_0059.json similarity index 100% rename from data/scenarios/easy/easy_0059.json rename to polyguard-rl/data/scenarios/easy/easy_0059.json diff --git a/data/scenarios/easy/easy_0060.json b/polyguard-rl/data/scenarios/easy/easy_0060.json similarity index 100% rename from data/scenarios/easy/easy_0060.json rename to polyguard-rl/data/scenarios/easy/easy_0060.json diff --git a/data/scenarios/easy/easy_0061.json b/polyguard-rl/data/scenarios/easy/easy_0061.json similarity index 100% rename from data/scenarios/easy/easy_0061.json rename to polyguard-rl/data/scenarios/easy/easy_0061.json diff --git a/data/scenarios/easy/easy_0062.json b/polyguard-rl/data/scenarios/easy/easy_0062.json similarity index 100% rename from data/scenarios/easy/easy_0062.json rename to 
polyguard-rl/data/scenarios/easy/easy_0062.json diff --git a/data/scenarios/easy/easy_0063.json b/polyguard-rl/data/scenarios/easy/easy_0063.json similarity index 100% rename from data/scenarios/easy/easy_0063.json rename to polyguard-rl/data/scenarios/easy/easy_0063.json diff --git a/data/scenarios/easy/easy_0064.json b/polyguard-rl/data/scenarios/easy/easy_0064.json similarity index 100% rename from data/scenarios/easy/easy_0064.json rename to polyguard-rl/data/scenarios/easy/easy_0064.json diff --git a/data/scenarios/easy/easy_0065.json b/polyguard-rl/data/scenarios/easy/easy_0065.json similarity index 100% rename from data/scenarios/easy/easy_0065.json rename to polyguard-rl/data/scenarios/easy/easy_0065.json diff --git a/data/scenarios/easy/easy_0066.json b/polyguard-rl/data/scenarios/easy/easy_0066.json similarity index 100% rename from data/scenarios/easy/easy_0066.json rename to polyguard-rl/data/scenarios/easy/easy_0066.json diff --git a/data/scenarios/easy/easy_0067.json b/polyguard-rl/data/scenarios/easy/easy_0067.json similarity index 100% rename from data/scenarios/easy/easy_0067.json rename to polyguard-rl/data/scenarios/easy/easy_0067.json diff --git a/data/scenarios/easy/easy_0068.json b/polyguard-rl/data/scenarios/easy/easy_0068.json similarity index 100% rename from data/scenarios/easy/easy_0068.json rename to polyguard-rl/data/scenarios/easy/easy_0068.json diff --git a/data/scenarios/easy/easy_0069.json b/polyguard-rl/data/scenarios/easy/easy_0069.json similarity index 100% rename from data/scenarios/easy/easy_0069.json rename to polyguard-rl/data/scenarios/easy/easy_0069.json diff --git a/data/scenarios/easy/easy_0070.json b/polyguard-rl/data/scenarios/easy/easy_0070.json similarity index 100% rename from data/scenarios/easy/easy_0070.json rename to polyguard-rl/data/scenarios/easy/easy_0070.json diff --git a/data/scenarios/easy/easy_0071.json b/polyguard-rl/data/scenarios/easy/easy_0071.json similarity index 100% rename from 
data/scenarios/easy/easy_0071.json rename to polyguard-rl/data/scenarios/easy/easy_0071.json diff --git a/data/scenarios/easy/easy_0072.json b/polyguard-rl/data/scenarios/easy/easy_0072.json similarity index 100% rename from data/scenarios/easy/easy_0072.json rename to polyguard-rl/data/scenarios/easy/easy_0072.json diff --git a/data/scenarios/easy/easy_0073.json b/polyguard-rl/data/scenarios/easy/easy_0073.json similarity index 100% rename from data/scenarios/easy/easy_0073.json rename to polyguard-rl/data/scenarios/easy/easy_0073.json diff --git a/data/scenarios/easy/easy_0074.json b/polyguard-rl/data/scenarios/easy/easy_0074.json similarity index 100% rename from data/scenarios/easy/easy_0074.json rename to polyguard-rl/data/scenarios/easy/easy_0074.json diff --git a/data/scenarios/easy/easy_0075.json b/polyguard-rl/data/scenarios/easy/easy_0075.json similarity index 100% rename from data/scenarios/easy/easy_0075.json rename to polyguard-rl/data/scenarios/easy/easy_0075.json diff --git a/data/scenarios/easy/easy_0076.json b/polyguard-rl/data/scenarios/easy/easy_0076.json similarity index 100% rename from data/scenarios/easy/easy_0076.json rename to polyguard-rl/data/scenarios/easy/easy_0076.json diff --git a/data/scenarios/easy/easy_0077.json b/polyguard-rl/data/scenarios/easy/easy_0077.json similarity index 100% rename from data/scenarios/easy/easy_0077.json rename to polyguard-rl/data/scenarios/easy/easy_0077.json diff --git a/data/scenarios/easy/easy_0078.json b/polyguard-rl/data/scenarios/easy/easy_0078.json similarity index 100% rename from data/scenarios/easy/easy_0078.json rename to polyguard-rl/data/scenarios/easy/easy_0078.json diff --git a/data/scenarios/easy/easy_0079.json b/polyguard-rl/data/scenarios/easy/easy_0079.json similarity index 100% rename from data/scenarios/easy/easy_0079.json rename to polyguard-rl/data/scenarios/easy/easy_0079.json diff --git a/data/scenarios/easy/easy_0080.json b/polyguard-rl/data/scenarios/easy/easy_0080.json 
similarity index 100% rename from data/scenarios/easy/easy_0080.json rename to polyguard-rl/data/scenarios/easy/easy_0080.json diff --git a/data/scenarios/easy/easy_0081.json b/polyguard-rl/data/scenarios/easy/easy_0081.json similarity index 100% rename from data/scenarios/easy/easy_0081.json rename to polyguard-rl/data/scenarios/easy/easy_0081.json diff --git a/data/scenarios/easy/easy_0082.json b/polyguard-rl/data/scenarios/easy/easy_0082.json similarity index 100% rename from data/scenarios/easy/easy_0082.json rename to polyguard-rl/data/scenarios/easy/easy_0082.json diff --git a/data/scenarios/easy/easy_0083.json b/polyguard-rl/data/scenarios/easy/easy_0083.json similarity index 100% rename from data/scenarios/easy/easy_0083.json rename to polyguard-rl/data/scenarios/easy/easy_0083.json diff --git a/data/scenarios/easy/easy_0084.json b/polyguard-rl/data/scenarios/easy/easy_0084.json similarity index 100% rename from data/scenarios/easy/easy_0084.json rename to polyguard-rl/data/scenarios/easy/easy_0084.json diff --git a/data/scenarios/easy/easy_0085.json b/polyguard-rl/data/scenarios/easy/easy_0085.json similarity index 100% rename from data/scenarios/easy/easy_0085.json rename to polyguard-rl/data/scenarios/easy/easy_0085.json diff --git a/data/scenarios/easy/easy_0086.json b/polyguard-rl/data/scenarios/easy/easy_0086.json similarity index 100% rename from data/scenarios/easy/easy_0086.json rename to polyguard-rl/data/scenarios/easy/easy_0086.json diff --git a/data/scenarios/easy/easy_0087.json b/polyguard-rl/data/scenarios/easy/easy_0087.json similarity index 100% rename from data/scenarios/easy/easy_0087.json rename to polyguard-rl/data/scenarios/easy/easy_0087.json diff --git a/data/scenarios/easy/easy_0088.json b/polyguard-rl/data/scenarios/easy/easy_0088.json similarity index 100% rename from data/scenarios/easy/easy_0088.json rename to polyguard-rl/data/scenarios/easy/easy_0088.json diff --git a/data/scenarios/easy/easy_0089.json 
b/polyguard-rl/data/scenarios/easy/easy_0089.json similarity index 100% rename from data/scenarios/easy/easy_0089.json rename to polyguard-rl/data/scenarios/easy/easy_0089.json diff --git a/data/scenarios/easy/easy_0090.json b/polyguard-rl/data/scenarios/easy/easy_0090.json similarity index 100% rename from data/scenarios/easy/easy_0090.json rename to polyguard-rl/data/scenarios/easy/easy_0090.json diff --git a/data/scenarios/easy/easy_0091.json b/polyguard-rl/data/scenarios/easy/easy_0091.json similarity index 100% rename from data/scenarios/easy/easy_0091.json rename to polyguard-rl/data/scenarios/easy/easy_0091.json diff --git a/data/scenarios/easy/easy_0092.json b/polyguard-rl/data/scenarios/easy/easy_0092.json similarity index 100% rename from data/scenarios/easy/easy_0092.json rename to polyguard-rl/data/scenarios/easy/easy_0092.json diff --git a/data/scenarios/easy/easy_0093.json b/polyguard-rl/data/scenarios/easy/easy_0093.json similarity index 100% rename from data/scenarios/easy/easy_0093.json rename to polyguard-rl/data/scenarios/easy/easy_0093.json diff --git a/data/scenarios/easy/easy_0094.json b/polyguard-rl/data/scenarios/easy/easy_0094.json similarity index 100% rename from data/scenarios/easy/easy_0094.json rename to polyguard-rl/data/scenarios/easy/easy_0094.json diff --git a/data/scenarios/easy/easy_0095.json b/polyguard-rl/data/scenarios/easy/easy_0095.json similarity index 100% rename from data/scenarios/easy/easy_0095.json rename to polyguard-rl/data/scenarios/easy/easy_0095.json diff --git a/data/scenarios/easy/easy_0096.json b/polyguard-rl/data/scenarios/easy/easy_0096.json similarity index 100% rename from data/scenarios/easy/easy_0096.json rename to polyguard-rl/data/scenarios/easy/easy_0096.json diff --git a/data/scenarios/easy/easy_0097.json b/polyguard-rl/data/scenarios/easy/easy_0097.json similarity index 100% rename from data/scenarios/easy/easy_0097.json rename to polyguard-rl/data/scenarios/easy/easy_0097.json diff --git 
a/data/scenarios/easy/easy_0098.json b/polyguard-rl/data/scenarios/easy/easy_0098.json similarity index 100% rename from data/scenarios/easy/easy_0098.json rename to polyguard-rl/data/scenarios/easy/easy_0098.json diff --git a/data/scenarios/easy/easy_0099.json b/polyguard-rl/data/scenarios/easy/easy_0099.json similarity index 100% rename from data/scenarios/easy/easy_0099.json rename to polyguard-rl/data/scenarios/easy/easy_0099.json diff --git a/data/scenarios/hard/hard_0000.json b/polyguard-rl/data/scenarios/hard/hard_0000.json similarity index 100% rename from data/scenarios/hard/hard_0000.json rename to polyguard-rl/data/scenarios/hard/hard_0000.json diff --git a/data/scenarios/hard/hard_0001.json b/polyguard-rl/data/scenarios/hard/hard_0001.json similarity index 100% rename from data/scenarios/hard/hard_0001.json rename to polyguard-rl/data/scenarios/hard/hard_0001.json diff --git a/data/scenarios/hard/hard_0002.json b/polyguard-rl/data/scenarios/hard/hard_0002.json similarity index 100% rename from data/scenarios/hard/hard_0002.json rename to polyguard-rl/data/scenarios/hard/hard_0002.json diff --git a/data/scenarios/hard/hard_0003.json b/polyguard-rl/data/scenarios/hard/hard_0003.json similarity index 100% rename from data/scenarios/hard/hard_0003.json rename to polyguard-rl/data/scenarios/hard/hard_0003.json diff --git a/data/scenarios/hard/hard_0004.json b/polyguard-rl/data/scenarios/hard/hard_0004.json similarity index 100% rename from data/scenarios/hard/hard_0004.json rename to polyguard-rl/data/scenarios/hard/hard_0004.json diff --git a/data/scenarios/hard/hard_0005.json b/polyguard-rl/data/scenarios/hard/hard_0005.json similarity index 100% rename from data/scenarios/hard/hard_0005.json rename to polyguard-rl/data/scenarios/hard/hard_0005.json diff --git a/data/scenarios/hard/hard_0006.json b/polyguard-rl/data/scenarios/hard/hard_0006.json similarity index 100% rename from data/scenarios/hard/hard_0006.json rename to 
polyguard-rl/data/scenarios/hard/hard_0006.json diff --git a/data/scenarios/hard/hard_0007.json b/polyguard-rl/data/scenarios/hard/hard_0007.json similarity index 100% rename from data/scenarios/hard/hard_0007.json rename to polyguard-rl/data/scenarios/hard/hard_0007.json diff --git a/data/scenarios/hard/hard_0008.json b/polyguard-rl/data/scenarios/hard/hard_0008.json similarity index 100% rename from data/scenarios/hard/hard_0008.json rename to polyguard-rl/data/scenarios/hard/hard_0008.json diff --git a/data/scenarios/hard/hard_0009.json b/polyguard-rl/data/scenarios/hard/hard_0009.json similarity index 100% rename from data/scenarios/hard/hard_0009.json rename to polyguard-rl/data/scenarios/hard/hard_0009.json diff --git a/data/scenarios/hard/hard_0010.json b/polyguard-rl/data/scenarios/hard/hard_0010.json similarity index 100% rename from data/scenarios/hard/hard_0010.json rename to polyguard-rl/data/scenarios/hard/hard_0010.json diff --git a/data/scenarios/hard/hard_0011.json b/polyguard-rl/data/scenarios/hard/hard_0011.json similarity index 100% rename from data/scenarios/hard/hard_0011.json rename to polyguard-rl/data/scenarios/hard/hard_0011.json diff --git a/data/scenarios/hard/hard_0012.json b/polyguard-rl/data/scenarios/hard/hard_0012.json similarity index 100% rename from data/scenarios/hard/hard_0012.json rename to polyguard-rl/data/scenarios/hard/hard_0012.json diff --git a/data/scenarios/hard/hard_0013.json b/polyguard-rl/data/scenarios/hard/hard_0013.json similarity index 100% rename from data/scenarios/hard/hard_0013.json rename to polyguard-rl/data/scenarios/hard/hard_0013.json diff --git a/data/scenarios/hard/hard_0014.json b/polyguard-rl/data/scenarios/hard/hard_0014.json similarity index 100% rename from data/scenarios/hard/hard_0014.json rename to polyguard-rl/data/scenarios/hard/hard_0014.json diff --git a/data/scenarios/hard/hard_0015.json b/polyguard-rl/data/scenarios/hard/hard_0015.json similarity index 100% rename from 
data/scenarios/hard/hard_0015.json rename to polyguard-rl/data/scenarios/hard/hard_0015.json diff --git a/data/scenarios/hard/hard_0016.json b/polyguard-rl/data/scenarios/hard/hard_0016.json similarity index 100% rename from data/scenarios/hard/hard_0016.json rename to polyguard-rl/data/scenarios/hard/hard_0016.json diff --git a/data/scenarios/hard/hard_0017.json b/polyguard-rl/data/scenarios/hard/hard_0017.json similarity index 100% rename from data/scenarios/hard/hard_0017.json rename to polyguard-rl/data/scenarios/hard/hard_0017.json diff --git a/data/scenarios/hard/hard_0018.json b/polyguard-rl/data/scenarios/hard/hard_0018.json similarity index 100% rename from data/scenarios/hard/hard_0018.json rename to polyguard-rl/data/scenarios/hard/hard_0018.json diff --git a/data/scenarios/hard/hard_0019.json b/polyguard-rl/data/scenarios/hard/hard_0019.json similarity index 100% rename from data/scenarios/hard/hard_0019.json rename to polyguard-rl/data/scenarios/hard/hard_0019.json diff --git a/data/scenarios/hard/hard_0020.json b/polyguard-rl/data/scenarios/hard/hard_0020.json similarity index 100% rename from data/scenarios/hard/hard_0020.json rename to polyguard-rl/data/scenarios/hard/hard_0020.json diff --git a/data/scenarios/hard/hard_0021.json b/polyguard-rl/data/scenarios/hard/hard_0021.json similarity index 100% rename from data/scenarios/hard/hard_0021.json rename to polyguard-rl/data/scenarios/hard/hard_0021.json diff --git a/data/scenarios/hard/hard_0022.json b/polyguard-rl/data/scenarios/hard/hard_0022.json similarity index 100% rename from data/scenarios/hard/hard_0022.json rename to polyguard-rl/data/scenarios/hard/hard_0022.json diff --git a/data/scenarios/hard/hard_0023.json b/polyguard-rl/data/scenarios/hard/hard_0023.json similarity index 100% rename from data/scenarios/hard/hard_0023.json rename to polyguard-rl/data/scenarios/hard/hard_0023.json diff --git a/data/scenarios/hard/hard_0024.json b/polyguard-rl/data/scenarios/hard/hard_0024.json 
similarity index 100% rename from data/scenarios/hard/hard_0024.json rename to polyguard-rl/data/scenarios/hard/hard_0024.json diff --git a/data/scenarios/hard/hard_0025.json b/polyguard-rl/data/scenarios/hard/hard_0025.json similarity index 100% rename from data/scenarios/hard/hard_0025.json rename to polyguard-rl/data/scenarios/hard/hard_0025.json diff --git a/data/scenarios/hard/hard_0026.json b/polyguard-rl/data/scenarios/hard/hard_0026.json similarity index 100% rename from data/scenarios/hard/hard_0026.json rename to polyguard-rl/data/scenarios/hard/hard_0026.json diff --git a/data/scenarios/hard/hard_0027.json b/polyguard-rl/data/scenarios/hard/hard_0027.json similarity index 100% rename from data/scenarios/hard/hard_0027.json rename to polyguard-rl/data/scenarios/hard/hard_0027.json diff --git a/data/scenarios/hard/hard_0028.json b/polyguard-rl/data/scenarios/hard/hard_0028.json similarity index 100% rename from data/scenarios/hard/hard_0028.json rename to polyguard-rl/data/scenarios/hard/hard_0028.json diff --git a/data/scenarios/hard/hard_0029.json b/polyguard-rl/data/scenarios/hard/hard_0029.json similarity index 100% rename from data/scenarios/hard/hard_0029.json rename to polyguard-rl/data/scenarios/hard/hard_0029.json diff --git a/data/scenarios/hard/hard_0030.json b/polyguard-rl/data/scenarios/hard/hard_0030.json similarity index 100% rename from data/scenarios/hard/hard_0030.json rename to polyguard-rl/data/scenarios/hard/hard_0030.json diff --git a/data/scenarios/hard/hard_0031.json b/polyguard-rl/data/scenarios/hard/hard_0031.json similarity index 100% rename from data/scenarios/hard/hard_0031.json rename to polyguard-rl/data/scenarios/hard/hard_0031.json diff --git a/data/scenarios/hard/hard_0032.json b/polyguard-rl/data/scenarios/hard/hard_0032.json similarity index 100% rename from data/scenarios/hard/hard_0032.json rename to polyguard-rl/data/scenarios/hard/hard_0032.json diff --git a/data/scenarios/hard/hard_0033.json 
b/polyguard-rl/data/scenarios/hard/hard_0033.json similarity index 100% rename from data/scenarios/hard/hard_0033.json rename to polyguard-rl/data/scenarios/hard/hard_0033.json diff --git a/data/scenarios/hard/hard_0034.json b/polyguard-rl/data/scenarios/hard/hard_0034.json similarity index 100% rename from data/scenarios/hard/hard_0034.json rename to polyguard-rl/data/scenarios/hard/hard_0034.json diff --git a/data/scenarios/hard/hard_0035.json b/polyguard-rl/data/scenarios/hard/hard_0035.json similarity index 100% rename from data/scenarios/hard/hard_0035.json rename to polyguard-rl/data/scenarios/hard/hard_0035.json diff --git a/data/scenarios/hard/hard_0036.json b/polyguard-rl/data/scenarios/hard/hard_0036.json similarity index 100% rename from data/scenarios/hard/hard_0036.json rename to polyguard-rl/data/scenarios/hard/hard_0036.json diff --git a/data/scenarios/hard/hard_0037.json b/polyguard-rl/data/scenarios/hard/hard_0037.json similarity index 100% rename from data/scenarios/hard/hard_0037.json rename to polyguard-rl/data/scenarios/hard/hard_0037.json diff --git a/data/scenarios/hard/hard_0038.json b/polyguard-rl/data/scenarios/hard/hard_0038.json similarity index 100% rename from data/scenarios/hard/hard_0038.json rename to polyguard-rl/data/scenarios/hard/hard_0038.json diff --git a/data/scenarios/hard/hard_0039.json b/polyguard-rl/data/scenarios/hard/hard_0039.json similarity index 100% rename from data/scenarios/hard/hard_0039.json rename to polyguard-rl/data/scenarios/hard/hard_0039.json diff --git a/data/scenarios/hard/hard_0040.json b/polyguard-rl/data/scenarios/hard/hard_0040.json similarity index 100% rename from data/scenarios/hard/hard_0040.json rename to polyguard-rl/data/scenarios/hard/hard_0040.json diff --git a/data/scenarios/hard/hard_0041.json b/polyguard-rl/data/scenarios/hard/hard_0041.json similarity index 100% rename from data/scenarios/hard/hard_0041.json rename to polyguard-rl/data/scenarios/hard/hard_0041.json diff --git 
a/data/scenarios/hard/hard_0042.json b/polyguard-rl/data/scenarios/hard/hard_0042.json similarity index 100% rename from data/scenarios/hard/hard_0042.json rename to polyguard-rl/data/scenarios/hard/hard_0042.json diff --git a/data/scenarios/hard/hard_0043.json b/polyguard-rl/data/scenarios/hard/hard_0043.json similarity index 100% rename from data/scenarios/hard/hard_0043.json rename to polyguard-rl/data/scenarios/hard/hard_0043.json diff --git a/data/scenarios/hard/hard_0044.json b/polyguard-rl/data/scenarios/hard/hard_0044.json similarity index 100% rename from data/scenarios/hard/hard_0044.json rename to polyguard-rl/data/scenarios/hard/hard_0044.json diff --git a/data/scenarios/hard/hard_0045.json b/polyguard-rl/data/scenarios/hard/hard_0045.json similarity index 100% rename from data/scenarios/hard/hard_0045.json rename to polyguard-rl/data/scenarios/hard/hard_0045.json diff --git a/data/scenarios/hard/hard_0046.json b/polyguard-rl/data/scenarios/hard/hard_0046.json similarity index 100% rename from data/scenarios/hard/hard_0046.json rename to polyguard-rl/data/scenarios/hard/hard_0046.json diff --git a/data/scenarios/hard/hard_0047.json b/polyguard-rl/data/scenarios/hard/hard_0047.json similarity index 100% rename from data/scenarios/hard/hard_0047.json rename to polyguard-rl/data/scenarios/hard/hard_0047.json diff --git a/data/scenarios/hard/hard_0048.json b/polyguard-rl/data/scenarios/hard/hard_0048.json similarity index 100% rename from data/scenarios/hard/hard_0048.json rename to polyguard-rl/data/scenarios/hard/hard_0048.json diff --git a/data/scenarios/hard/hard_0049.json b/polyguard-rl/data/scenarios/hard/hard_0049.json similarity index 100% rename from data/scenarios/hard/hard_0049.json rename to polyguard-rl/data/scenarios/hard/hard_0049.json diff --git a/data/scenarios/hard/hard_0050.json b/polyguard-rl/data/scenarios/hard/hard_0050.json similarity index 100% rename from data/scenarios/hard/hard_0050.json rename to 
polyguard-rl/data/scenarios/hard/hard_0050.json diff --git a/data/scenarios/hard/hard_0051.json b/polyguard-rl/data/scenarios/hard/hard_0051.json similarity index 100% rename from data/scenarios/hard/hard_0051.json rename to polyguard-rl/data/scenarios/hard/hard_0051.json diff --git a/data/scenarios/hard/hard_0052.json b/polyguard-rl/data/scenarios/hard/hard_0052.json similarity index 100% rename from data/scenarios/hard/hard_0052.json rename to polyguard-rl/data/scenarios/hard/hard_0052.json diff --git a/data/scenarios/hard/hard_0053.json b/polyguard-rl/data/scenarios/hard/hard_0053.json similarity index 100% rename from data/scenarios/hard/hard_0053.json rename to polyguard-rl/data/scenarios/hard/hard_0053.json diff --git a/data/scenarios/hard/hard_0054.json b/polyguard-rl/data/scenarios/hard/hard_0054.json similarity index 100% rename from data/scenarios/hard/hard_0054.json rename to polyguard-rl/data/scenarios/hard/hard_0054.json diff --git a/data/scenarios/hard/hard_0055.json b/polyguard-rl/data/scenarios/hard/hard_0055.json similarity index 100% rename from data/scenarios/hard/hard_0055.json rename to polyguard-rl/data/scenarios/hard/hard_0055.json diff --git a/data/scenarios/hard/hard_0056.json b/polyguard-rl/data/scenarios/hard/hard_0056.json similarity index 100% rename from data/scenarios/hard/hard_0056.json rename to polyguard-rl/data/scenarios/hard/hard_0056.json diff --git a/data/scenarios/hard/hard_0057.json b/polyguard-rl/data/scenarios/hard/hard_0057.json similarity index 100% rename from data/scenarios/hard/hard_0057.json rename to polyguard-rl/data/scenarios/hard/hard_0057.json diff --git a/data/scenarios/hard/hard_0058.json b/polyguard-rl/data/scenarios/hard/hard_0058.json similarity index 100% rename from data/scenarios/hard/hard_0058.json rename to polyguard-rl/data/scenarios/hard/hard_0058.json diff --git a/data/scenarios/hard/hard_0059.json b/polyguard-rl/data/scenarios/hard/hard_0059.json similarity index 100% rename from 
data/scenarios/hard/hard_0059.json rename to polyguard-rl/data/scenarios/hard/hard_0059.json diff --git a/data/scenarios/hard/hard_0060.json b/polyguard-rl/data/scenarios/hard/hard_0060.json similarity index 100% rename from data/scenarios/hard/hard_0060.json rename to polyguard-rl/data/scenarios/hard/hard_0060.json diff --git a/data/scenarios/hard/hard_0061.json b/polyguard-rl/data/scenarios/hard/hard_0061.json similarity index 100% rename from data/scenarios/hard/hard_0061.json rename to polyguard-rl/data/scenarios/hard/hard_0061.json diff --git a/data/scenarios/hard/hard_0062.json b/polyguard-rl/data/scenarios/hard/hard_0062.json similarity index 100% rename from data/scenarios/hard/hard_0062.json rename to polyguard-rl/data/scenarios/hard/hard_0062.json diff --git a/data/scenarios/hard/hard_0063.json b/polyguard-rl/data/scenarios/hard/hard_0063.json similarity index 100% rename from data/scenarios/hard/hard_0063.json rename to polyguard-rl/data/scenarios/hard/hard_0063.json diff --git a/data/scenarios/hard/hard_0064.json b/polyguard-rl/data/scenarios/hard/hard_0064.json similarity index 100% rename from data/scenarios/hard/hard_0064.json rename to polyguard-rl/data/scenarios/hard/hard_0064.json diff --git a/data/scenarios/hard/hard_0065.json b/polyguard-rl/data/scenarios/hard/hard_0065.json similarity index 100% rename from data/scenarios/hard/hard_0065.json rename to polyguard-rl/data/scenarios/hard/hard_0065.json diff --git a/data/scenarios/hard/hard_0066.json b/polyguard-rl/data/scenarios/hard/hard_0066.json similarity index 100% rename from data/scenarios/hard/hard_0066.json rename to polyguard-rl/data/scenarios/hard/hard_0066.json diff --git a/data/scenarios/hard/hard_0067.json b/polyguard-rl/data/scenarios/hard/hard_0067.json similarity index 100% rename from data/scenarios/hard/hard_0067.json rename to polyguard-rl/data/scenarios/hard/hard_0067.json diff --git a/data/scenarios/hard/hard_0068.json b/polyguard-rl/data/scenarios/hard/hard_0068.json 
similarity index 100% rename from data/scenarios/hard/hard_0068.json rename to polyguard-rl/data/scenarios/hard/hard_0068.json diff --git a/data/scenarios/hard/hard_0069.json b/polyguard-rl/data/scenarios/hard/hard_0069.json similarity index 100% rename from data/scenarios/hard/hard_0069.json rename to polyguard-rl/data/scenarios/hard/hard_0069.json diff --git a/data/scenarios/hard/hard_0070.json b/polyguard-rl/data/scenarios/hard/hard_0070.json similarity index 100% rename from data/scenarios/hard/hard_0070.json rename to polyguard-rl/data/scenarios/hard/hard_0070.json diff --git a/data/scenarios/hard/hard_0071.json b/polyguard-rl/data/scenarios/hard/hard_0071.json similarity index 100% rename from data/scenarios/hard/hard_0071.json rename to polyguard-rl/data/scenarios/hard/hard_0071.json diff --git a/data/scenarios/hard/hard_0072.json b/polyguard-rl/data/scenarios/hard/hard_0072.json similarity index 100% rename from data/scenarios/hard/hard_0072.json rename to polyguard-rl/data/scenarios/hard/hard_0072.json diff --git a/data/scenarios/hard/hard_0073.json b/polyguard-rl/data/scenarios/hard/hard_0073.json similarity index 100% rename from data/scenarios/hard/hard_0073.json rename to polyguard-rl/data/scenarios/hard/hard_0073.json diff --git a/data/scenarios/hard/hard_0074.json b/polyguard-rl/data/scenarios/hard/hard_0074.json similarity index 100% rename from data/scenarios/hard/hard_0074.json rename to polyguard-rl/data/scenarios/hard/hard_0074.json diff --git a/data/scenarios/hard/hard_0075.json b/polyguard-rl/data/scenarios/hard/hard_0075.json similarity index 100% rename from data/scenarios/hard/hard_0075.json rename to polyguard-rl/data/scenarios/hard/hard_0075.json diff --git a/data/scenarios/hard/hard_0076.json b/polyguard-rl/data/scenarios/hard/hard_0076.json similarity index 100% rename from data/scenarios/hard/hard_0076.json rename to polyguard-rl/data/scenarios/hard/hard_0076.json diff --git a/data/scenarios/hard/hard_0077.json 
b/polyguard-rl/data/scenarios/hard/hard_0077.json similarity index 100% rename from data/scenarios/hard/hard_0077.json rename to polyguard-rl/data/scenarios/hard/hard_0077.json diff --git a/data/scenarios/hard/hard_0078.json b/polyguard-rl/data/scenarios/hard/hard_0078.json similarity index 100% rename from data/scenarios/hard/hard_0078.json rename to polyguard-rl/data/scenarios/hard/hard_0078.json diff --git a/data/scenarios/hard/hard_0079.json b/polyguard-rl/data/scenarios/hard/hard_0079.json similarity index 100% rename from data/scenarios/hard/hard_0079.json rename to polyguard-rl/data/scenarios/hard/hard_0079.json diff --git a/data/scenarios/hard/hard_0080.json b/polyguard-rl/data/scenarios/hard/hard_0080.json similarity index 100% rename from data/scenarios/hard/hard_0080.json rename to polyguard-rl/data/scenarios/hard/hard_0080.json diff --git a/data/scenarios/hard/hard_0081.json b/polyguard-rl/data/scenarios/hard/hard_0081.json similarity index 100% rename from data/scenarios/hard/hard_0081.json rename to polyguard-rl/data/scenarios/hard/hard_0081.json diff --git a/data/scenarios/hard/hard_0082.json b/polyguard-rl/data/scenarios/hard/hard_0082.json similarity index 100% rename from data/scenarios/hard/hard_0082.json rename to polyguard-rl/data/scenarios/hard/hard_0082.json diff --git a/data/scenarios/hard/hard_0083.json b/polyguard-rl/data/scenarios/hard/hard_0083.json similarity index 100% rename from data/scenarios/hard/hard_0083.json rename to polyguard-rl/data/scenarios/hard/hard_0083.json diff --git a/data/scenarios/hard/hard_0084.json b/polyguard-rl/data/scenarios/hard/hard_0084.json similarity index 100% rename from data/scenarios/hard/hard_0084.json rename to polyguard-rl/data/scenarios/hard/hard_0084.json diff --git a/data/scenarios/hard/hard_0085.json b/polyguard-rl/data/scenarios/hard/hard_0085.json similarity index 100% rename from data/scenarios/hard/hard_0085.json rename to polyguard-rl/data/scenarios/hard/hard_0085.json diff --git 
a/data/scenarios/hard/hard_0086.json b/polyguard-rl/data/scenarios/hard/hard_0086.json similarity index 100% rename from data/scenarios/hard/hard_0086.json rename to polyguard-rl/data/scenarios/hard/hard_0086.json diff --git a/data/scenarios/hard/hard_0087.json b/polyguard-rl/data/scenarios/hard/hard_0087.json similarity index 100% rename from data/scenarios/hard/hard_0087.json rename to polyguard-rl/data/scenarios/hard/hard_0087.json diff --git a/data/scenarios/hard/hard_0088.json b/polyguard-rl/data/scenarios/hard/hard_0088.json similarity index 100% rename from data/scenarios/hard/hard_0088.json rename to polyguard-rl/data/scenarios/hard/hard_0088.json diff --git a/data/scenarios/hard/hard_0089.json b/polyguard-rl/data/scenarios/hard/hard_0089.json similarity index 100% rename from data/scenarios/hard/hard_0089.json rename to polyguard-rl/data/scenarios/hard/hard_0089.json diff --git a/data/scenarios/hard/hard_0090.json b/polyguard-rl/data/scenarios/hard/hard_0090.json similarity index 100% rename from data/scenarios/hard/hard_0090.json rename to polyguard-rl/data/scenarios/hard/hard_0090.json diff --git a/data/scenarios/hard/hard_0091.json b/polyguard-rl/data/scenarios/hard/hard_0091.json similarity index 100% rename from data/scenarios/hard/hard_0091.json rename to polyguard-rl/data/scenarios/hard/hard_0091.json diff --git a/data/scenarios/hard/hard_0092.json b/polyguard-rl/data/scenarios/hard/hard_0092.json similarity index 100% rename from data/scenarios/hard/hard_0092.json rename to polyguard-rl/data/scenarios/hard/hard_0092.json diff --git a/data/scenarios/hard/hard_0093.json b/polyguard-rl/data/scenarios/hard/hard_0093.json similarity index 100% rename from data/scenarios/hard/hard_0093.json rename to polyguard-rl/data/scenarios/hard/hard_0093.json diff --git a/data/scenarios/hard/hard_0094.json b/polyguard-rl/data/scenarios/hard/hard_0094.json similarity index 100% rename from data/scenarios/hard/hard_0094.json rename to 
polyguard-rl/data/scenarios/hard/hard_0094.json diff --git a/data/scenarios/hard/hard_0095.json b/polyguard-rl/data/scenarios/hard/hard_0095.json similarity index 100% rename from data/scenarios/hard/hard_0095.json rename to polyguard-rl/data/scenarios/hard/hard_0095.json diff --git a/data/scenarios/hard/hard_0096.json b/polyguard-rl/data/scenarios/hard/hard_0096.json similarity index 100% rename from data/scenarios/hard/hard_0096.json rename to polyguard-rl/data/scenarios/hard/hard_0096.json diff --git a/data/scenarios/hard/hard_0097.json b/polyguard-rl/data/scenarios/hard/hard_0097.json similarity index 100% rename from data/scenarios/hard/hard_0097.json rename to polyguard-rl/data/scenarios/hard/hard_0097.json diff --git a/data/scenarios/hard/hard_0098.json b/polyguard-rl/data/scenarios/hard/hard_0098.json similarity index 100% rename from data/scenarios/hard/hard_0098.json rename to polyguard-rl/data/scenarios/hard/hard_0098.json diff --git a/data/scenarios/hard/hard_0099.json b/polyguard-rl/data/scenarios/hard/hard_0099.json similarity index 100% rename from data/scenarios/hard/hard_0099.json rename to polyguard-rl/data/scenarios/hard/hard_0099.json diff --git a/data/scenarios/hard/hard_0100.json b/polyguard-rl/data/scenarios/hard/hard_0100.json similarity index 100% rename from data/scenarios/hard/hard_0100.json rename to polyguard-rl/data/scenarios/hard/hard_0100.json diff --git a/data/scenarios/hard/hard_0101.json b/polyguard-rl/data/scenarios/hard/hard_0101.json similarity index 100% rename from data/scenarios/hard/hard_0101.json rename to polyguard-rl/data/scenarios/hard/hard_0101.json diff --git a/data/scenarios/hard/hard_0102.json b/polyguard-rl/data/scenarios/hard/hard_0102.json similarity index 100% rename from data/scenarios/hard/hard_0102.json rename to polyguard-rl/data/scenarios/hard/hard_0102.json diff --git a/data/scenarios/hard/hard_0103.json b/polyguard-rl/data/scenarios/hard/hard_0103.json similarity index 100% rename from 
data/scenarios/hard/hard_0103.json rename to polyguard-rl/data/scenarios/hard/hard_0103.json diff --git a/data/scenarios/hard/hard_0104.json b/polyguard-rl/data/scenarios/hard/hard_0104.json similarity index 100% rename from data/scenarios/hard/hard_0104.json rename to polyguard-rl/data/scenarios/hard/hard_0104.json diff --git a/data/scenarios/hard/hard_0105.json b/polyguard-rl/data/scenarios/hard/hard_0105.json similarity index 100% rename from data/scenarios/hard/hard_0105.json rename to polyguard-rl/data/scenarios/hard/hard_0105.json diff --git a/data/scenarios/hard/hard_0106.json b/polyguard-rl/data/scenarios/hard/hard_0106.json similarity index 100% rename from data/scenarios/hard/hard_0106.json rename to polyguard-rl/data/scenarios/hard/hard_0106.json diff --git a/data/scenarios/hard/hard_0107.json b/polyguard-rl/data/scenarios/hard/hard_0107.json similarity index 100% rename from data/scenarios/hard/hard_0107.json rename to polyguard-rl/data/scenarios/hard/hard_0107.json diff --git a/data/scenarios/hard/hard_0108.json b/polyguard-rl/data/scenarios/hard/hard_0108.json similarity index 100% rename from data/scenarios/hard/hard_0108.json rename to polyguard-rl/data/scenarios/hard/hard_0108.json diff --git a/data/scenarios/hard/hard_0109.json b/polyguard-rl/data/scenarios/hard/hard_0109.json similarity index 100% rename from data/scenarios/hard/hard_0109.json rename to polyguard-rl/data/scenarios/hard/hard_0109.json diff --git a/data/scenarios/hard/hard_0110.json b/polyguard-rl/data/scenarios/hard/hard_0110.json similarity index 100% rename from data/scenarios/hard/hard_0110.json rename to polyguard-rl/data/scenarios/hard/hard_0110.json diff --git a/data/scenarios/hard/hard_0111.json b/polyguard-rl/data/scenarios/hard/hard_0111.json similarity index 100% rename from data/scenarios/hard/hard_0111.json rename to polyguard-rl/data/scenarios/hard/hard_0111.json diff --git a/data/scenarios/hard/hard_0112.json b/polyguard-rl/data/scenarios/hard/hard_0112.json 
similarity index 100% rename from data/scenarios/hard/hard_0112.json rename to polyguard-rl/data/scenarios/hard/hard_0112.json diff --git a/data/scenarios/hard/hard_0113.json b/polyguard-rl/data/scenarios/hard/hard_0113.json similarity index 100% rename from data/scenarios/hard/hard_0113.json rename to polyguard-rl/data/scenarios/hard/hard_0113.json diff --git a/data/scenarios/hard/hard_0114.json b/polyguard-rl/data/scenarios/hard/hard_0114.json similarity index 100% rename from data/scenarios/hard/hard_0114.json rename to polyguard-rl/data/scenarios/hard/hard_0114.json diff --git a/data/scenarios/hard/hard_0115.json b/polyguard-rl/data/scenarios/hard/hard_0115.json similarity index 100% rename from data/scenarios/hard/hard_0115.json rename to polyguard-rl/data/scenarios/hard/hard_0115.json diff --git a/data/scenarios/hard/hard_0116.json b/polyguard-rl/data/scenarios/hard/hard_0116.json similarity index 100% rename from data/scenarios/hard/hard_0116.json rename to polyguard-rl/data/scenarios/hard/hard_0116.json diff --git a/data/scenarios/hard/hard_0117.json b/polyguard-rl/data/scenarios/hard/hard_0117.json similarity index 100% rename from data/scenarios/hard/hard_0117.json rename to polyguard-rl/data/scenarios/hard/hard_0117.json diff --git a/data/scenarios/hard/hard_0118.json b/polyguard-rl/data/scenarios/hard/hard_0118.json similarity index 100% rename from data/scenarios/hard/hard_0118.json rename to polyguard-rl/data/scenarios/hard/hard_0118.json diff --git a/data/scenarios/hard/hard_0119.json b/polyguard-rl/data/scenarios/hard/hard_0119.json similarity index 100% rename from data/scenarios/hard/hard_0119.json rename to polyguard-rl/data/scenarios/hard/hard_0119.json diff --git a/data/scenarios/hard/hard_0120.json b/polyguard-rl/data/scenarios/hard/hard_0120.json similarity index 100% rename from data/scenarios/hard/hard_0120.json rename to polyguard-rl/data/scenarios/hard/hard_0120.json diff --git a/data/scenarios/hard/hard_0121.json 
b/polyguard-rl/data/scenarios/hard/hard_0121.json similarity index 100% rename from data/scenarios/hard/hard_0121.json rename to polyguard-rl/data/scenarios/hard/hard_0121.json diff --git a/data/scenarios/hard/hard_0122.json b/polyguard-rl/data/scenarios/hard/hard_0122.json similarity index 100% rename from data/scenarios/hard/hard_0122.json rename to polyguard-rl/data/scenarios/hard/hard_0122.json diff --git a/data/scenarios/hard/hard_0123.json b/polyguard-rl/data/scenarios/hard/hard_0123.json similarity index 100% rename from data/scenarios/hard/hard_0123.json rename to polyguard-rl/data/scenarios/hard/hard_0123.json diff --git a/data/scenarios/hard/hard_0124.json b/polyguard-rl/data/scenarios/hard/hard_0124.json similarity index 100% rename from data/scenarios/hard/hard_0124.json rename to polyguard-rl/data/scenarios/hard/hard_0124.json diff --git a/data/scenarios/hard/hard_0125.json b/polyguard-rl/data/scenarios/hard/hard_0125.json similarity index 100% rename from data/scenarios/hard/hard_0125.json rename to polyguard-rl/data/scenarios/hard/hard_0125.json diff --git a/data/scenarios/hard/hard_0126.json b/polyguard-rl/data/scenarios/hard/hard_0126.json similarity index 100% rename from data/scenarios/hard/hard_0126.json rename to polyguard-rl/data/scenarios/hard/hard_0126.json diff --git a/data/scenarios/hard/hard_0127.json b/polyguard-rl/data/scenarios/hard/hard_0127.json similarity index 100% rename from data/scenarios/hard/hard_0127.json rename to polyguard-rl/data/scenarios/hard/hard_0127.json diff --git a/data/scenarios/hard/hard_0128.json b/polyguard-rl/data/scenarios/hard/hard_0128.json similarity index 100% rename from data/scenarios/hard/hard_0128.json rename to polyguard-rl/data/scenarios/hard/hard_0128.json diff --git a/data/scenarios/hard/hard_0129.json b/polyguard-rl/data/scenarios/hard/hard_0129.json similarity index 100% rename from data/scenarios/hard/hard_0129.json rename to polyguard-rl/data/scenarios/hard/hard_0129.json diff --git 
a/data/scenarios/hard/hard_0130.json b/polyguard-rl/data/scenarios/hard/hard_0130.json similarity index 100% rename from data/scenarios/hard/hard_0130.json rename to polyguard-rl/data/scenarios/hard/hard_0130.json diff --git a/data/scenarios/hard/hard_0131.json b/polyguard-rl/data/scenarios/hard/hard_0131.json similarity index 100% rename from data/scenarios/hard/hard_0131.json rename to polyguard-rl/data/scenarios/hard/hard_0131.json diff --git a/data/scenarios/hard/hard_0132.json b/polyguard-rl/data/scenarios/hard/hard_0132.json similarity index 100% rename from data/scenarios/hard/hard_0132.json rename to polyguard-rl/data/scenarios/hard/hard_0132.json diff --git a/data/scenarios/hard/hard_0133.json b/polyguard-rl/data/scenarios/hard/hard_0133.json similarity index 100% rename from data/scenarios/hard/hard_0133.json rename to polyguard-rl/data/scenarios/hard/hard_0133.json diff --git a/data/scenarios/hard/hard_0134.json b/polyguard-rl/data/scenarios/hard/hard_0134.json similarity index 100% rename from data/scenarios/hard/hard_0134.json rename to polyguard-rl/data/scenarios/hard/hard_0134.json diff --git a/data/scenarios/hard/hard_0135.json b/polyguard-rl/data/scenarios/hard/hard_0135.json similarity index 100% rename from data/scenarios/hard/hard_0135.json rename to polyguard-rl/data/scenarios/hard/hard_0135.json diff --git a/data/scenarios/hard/hard_0136.json b/polyguard-rl/data/scenarios/hard/hard_0136.json similarity index 100% rename from data/scenarios/hard/hard_0136.json rename to polyguard-rl/data/scenarios/hard/hard_0136.json diff --git a/data/scenarios/hard/hard_0137.json b/polyguard-rl/data/scenarios/hard/hard_0137.json similarity index 100% rename from data/scenarios/hard/hard_0137.json rename to polyguard-rl/data/scenarios/hard/hard_0137.json diff --git a/data/scenarios/hard/hard_0138.json b/polyguard-rl/data/scenarios/hard/hard_0138.json similarity index 100% rename from data/scenarios/hard/hard_0138.json rename to 
polyguard-rl/data/scenarios/hard/hard_0138.json diff --git a/data/scenarios/hard/hard_0139.json b/polyguard-rl/data/scenarios/hard/hard_0139.json similarity index 100% rename from data/scenarios/hard/hard_0139.json rename to polyguard-rl/data/scenarios/hard/hard_0139.json diff --git a/data/scenarios/hard/hard_0140.json b/polyguard-rl/data/scenarios/hard/hard_0140.json similarity index 100% rename from data/scenarios/hard/hard_0140.json rename to polyguard-rl/data/scenarios/hard/hard_0140.json diff --git a/data/scenarios/hard/hard_0141.json b/polyguard-rl/data/scenarios/hard/hard_0141.json similarity index 100% rename from data/scenarios/hard/hard_0141.json rename to polyguard-rl/data/scenarios/hard/hard_0141.json diff --git a/data/scenarios/hard/hard_0142.json b/polyguard-rl/data/scenarios/hard/hard_0142.json similarity index 100% rename from data/scenarios/hard/hard_0142.json rename to polyguard-rl/data/scenarios/hard/hard_0142.json diff --git a/data/scenarios/hard/hard_0143.json b/polyguard-rl/data/scenarios/hard/hard_0143.json similarity index 100% rename from data/scenarios/hard/hard_0143.json rename to polyguard-rl/data/scenarios/hard/hard_0143.json diff --git a/data/scenarios/hard/hard_0144.json b/polyguard-rl/data/scenarios/hard/hard_0144.json similarity index 100% rename from data/scenarios/hard/hard_0144.json rename to polyguard-rl/data/scenarios/hard/hard_0144.json diff --git a/data/scenarios/hard/hard_0145.json b/polyguard-rl/data/scenarios/hard/hard_0145.json similarity index 100% rename from data/scenarios/hard/hard_0145.json rename to polyguard-rl/data/scenarios/hard/hard_0145.json diff --git a/data/scenarios/hard/hard_0146.json b/polyguard-rl/data/scenarios/hard/hard_0146.json similarity index 100% rename from data/scenarios/hard/hard_0146.json rename to polyguard-rl/data/scenarios/hard/hard_0146.json diff --git a/data/scenarios/hard/hard_0147.json b/polyguard-rl/data/scenarios/hard/hard_0147.json similarity index 100% rename from 
data/scenarios/hard/hard_0147.json rename to polyguard-rl/data/scenarios/hard/hard_0147.json diff --git a/data/scenarios/hard/hard_0148.json b/polyguard-rl/data/scenarios/hard/hard_0148.json similarity index 100% rename from data/scenarios/hard/hard_0148.json rename to polyguard-rl/data/scenarios/hard/hard_0148.json diff --git a/data/scenarios/hard/hard_0149.json b/polyguard-rl/data/scenarios/hard/hard_0149.json similarity index 100% rename from data/scenarios/hard/hard_0149.json rename to polyguard-rl/data/scenarios/hard/hard_0149.json diff --git a/data/scenarios/hard/hard_0150.json b/polyguard-rl/data/scenarios/hard/hard_0150.json similarity index 100% rename from data/scenarios/hard/hard_0150.json rename to polyguard-rl/data/scenarios/hard/hard_0150.json diff --git a/data/scenarios/hard/hard_0151.json b/polyguard-rl/data/scenarios/hard/hard_0151.json similarity index 100% rename from data/scenarios/hard/hard_0151.json rename to polyguard-rl/data/scenarios/hard/hard_0151.json diff --git a/data/scenarios/hard/hard_0152.json b/polyguard-rl/data/scenarios/hard/hard_0152.json similarity index 100% rename from data/scenarios/hard/hard_0152.json rename to polyguard-rl/data/scenarios/hard/hard_0152.json diff --git a/data/scenarios/hard/hard_0153.json b/polyguard-rl/data/scenarios/hard/hard_0153.json similarity index 100% rename from data/scenarios/hard/hard_0153.json rename to polyguard-rl/data/scenarios/hard/hard_0153.json diff --git a/data/scenarios/hard/hard_0154.json b/polyguard-rl/data/scenarios/hard/hard_0154.json similarity index 100% rename from data/scenarios/hard/hard_0154.json rename to polyguard-rl/data/scenarios/hard/hard_0154.json diff --git a/data/scenarios/hard/hard_0155.json b/polyguard-rl/data/scenarios/hard/hard_0155.json similarity index 100% rename from data/scenarios/hard/hard_0155.json rename to polyguard-rl/data/scenarios/hard/hard_0155.json diff --git a/data/scenarios/hard/hard_0156.json b/polyguard-rl/data/scenarios/hard/hard_0156.json 
similarity index 100% rename from data/scenarios/hard/hard_0156.json rename to polyguard-rl/data/scenarios/hard/hard_0156.json diff --git a/data/scenarios/hard/hard_0157.json b/polyguard-rl/data/scenarios/hard/hard_0157.json similarity index 100% rename from data/scenarios/hard/hard_0157.json rename to polyguard-rl/data/scenarios/hard/hard_0157.json diff --git a/data/scenarios/hard/hard_0158.json b/polyguard-rl/data/scenarios/hard/hard_0158.json similarity index 100% rename from data/scenarios/hard/hard_0158.json rename to polyguard-rl/data/scenarios/hard/hard_0158.json diff --git a/data/scenarios/hard/hard_0159.json b/polyguard-rl/data/scenarios/hard/hard_0159.json similarity index 100% rename from data/scenarios/hard/hard_0159.json rename to polyguard-rl/data/scenarios/hard/hard_0159.json diff --git a/data/scenarios/hard/hard_0160.json b/polyguard-rl/data/scenarios/hard/hard_0160.json similarity index 100% rename from data/scenarios/hard/hard_0160.json rename to polyguard-rl/data/scenarios/hard/hard_0160.json diff --git a/data/scenarios/hard/hard_0161.json b/polyguard-rl/data/scenarios/hard/hard_0161.json similarity index 100% rename from data/scenarios/hard/hard_0161.json rename to polyguard-rl/data/scenarios/hard/hard_0161.json diff --git a/data/scenarios/hard/hard_0162.json b/polyguard-rl/data/scenarios/hard/hard_0162.json similarity index 100% rename from data/scenarios/hard/hard_0162.json rename to polyguard-rl/data/scenarios/hard/hard_0162.json diff --git a/data/scenarios/hard/hard_0163.json b/polyguard-rl/data/scenarios/hard/hard_0163.json similarity index 100% rename from data/scenarios/hard/hard_0163.json rename to polyguard-rl/data/scenarios/hard/hard_0163.json diff --git a/data/scenarios/hard/hard_0164.json b/polyguard-rl/data/scenarios/hard/hard_0164.json similarity index 100% rename from data/scenarios/hard/hard_0164.json rename to polyguard-rl/data/scenarios/hard/hard_0164.json diff --git a/data/scenarios/hard/hard_0165.json 
b/polyguard-rl/data/scenarios/hard/hard_0165.json similarity index 100% rename from data/scenarios/hard/hard_0165.json rename to polyguard-rl/data/scenarios/hard/hard_0165.json diff --git a/data/scenarios/hard/hard_0166.json b/polyguard-rl/data/scenarios/hard/hard_0166.json similarity index 100% rename from data/scenarios/hard/hard_0166.json rename to polyguard-rl/data/scenarios/hard/hard_0166.json diff --git a/data/scenarios/hard/hard_0167.json b/polyguard-rl/data/scenarios/hard/hard_0167.json similarity index 100% rename from data/scenarios/hard/hard_0167.json rename to polyguard-rl/data/scenarios/hard/hard_0167.json diff --git a/data/scenarios/hard/hard_0168.json b/polyguard-rl/data/scenarios/hard/hard_0168.json similarity index 100% rename from data/scenarios/hard/hard_0168.json rename to polyguard-rl/data/scenarios/hard/hard_0168.json diff --git a/data/scenarios/hard/hard_0169.json b/polyguard-rl/data/scenarios/hard/hard_0169.json similarity index 100% rename from data/scenarios/hard/hard_0169.json rename to polyguard-rl/data/scenarios/hard/hard_0169.json diff --git a/data/scenarios/hard/hard_0170.json b/polyguard-rl/data/scenarios/hard/hard_0170.json similarity index 100% rename from data/scenarios/hard/hard_0170.json rename to polyguard-rl/data/scenarios/hard/hard_0170.json diff --git a/data/scenarios/hard/hard_0171.json b/polyguard-rl/data/scenarios/hard/hard_0171.json similarity index 100% rename from data/scenarios/hard/hard_0171.json rename to polyguard-rl/data/scenarios/hard/hard_0171.json diff --git a/data/scenarios/hard/hard_0172.json b/polyguard-rl/data/scenarios/hard/hard_0172.json similarity index 100% rename from data/scenarios/hard/hard_0172.json rename to polyguard-rl/data/scenarios/hard/hard_0172.json diff --git a/data/scenarios/hard/hard_0173.json b/polyguard-rl/data/scenarios/hard/hard_0173.json similarity index 100% rename from data/scenarios/hard/hard_0173.json rename to polyguard-rl/data/scenarios/hard/hard_0173.json diff --git 
a/data/scenarios/hard/hard_0174.json b/polyguard-rl/data/scenarios/hard/hard_0174.json similarity index 100% rename from data/scenarios/hard/hard_0174.json rename to polyguard-rl/data/scenarios/hard/hard_0174.json diff --git a/data/scenarios/hard/hard_0175.json b/polyguard-rl/data/scenarios/hard/hard_0175.json similarity index 100% rename from data/scenarios/hard/hard_0175.json rename to polyguard-rl/data/scenarios/hard/hard_0175.json diff --git a/data/scenarios/hard/hard_0176.json b/polyguard-rl/data/scenarios/hard/hard_0176.json similarity index 100% rename from data/scenarios/hard/hard_0176.json rename to polyguard-rl/data/scenarios/hard/hard_0176.json diff --git a/data/scenarios/hard/hard_0177.json b/polyguard-rl/data/scenarios/hard/hard_0177.json similarity index 100% rename from data/scenarios/hard/hard_0177.json rename to polyguard-rl/data/scenarios/hard/hard_0177.json diff --git a/data/scenarios/hard/hard_0178.json b/polyguard-rl/data/scenarios/hard/hard_0178.json similarity index 100% rename from data/scenarios/hard/hard_0178.json rename to polyguard-rl/data/scenarios/hard/hard_0178.json diff --git a/data/scenarios/hard/hard_0179.json b/polyguard-rl/data/scenarios/hard/hard_0179.json similarity index 100% rename from data/scenarios/hard/hard_0179.json rename to polyguard-rl/data/scenarios/hard/hard_0179.json diff --git a/data/scenarios/hard/hard_0180.json b/polyguard-rl/data/scenarios/hard/hard_0180.json similarity index 100% rename from data/scenarios/hard/hard_0180.json rename to polyguard-rl/data/scenarios/hard/hard_0180.json diff --git a/data/scenarios/hard/hard_0181.json b/polyguard-rl/data/scenarios/hard/hard_0181.json similarity index 100% rename from data/scenarios/hard/hard_0181.json rename to polyguard-rl/data/scenarios/hard/hard_0181.json diff --git a/data/scenarios/hard/hard_0182.json b/polyguard-rl/data/scenarios/hard/hard_0182.json similarity index 100% rename from data/scenarios/hard/hard_0182.json rename to 
polyguard-rl/data/scenarios/hard/hard_0182.json diff --git a/data/scenarios/hard/hard_0183.json b/polyguard-rl/data/scenarios/hard/hard_0183.json similarity index 100% rename from data/scenarios/hard/hard_0183.json rename to polyguard-rl/data/scenarios/hard/hard_0183.json diff --git a/data/scenarios/hard/hard_0184.json b/polyguard-rl/data/scenarios/hard/hard_0184.json similarity index 100% rename from data/scenarios/hard/hard_0184.json rename to polyguard-rl/data/scenarios/hard/hard_0184.json diff --git a/data/scenarios/hard/hard_0185.json b/polyguard-rl/data/scenarios/hard/hard_0185.json similarity index 100% rename from data/scenarios/hard/hard_0185.json rename to polyguard-rl/data/scenarios/hard/hard_0185.json diff --git a/data/scenarios/hard/hard_0186.json b/polyguard-rl/data/scenarios/hard/hard_0186.json similarity index 100% rename from data/scenarios/hard/hard_0186.json rename to polyguard-rl/data/scenarios/hard/hard_0186.json diff --git a/data/scenarios/hard/hard_0187.json b/polyguard-rl/data/scenarios/hard/hard_0187.json similarity index 100% rename from data/scenarios/hard/hard_0187.json rename to polyguard-rl/data/scenarios/hard/hard_0187.json diff --git a/data/scenarios/hard/hard_0188.json b/polyguard-rl/data/scenarios/hard/hard_0188.json similarity index 100% rename from data/scenarios/hard/hard_0188.json rename to polyguard-rl/data/scenarios/hard/hard_0188.json diff --git a/data/scenarios/hard/hard_0189.json b/polyguard-rl/data/scenarios/hard/hard_0189.json similarity index 100% rename from data/scenarios/hard/hard_0189.json rename to polyguard-rl/data/scenarios/hard/hard_0189.json diff --git a/data/scenarios/hard/hard_0190.json b/polyguard-rl/data/scenarios/hard/hard_0190.json similarity index 100% rename from data/scenarios/hard/hard_0190.json rename to polyguard-rl/data/scenarios/hard/hard_0190.json diff --git a/data/scenarios/hard/hard_0191.json b/polyguard-rl/data/scenarios/hard/hard_0191.json similarity index 100% rename from 
data/scenarios/hard/hard_0191.json rename to polyguard-rl/data/scenarios/hard/hard_0191.json diff --git a/data/scenarios/hard/hard_0192.json b/polyguard-rl/data/scenarios/hard/hard_0192.json similarity index 100% rename from data/scenarios/hard/hard_0192.json rename to polyguard-rl/data/scenarios/hard/hard_0192.json diff --git a/data/scenarios/hard/hard_0193.json b/polyguard-rl/data/scenarios/hard/hard_0193.json similarity index 100% rename from data/scenarios/hard/hard_0193.json rename to polyguard-rl/data/scenarios/hard/hard_0193.json diff --git a/data/scenarios/hard/hard_0194.json b/polyguard-rl/data/scenarios/hard/hard_0194.json similarity index 100% rename from data/scenarios/hard/hard_0194.json rename to polyguard-rl/data/scenarios/hard/hard_0194.json diff --git a/data/scenarios/hard/hard_0195.json b/polyguard-rl/data/scenarios/hard/hard_0195.json similarity index 100% rename from data/scenarios/hard/hard_0195.json rename to polyguard-rl/data/scenarios/hard/hard_0195.json diff --git a/data/scenarios/hard/hard_0196.json b/polyguard-rl/data/scenarios/hard/hard_0196.json similarity index 100% rename from data/scenarios/hard/hard_0196.json rename to polyguard-rl/data/scenarios/hard/hard_0196.json diff --git a/data/scenarios/hard/hard_0197.json b/polyguard-rl/data/scenarios/hard/hard_0197.json similarity index 100% rename from data/scenarios/hard/hard_0197.json rename to polyguard-rl/data/scenarios/hard/hard_0197.json diff --git a/data/scenarios/hard/hard_0198.json b/polyguard-rl/data/scenarios/hard/hard_0198.json similarity index 100% rename from data/scenarios/hard/hard_0198.json rename to polyguard-rl/data/scenarios/hard/hard_0198.json diff --git a/data/scenarios/hard/hard_0199.json b/polyguard-rl/data/scenarios/hard/hard_0199.json similarity index 100% rename from data/scenarios/hard/hard_0199.json rename to polyguard-rl/data/scenarios/hard/hard_0199.json diff --git a/data/scenarios/medium/medium_0000.json b/polyguard-rl/data/scenarios/medium/medium_0000.json 
similarity index 100% rename from data/scenarios/medium/medium_0000.json rename to polyguard-rl/data/scenarios/medium/medium_0000.json diff --git a/data/scenarios/medium/medium_0001.json b/polyguard-rl/data/scenarios/medium/medium_0001.json similarity index 100% rename from data/scenarios/medium/medium_0001.json rename to polyguard-rl/data/scenarios/medium/medium_0001.json diff --git a/data/scenarios/medium/medium_0002.json b/polyguard-rl/data/scenarios/medium/medium_0002.json similarity index 100% rename from data/scenarios/medium/medium_0002.json rename to polyguard-rl/data/scenarios/medium/medium_0002.json diff --git a/data/scenarios/medium/medium_0003.json b/polyguard-rl/data/scenarios/medium/medium_0003.json similarity index 100% rename from data/scenarios/medium/medium_0003.json rename to polyguard-rl/data/scenarios/medium/medium_0003.json diff --git a/data/scenarios/medium/medium_0004.json b/polyguard-rl/data/scenarios/medium/medium_0004.json similarity index 100% rename from data/scenarios/medium/medium_0004.json rename to polyguard-rl/data/scenarios/medium/medium_0004.json diff --git a/data/scenarios/medium/medium_0005.json b/polyguard-rl/data/scenarios/medium/medium_0005.json similarity index 100% rename from data/scenarios/medium/medium_0005.json rename to polyguard-rl/data/scenarios/medium/medium_0005.json diff --git a/data/scenarios/medium/medium_0006.json b/polyguard-rl/data/scenarios/medium/medium_0006.json similarity index 100% rename from data/scenarios/medium/medium_0006.json rename to polyguard-rl/data/scenarios/medium/medium_0006.json diff --git a/data/scenarios/medium/medium_0007.json b/polyguard-rl/data/scenarios/medium/medium_0007.json similarity index 100% rename from data/scenarios/medium/medium_0007.json rename to polyguard-rl/data/scenarios/medium/medium_0007.json diff --git a/data/scenarios/medium/medium_0008.json b/polyguard-rl/data/scenarios/medium/medium_0008.json similarity index 100% rename from 
data/scenarios/medium/medium_0008.json rename to polyguard-rl/data/scenarios/medium/medium_0008.json diff --git a/data/scenarios/medium/medium_0009.json b/polyguard-rl/data/scenarios/medium/medium_0009.json similarity index 100% rename from data/scenarios/medium/medium_0009.json rename to polyguard-rl/data/scenarios/medium/medium_0009.json diff --git a/data/scenarios/medium/medium_0010.json b/polyguard-rl/data/scenarios/medium/medium_0010.json similarity index 100% rename from data/scenarios/medium/medium_0010.json rename to polyguard-rl/data/scenarios/medium/medium_0010.json diff --git a/data/scenarios/medium/medium_0011.json b/polyguard-rl/data/scenarios/medium/medium_0011.json similarity index 100% rename from data/scenarios/medium/medium_0011.json rename to polyguard-rl/data/scenarios/medium/medium_0011.json diff --git a/data/scenarios/medium/medium_0012.json b/polyguard-rl/data/scenarios/medium/medium_0012.json similarity index 100% rename from data/scenarios/medium/medium_0012.json rename to polyguard-rl/data/scenarios/medium/medium_0012.json diff --git a/data/scenarios/medium/medium_0013.json b/polyguard-rl/data/scenarios/medium/medium_0013.json similarity index 100% rename from data/scenarios/medium/medium_0013.json rename to polyguard-rl/data/scenarios/medium/medium_0013.json diff --git a/data/scenarios/medium/medium_0014.json b/polyguard-rl/data/scenarios/medium/medium_0014.json similarity index 100% rename from data/scenarios/medium/medium_0014.json rename to polyguard-rl/data/scenarios/medium/medium_0014.json diff --git a/data/scenarios/medium/medium_0015.json b/polyguard-rl/data/scenarios/medium/medium_0015.json similarity index 100% rename from data/scenarios/medium/medium_0015.json rename to polyguard-rl/data/scenarios/medium/medium_0015.json diff --git a/data/scenarios/medium/medium_0016.json b/polyguard-rl/data/scenarios/medium/medium_0016.json similarity index 100% rename from data/scenarios/medium/medium_0016.json rename to 
polyguard-rl/data/scenarios/medium/medium_0016.json diff --git a/data/scenarios/medium/medium_0017.json b/polyguard-rl/data/scenarios/medium/medium_0017.json similarity index 100% rename from data/scenarios/medium/medium_0017.json rename to polyguard-rl/data/scenarios/medium/medium_0017.json diff --git a/data/scenarios/medium/medium_0018.json b/polyguard-rl/data/scenarios/medium/medium_0018.json similarity index 100% rename from data/scenarios/medium/medium_0018.json rename to polyguard-rl/data/scenarios/medium/medium_0018.json diff --git a/data/scenarios/medium/medium_0019.json b/polyguard-rl/data/scenarios/medium/medium_0019.json similarity index 100% rename from data/scenarios/medium/medium_0019.json rename to polyguard-rl/data/scenarios/medium/medium_0019.json diff --git a/data/scenarios/medium/medium_0020.json b/polyguard-rl/data/scenarios/medium/medium_0020.json similarity index 100% rename from data/scenarios/medium/medium_0020.json rename to polyguard-rl/data/scenarios/medium/medium_0020.json diff --git a/data/scenarios/medium/medium_0021.json b/polyguard-rl/data/scenarios/medium/medium_0021.json similarity index 100% rename from data/scenarios/medium/medium_0021.json rename to polyguard-rl/data/scenarios/medium/medium_0021.json diff --git a/data/scenarios/medium/medium_0022.json b/polyguard-rl/data/scenarios/medium/medium_0022.json similarity index 100% rename from data/scenarios/medium/medium_0022.json rename to polyguard-rl/data/scenarios/medium/medium_0022.json diff --git a/data/scenarios/medium/medium_0023.json b/polyguard-rl/data/scenarios/medium/medium_0023.json similarity index 100% rename from data/scenarios/medium/medium_0023.json rename to polyguard-rl/data/scenarios/medium/medium_0023.json diff --git a/data/scenarios/medium/medium_0024.json b/polyguard-rl/data/scenarios/medium/medium_0024.json similarity index 100% rename from data/scenarios/medium/medium_0024.json rename to polyguard-rl/data/scenarios/medium/medium_0024.json diff --git 
a/data/scenarios/medium/medium_0025.json b/polyguard-rl/data/scenarios/medium/medium_0025.json similarity index 100% rename from data/scenarios/medium/medium_0025.json rename to polyguard-rl/data/scenarios/medium/medium_0025.json diff --git a/data/scenarios/medium/medium_0026.json b/polyguard-rl/data/scenarios/medium/medium_0026.json similarity index 100% rename from data/scenarios/medium/medium_0026.json rename to polyguard-rl/data/scenarios/medium/medium_0026.json diff --git a/data/scenarios/medium/medium_0027.json b/polyguard-rl/data/scenarios/medium/medium_0027.json similarity index 100% rename from data/scenarios/medium/medium_0027.json rename to polyguard-rl/data/scenarios/medium/medium_0027.json diff --git a/data/scenarios/medium/medium_0028.json b/polyguard-rl/data/scenarios/medium/medium_0028.json similarity index 100% rename from data/scenarios/medium/medium_0028.json rename to polyguard-rl/data/scenarios/medium/medium_0028.json diff --git a/data/scenarios/medium/medium_0029.json b/polyguard-rl/data/scenarios/medium/medium_0029.json similarity index 100% rename from data/scenarios/medium/medium_0029.json rename to polyguard-rl/data/scenarios/medium/medium_0029.json diff --git a/data/scenarios/medium/medium_0030.json b/polyguard-rl/data/scenarios/medium/medium_0030.json similarity index 100% rename from data/scenarios/medium/medium_0030.json rename to polyguard-rl/data/scenarios/medium/medium_0030.json diff --git a/data/scenarios/medium/medium_0031.json b/polyguard-rl/data/scenarios/medium/medium_0031.json similarity index 100% rename from data/scenarios/medium/medium_0031.json rename to polyguard-rl/data/scenarios/medium/medium_0031.json diff --git a/data/scenarios/medium/medium_0032.json b/polyguard-rl/data/scenarios/medium/medium_0032.json similarity index 100% rename from data/scenarios/medium/medium_0032.json rename to polyguard-rl/data/scenarios/medium/medium_0032.json diff --git a/data/scenarios/medium/medium_0033.json 
b/polyguard-rl/data/scenarios/medium/medium_0033.json similarity index 100% rename from data/scenarios/medium/medium_0033.json rename to polyguard-rl/data/scenarios/medium/medium_0033.json diff --git a/data/scenarios/medium/medium_0034.json b/polyguard-rl/data/scenarios/medium/medium_0034.json similarity index 100% rename from data/scenarios/medium/medium_0034.json rename to polyguard-rl/data/scenarios/medium/medium_0034.json diff --git a/data/scenarios/medium/medium_0035.json b/polyguard-rl/data/scenarios/medium/medium_0035.json similarity index 100% rename from data/scenarios/medium/medium_0035.json rename to polyguard-rl/data/scenarios/medium/medium_0035.json diff --git a/data/scenarios/medium/medium_0036.json b/polyguard-rl/data/scenarios/medium/medium_0036.json similarity index 100% rename from data/scenarios/medium/medium_0036.json rename to polyguard-rl/data/scenarios/medium/medium_0036.json diff --git a/data/scenarios/medium/medium_0037.json b/polyguard-rl/data/scenarios/medium/medium_0037.json similarity index 100% rename from data/scenarios/medium/medium_0037.json rename to polyguard-rl/data/scenarios/medium/medium_0037.json diff --git a/data/scenarios/medium/medium_0038.json b/polyguard-rl/data/scenarios/medium/medium_0038.json similarity index 100% rename from data/scenarios/medium/medium_0038.json rename to polyguard-rl/data/scenarios/medium/medium_0038.json diff --git a/data/scenarios/medium/medium_0039.json b/polyguard-rl/data/scenarios/medium/medium_0039.json similarity index 100% rename from data/scenarios/medium/medium_0039.json rename to polyguard-rl/data/scenarios/medium/medium_0039.json diff --git a/data/scenarios/medium/medium_0040.json b/polyguard-rl/data/scenarios/medium/medium_0040.json similarity index 100% rename from data/scenarios/medium/medium_0040.json rename to polyguard-rl/data/scenarios/medium/medium_0040.json diff --git a/data/scenarios/medium/medium_0041.json b/polyguard-rl/data/scenarios/medium/medium_0041.json similarity index 
100% rename from data/scenarios/medium/medium_0041.json rename to polyguard-rl/data/scenarios/medium/medium_0041.json diff --git a/data/scenarios/medium/medium_0042.json b/polyguard-rl/data/scenarios/medium/medium_0042.json similarity index 100% rename from data/scenarios/medium/medium_0042.json rename to polyguard-rl/data/scenarios/medium/medium_0042.json diff --git a/data/scenarios/medium/medium_0043.json b/polyguard-rl/data/scenarios/medium/medium_0043.json similarity index 100% rename from data/scenarios/medium/medium_0043.json rename to polyguard-rl/data/scenarios/medium/medium_0043.json diff --git a/data/scenarios/medium/medium_0044.json b/polyguard-rl/data/scenarios/medium/medium_0044.json similarity index 100% rename from data/scenarios/medium/medium_0044.json rename to polyguard-rl/data/scenarios/medium/medium_0044.json diff --git a/data/scenarios/medium/medium_0045.json b/polyguard-rl/data/scenarios/medium/medium_0045.json similarity index 100% rename from data/scenarios/medium/medium_0045.json rename to polyguard-rl/data/scenarios/medium/medium_0045.json diff --git a/data/scenarios/medium/medium_0046.json b/polyguard-rl/data/scenarios/medium/medium_0046.json similarity index 100% rename from data/scenarios/medium/medium_0046.json rename to polyguard-rl/data/scenarios/medium/medium_0046.json diff --git a/data/scenarios/medium/medium_0047.json b/polyguard-rl/data/scenarios/medium/medium_0047.json similarity index 100% rename from data/scenarios/medium/medium_0047.json rename to polyguard-rl/data/scenarios/medium/medium_0047.json diff --git a/data/scenarios/medium/medium_0048.json b/polyguard-rl/data/scenarios/medium/medium_0048.json similarity index 100% rename from data/scenarios/medium/medium_0048.json rename to polyguard-rl/data/scenarios/medium/medium_0048.json diff --git a/data/scenarios/medium/medium_0049.json b/polyguard-rl/data/scenarios/medium/medium_0049.json similarity index 100% rename from data/scenarios/medium/medium_0049.json rename to 
polyguard-rl/data/scenarios/medium/medium_0049.json diff --git a/data/scenarios/medium/medium_0050.json b/polyguard-rl/data/scenarios/medium/medium_0050.json similarity index 100% rename from data/scenarios/medium/medium_0050.json rename to polyguard-rl/data/scenarios/medium/medium_0050.json diff --git a/data/scenarios/medium/medium_0051.json b/polyguard-rl/data/scenarios/medium/medium_0051.json similarity index 100% rename from data/scenarios/medium/medium_0051.json rename to polyguard-rl/data/scenarios/medium/medium_0051.json diff --git a/data/scenarios/medium/medium_0052.json b/polyguard-rl/data/scenarios/medium/medium_0052.json similarity index 100% rename from data/scenarios/medium/medium_0052.json rename to polyguard-rl/data/scenarios/medium/medium_0052.json diff --git a/data/scenarios/medium/medium_0053.json b/polyguard-rl/data/scenarios/medium/medium_0053.json similarity index 100% rename from data/scenarios/medium/medium_0053.json rename to polyguard-rl/data/scenarios/medium/medium_0053.json diff --git a/data/scenarios/medium/medium_0054.json b/polyguard-rl/data/scenarios/medium/medium_0054.json similarity index 100% rename from data/scenarios/medium/medium_0054.json rename to polyguard-rl/data/scenarios/medium/medium_0054.json diff --git a/data/scenarios/medium/medium_0055.json b/polyguard-rl/data/scenarios/medium/medium_0055.json similarity index 100% rename from data/scenarios/medium/medium_0055.json rename to polyguard-rl/data/scenarios/medium/medium_0055.json diff --git a/data/scenarios/medium/medium_0056.json b/polyguard-rl/data/scenarios/medium/medium_0056.json similarity index 100% rename from data/scenarios/medium/medium_0056.json rename to polyguard-rl/data/scenarios/medium/medium_0056.json diff --git a/data/scenarios/medium/medium_0057.json b/polyguard-rl/data/scenarios/medium/medium_0057.json similarity index 100% rename from data/scenarios/medium/medium_0057.json rename to polyguard-rl/data/scenarios/medium/medium_0057.json diff --git 
a/data/scenarios/medium/medium_0058.json b/polyguard-rl/data/scenarios/medium/medium_0058.json similarity index 100% rename from data/scenarios/medium/medium_0058.json rename to polyguard-rl/data/scenarios/medium/medium_0058.json diff --git a/data/scenarios/medium/medium_0059.json b/polyguard-rl/data/scenarios/medium/medium_0059.json similarity index 100% rename from data/scenarios/medium/medium_0059.json rename to polyguard-rl/data/scenarios/medium/medium_0059.json diff --git a/data/scenarios/medium/medium_0060.json b/polyguard-rl/data/scenarios/medium/medium_0060.json similarity index 100% rename from data/scenarios/medium/medium_0060.json rename to polyguard-rl/data/scenarios/medium/medium_0060.json diff --git a/data/scenarios/medium/medium_0061.json b/polyguard-rl/data/scenarios/medium/medium_0061.json similarity index 100% rename from data/scenarios/medium/medium_0061.json rename to polyguard-rl/data/scenarios/medium/medium_0061.json diff --git a/data/scenarios/medium/medium_0062.json b/polyguard-rl/data/scenarios/medium/medium_0062.json similarity index 100% rename from data/scenarios/medium/medium_0062.json rename to polyguard-rl/data/scenarios/medium/medium_0062.json diff --git a/data/scenarios/medium/medium_0063.json b/polyguard-rl/data/scenarios/medium/medium_0063.json similarity index 100% rename from data/scenarios/medium/medium_0063.json rename to polyguard-rl/data/scenarios/medium/medium_0063.json diff --git a/data/scenarios/medium/medium_0064.json b/polyguard-rl/data/scenarios/medium/medium_0064.json similarity index 100% rename from data/scenarios/medium/medium_0064.json rename to polyguard-rl/data/scenarios/medium/medium_0064.json diff --git a/data/scenarios/medium/medium_0065.json b/polyguard-rl/data/scenarios/medium/medium_0065.json similarity index 100% rename from data/scenarios/medium/medium_0065.json rename to polyguard-rl/data/scenarios/medium/medium_0065.json diff --git a/data/scenarios/medium/medium_0066.json 
b/polyguard-rl/data/scenarios/medium/medium_0066.json similarity index 100% rename from data/scenarios/medium/medium_0066.json rename to polyguard-rl/data/scenarios/medium/medium_0066.json diff --git a/data/scenarios/medium/medium_0067.json b/polyguard-rl/data/scenarios/medium/medium_0067.json similarity index 100% rename from data/scenarios/medium/medium_0067.json rename to polyguard-rl/data/scenarios/medium/medium_0067.json diff --git a/data/scenarios/medium/medium_0068.json b/polyguard-rl/data/scenarios/medium/medium_0068.json similarity index 100% rename from data/scenarios/medium/medium_0068.json rename to polyguard-rl/data/scenarios/medium/medium_0068.json diff --git a/data/scenarios/medium/medium_0069.json b/polyguard-rl/data/scenarios/medium/medium_0069.json similarity index 100% rename from data/scenarios/medium/medium_0069.json rename to polyguard-rl/data/scenarios/medium/medium_0069.json diff --git a/data/scenarios/medium/medium_0070.json b/polyguard-rl/data/scenarios/medium/medium_0070.json similarity index 100% rename from data/scenarios/medium/medium_0070.json rename to polyguard-rl/data/scenarios/medium/medium_0070.json diff --git a/data/scenarios/medium/medium_0071.json b/polyguard-rl/data/scenarios/medium/medium_0071.json similarity index 100% rename from data/scenarios/medium/medium_0071.json rename to polyguard-rl/data/scenarios/medium/medium_0071.json diff --git a/data/scenarios/medium/medium_0072.json b/polyguard-rl/data/scenarios/medium/medium_0072.json similarity index 100% rename from data/scenarios/medium/medium_0072.json rename to polyguard-rl/data/scenarios/medium/medium_0072.json diff --git a/data/scenarios/medium/medium_0073.json b/polyguard-rl/data/scenarios/medium/medium_0073.json similarity index 100% rename from data/scenarios/medium/medium_0073.json rename to polyguard-rl/data/scenarios/medium/medium_0073.json diff --git a/data/scenarios/medium/medium_0074.json b/polyguard-rl/data/scenarios/medium/medium_0074.json similarity index 
100% rename from data/scenarios/medium/medium_0074.json rename to polyguard-rl/data/scenarios/medium/medium_0074.json diff --git a/data/scenarios/medium/medium_0075.json b/polyguard-rl/data/scenarios/medium/medium_0075.json similarity index 100% rename from data/scenarios/medium/medium_0075.json rename to polyguard-rl/data/scenarios/medium/medium_0075.json diff --git a/data/scenarios/medium/medium_0076.json b/polyguard-rl/data/scenarios/medium/medium_0076.json similarity index 100% rename from data/scenarios/medium/medium_0076.json rename to polyguard-rl/data/scenarios/medium/medium_0076.json diff --git a/data/scenarios/medium/medium_0077.json b/polyguard-rl/data/scenarios/medium/medium_0077.json similarity index 100% rename from data/scenarios/medium/medium_0077.json rename to polyguard-rl/data/scenarios/medium/medium_0077.json diff --git a/data/scenarios/medium/medium_0078.json b/polyguard-rl/data/scenarios/medium/medium_0078.json similarity index 100% rename from data/scenarios/medium/medium_0078.json rename to polyguard-rl/data/scenarios/medium/medium_0078.json diff --git a/data/scenarios/medium/medium_0079.json b/polyguard-rl/data/scenarios/medium/medium_0079.json similarity index 100% rename from data/scenarios/medium/medium_0079.json rename to polyguard-rl/data/scenarios/medium/medium_0079.json diff --git a/data/scenarios/medium/medium_0080.json b/polyguard-rl/data/scenarios/medium/medium_0080.json similarity index 100% rename from data/scenarios/medium/medium_0080.json rename to polyguard-rl/data/scenarios/medium/medium_0080.json diff --git a/data/scenarios/medium/medium_0081.json b/polyguard-rl/data/scenarios/medium/medium_0081.json similarity index 100% rename from data/scenarios/medium/medium_0081.json rename to polyguard-rl/data/scenarios/medium/medium_0081.json diff --git a/data/scenarios/medium/medium_0082.json b/polyguard-rl/data/scenarios/medium/medium_0082.json similarity index 100% rename from data/scenarios/medium/medium_0082.json rename to 
polyguard-rl/data/scenarios/medium/medium_0082.json diff --git a/data/scenarios/medium/medium_0083.json b/polyguard-rl/data/scenarios/medium/medium_0083.json similarity index 100% rename from data/scenarios/medium/medium_0083.json rename to polyguard-rl/data/scenarios/medium/medium_0083.json diff --git a/data/scenarios/medium/medium_0084.json b/polyguard-rl/data/scenarios/medium/medium_0084.json similarity index 100% rename from data/scenarios/medium/medium_0084.json rename to polyguard-rl/data/scenarios/medium/medium_0084.json diff --git a/data/scenarios/medium/medium_0085.json b/polyguard-rl/data/scenarios/medium/medium_0085.json similarity index 100% rename from data/scenarios/medium/medium_0085.json rename to polyguard-rl/data/scenarios/medium/medium_0085.json diff --git a/data/scenarios/medium/medium_0086.json b/polyguard-rl/data/scenarios/medium/medium_0086.json similarity index 100% rename from data/scenarios/medium/medium_0086.json rename to polyguard-rl/data/scenarios/medium/medium_0086.json diff --git a/data/scenarios/medium/medium_0087.json b/polyguard-rl/data/scenarios/medium/medium_0087.json similarity index 100% rename from data/scenarios/medium/medium_0087.json rename to polyguard-rl/data/scenarios/medium/medium_0087.json diff --git a/data/scenarios/medium/medium_0088.json b/polyguard-rl/data/scenarios/medium/medium_0088.json similarity index 100% rename from data/scenarios/medium/medium_0088.json rename to polyguard-rl/data/scenarios/medium/medium_0088.json diff --git a/data/scenarios/medium/medium_0089.json b/polyguard-rl/data/scenarios/medium/medium_0089.json similarity index 100% rename from data/scenarios/medium/medium_0089.json rename to polyguard-rl/data/scenarios/medium/medium_0089.json diff --git a/data/scenarios/medium/medium_0090.json b/polyguard-rl/data/scenarios/medium/medium_0090.json similarity index 100% rename from data/scenarios/medium/medium_0090.json rename to polyguard-rl/data/scenarios/medium/medium_0090.json diff --git 
a/data/scenarios/medium/medium_0091.json b/polyguard-rl/data/scenarios/medium/medium_0091.json similarity index 100% rename from data/scenarios/medium/medium_0091.json rename to polyguard-rl/data/scenarios/medium/medium_0091.json diff --git a/data/scenarios/medium/medium_0092.json b/polyguard-rl/data/scenarios/medium/medium_0092.json similarity index 100% rename from data/scenarios/medium/medium_0092.json rename to polyguard-rl/data/scenarios/medium/medium_0092.json diff --git a/data/scenarios/medium/medium_0093.json b/polyguard-rl/data/scenarios/medium/medium_0093.json similarity index 100% rename from data/scenarios/medium/medium_0093.json rename to polyguard-rl/data/scenarios/medium/medium_0093.json diff --git a/data/scenarios/medium/medium_0094.json b/polyguard-rl/data/scenarios/medium/medium_0094.json similarity index 100% rename from data/scenarios/medium/medium_0094.json rename to polyguard-rl/data/scenarios/medium/medium_0094.json diff --git a/data/scenarios/medium/medium_0095.json b/polyguard-rl/data/scenarios/medium/medium_0095.json similarity index 100% rename from data/scenarios/medium/medium_0095.json rename to polyguard-rl/data/scenarios/medium/medium_0095.json diff --git a/data/scenarios/medium/medium_0096.json b/polyguard-rl/data/scenarios/medium/medium_0096.json similarity index 100% rename from data/scenarios/medium/medium_0096.json rename to polyguard-rl/data/scenarios/medium/medium_0096.json diff --git a/data/scenarios/medium/medium_0097.json b/polyguard-rl/data/scenarios/medium/medium_0097.json similarity index 100% rename from data/scenarios/medium/medium_0097.json rename to polyguard-rl/data/scenarios/medium/medium_0097.json diff --git a/data/scenarios/medium/medium_0098.json b/polyguard-rl/data/scenarios/medium/medium_0098.json similarity index 100% rename from data/scenarios/medium/medium_0098.json rename to polyguard-rl/data/scenarios/medium/medium_0098.json diff --git a/data/scenarios/medium/medium_0099.json 
b/polyguard-rl/data/scenarios/medium/medium_0099.json similarity index 100% rename from data/scenarios/medium/medium_0099.json rename to polyguard-rl/data/scenarios/medium/medium_0099.json diff --git a/data/scenarios/medium/medium_0100.json b/polyguard-rl/data/scenarios/medium/medium_0100.json similarity index 100% rename from data/scenarios/medium/medium_0100.json rename to polyguard-rl/data/scenarios/medium/medium_0100.json diff --git a/data/scenarios/medium/medium_0101.json b/polyguard-rl/data/scenarios/medium/medium_0101.json similarity index 100% rename from data/scenarios/medium/medium_0101.json rename to polyguard-rl/data/scenarios/medium/medium_0101.json diff --git a/data/scenarios/medium/medium_0102.json b/polyguard-rl/data/scenarios/medium/medium_0102.json similarity index 100% rename from data/scenarios/medium/medium_0102.json rename to polyguard-rl/data/scenarios/medium/medium_0102.json diff --git a/data/scenarios/medium/medium_0103.json b/polyguard-rl/data/scenarios/medium/medium_0103.json similarity index 100% rename from data/scenarios/medium/medium_0103.json rename to polyguard-rl/data/scenarios/medium/medium_0103.json diff --git a/data/scenarios/medium/medium_0104.json b/polyguard-rl/data/scenarios/medium/medium_0104.json similarity index 100% rename from data/scenarios/medium/medium_0104.json rename to polyguard-rl/data/scenarios/medium/medium_0104.json diff --git a/data/scenarios/medium/medium_0105.json b/polyguard-rl/data/scenarios/medium/medium_0105.json similarity index 100% rename from data/scenarios/medium/medium_0105.json rename to polyguard-rl/data/scenarios/medium/medium_0105.json diff --git a/data/scenarios/medium/medium_0106.json b/polyguard-rl/data/scenarios/medium/medium_0106.json similarity index 100% rename from data/scenarios/medium/medium_0106.json rename to polyguard-rl/data/scenarios/medium/medium_0106.json diff --git a/data/scenarios/medium/medium_0107.json b/polyguard-rl/data/scenarios/medium/medium_0107.json similarity index 
100% rename from data/scenarios/medium/medium_0107.json rename to polyguard-rl/data/scenarios/medium/medium_0107.json diff --git a/data/scenarios/medium/medium_0108.json b/polyguard-rl/data/scenarios/medium/medium_0108.json similarity index 100% rename from data/scenarios/medium/medium_0108.json rename to polyguard-rl/data/scenarios/medium/medium_0108.json diff --git a/data/scenarios/medium/medium_0109.json b/polyguard-rl/data/scenarios/medium/medium_0109.json similarity index 100% rename from data/scenarios/medium/medium_0109.json rename to polyguard-rl/data/scenarios/medium/medium_0109.json diff --git a/data/scenarios/medium/medium_0110.json b/polyguard-rl/data/scenarios/medium/medium_0110.json similarity index 100% rename from data/scenarios/medium/medium_0110.json rename to polyguard-rl/data/scenarios/medium/medium_0110.json diff --git a/data/scenarios/medium/medium_0111.json b/polyguard-rl/data/scenarios/medium/medium_0111.json similarity index 100% rename from data/scenarios/medium/medium_0111.json rename to polyguard-rl/data/scenarios/medium/medium_0111.json diff --git a/data/scenarios/medium/medium_0112.json b/polyguard-rl/data/scenarios/medium/medium_0112.json similarity index 100% rename from data/scenarios/medium/medium_0112.json rename to polyguard-rl/data/scenarios/medium/medium_0112.json diff --git a/data/scenarios/medium/medium_0113.json b/polyguard-rl/data/scenarios/medium/medium_0113.json similarity index 100% rename from data/scenarios/medium/medium_0113.json rename to polyguard-rl/data/scenarios/medium/medium_0113.json diff --git a/data/scenarios/medium/medium_0114.json b/polyguard-rl/data/scenarios/medium/medium_0114.json similarity index 100% rename from data/scenarios/medium/medium_0114.json rename to polyguard-rl/data/scenarios/medium/medium_0114.json diff --git a/data/scenarios/medium/medium_0115.json b/polyguard-rl/data/scenarios/medium/medium_0115.json similarity index 100% rename from data/scenarios/medium/medium_0115.json rename to 
polyguard-rl/data/scenarios/medium/medium_0115.json diff --git a/data/scenarios/medium/medium_0116.json b/polyguard-rl/data/scenarios/medium/medium_0116.json similarity index 100% rename from data/scenarios/medium/medium_0116.json rename to polyguard-rl/data/scenarios/medium/medium_0116.json diff --git a/data/scenarios/medium/medium_0117.json b/polyguard-rl/data/scenarios/medium/medium_0117.json similarity index 100% rename from data/scenarios/medium/medium_0117.json rename to polyguard-rl/data/scenarios/medium/medium_0117.json diff --git a/data/scenarios/medium/medium_0118.json b/polyguard-rl/data/scenarios/medium/medium_0118.json similarity index 100% rename from data/scenarios/medium/medium_0118.json rename to polyguard-rl/data/scenarios/medium/medium_0118.json diff --git a/data/scenarios/medium/medium_0119.json b/polyguard-rl/data/scenarios/medium/medium_0119.json similarity index 100% rename from data/scenarios/medium/medium_0119.json rename to polyguard-rl/data/scenarios/medium/medium_0119.json diff --git a/data/scenarios/medium/medium_0120.json b/polyguard-rl/data/scenarios/medium/medium_0120.json similarity index 100% rename from data/scenarios/medium/medium_0120.json rename to polyguard-rl/data/scenarios/medium/medium_0120.json diff --git a/data/scenarios/medium/medium_0121.json b/polyguard-rl/data/scenarios/medium/medium_0121.json similarity index 100% rename from data/scenarios/medium/medium_0121.json rename to polyguard-rl/data/scenarios/medium/medium_0121.json diff --git a/data/scenarios/medium/medium_0122.json b/polyguard-rl/data/scenarios/medium/medium_0122.json similarity index 100% rename from data/scenarios/medium/medium_0122.json rename to polyguard-rl/data/scenarios/medium/medium_0122.json diff --git a/data/scenarios/medium/medium_0123.json b/polyguard-rl/data/scenarios/medium/medium_0123.json similarity index 100% rename from data/scenarios/medium/medium_0123.json rename to polyguard-rl/data/scenarios/medium/medium_0123.json diff --git 
a/data/scenarios/medium/medium_0124.json b/polyguard-rl/data/scenarios/medium/medium_0124.json similarity index 100% rename from data/scenarios/medium/medium_0124.json rename to polyguard-rl/data/scenarios/medium/medium_0124.json diff --git a/data/scenarios/medium/medium_0125.json b/polyguard-rl/data/scenarios/medium/medium_0125.json similarity index 100% rename from data/scenarios/medium/medium_0125.json rename to polyguard-rl/data/scenarios/medium/medium_0125.json diff --git a/data/scenarios/medium/medium_0126.json b/polyguard-rl/data/scenarios/medium/medium_0126.json similarity index 100% rename from data/scenarios/medium/medium_0126.json rename to polyguard-rl/data/scenarios/medium/medium_0126.json diff --git a/data/scenarios/medium/medium_0127.json b/polyguard-rl/data/scenarios/medium/medium_0127.json similarity index 100% rename from data/scenarios/medium/medium_0127.json rename to polyguard-rl/data/scenarios/medium/medium_0127.json diff --git a/data/scenarios/medium/medium_0128.json b/polyguard-rl/data/scenarios/medium/medium_0128.json similarity index 100% rename from data/scenarios/medium/medium_0128.json rename to polyguard-rl/data/scenarios/medium/medium_0128.json diff --git a/data/scenarios/medium/medium_0129.json b/polyguard-rl/data/scenarios/medium/medium_0129.json similarity index 100% rename from data/scenarios/medium/medium_0129.json rename to polyguard-rl/data/scenarios/medium/medium_0129.json diff --git a/data/scenarios/medium/medium_0130.json b/polyguard-rl/data/scenarios/medium/medium_0130.json similarity index 100% rename from data/scenarios/medium/medium_0130.json rename to polyguard-rl/data/scenarios/medium/medium_0130.json diff --git a/data/scenarios/medium/medium_0131.json b/polyguard-rl/data/scenarios/medium/medium_0131.json similarity index 100% rename from data/scenarios/medium/medium_0131.json rename to polyguard-rl/data/scenarios/medium/medium_0131.json diff --git a/data/scenarios/medium/medium_0132.json 
b/polyguard-rl/data/scenarios/medium/medium_0132.json similarity index 100% rename from data/scenarios/medium/medium_0132.json rename to polyguard-rl/data/scenarios/medium/medium_0132.json diff --git a/data/scenarios/medium/medium_0133.json b/polyguard-rl/data/scenarios/medium/medium_0133.json similarity index 100% rename from data/scenarios/medium/medium_0133.json rename to polyguard-rl/data/scenarios/medium/medium_0133.json diff --git a/data/scenarios/medium/medium_0134.json b/polyguard-rl/data/scenarios/medium/medium_0134.json similarity index 100% rename from data/scenarios/medium/medium_0134.json rename to polyguard-rl/data/scenarios/medium/medium_0134.json diff --git a/data/scenarios/medium/medium_0135.json b/polyguard-rl/data/scenarios/medium/medium_0135.json similarity index 100% rename from data/scenarios/medium/medium_0135.json rename to polyguard-rl/data/scenarios/medium/medium_0135.json diff --git a/data/scenarios/medium/medium_0136.json b/polyguard-rl/data/scenarios/medium/medium_0136.json similarity index 100% rename from data/scenarios/medium/medium_0136.json rename to polyguard-rl/data/scenarios/medium/medium_0136.json diff --git a/data/scenarios/medium/medium_0137.json b/polyguard-rl/data/scenarios/medium/medium_0137.json similarity index 100% rename from data/scenarios/medium/medium_0137.json rename to polyguard-rl/data/scenarios/medium/medium_0137.json diff --git a/data/scenarios/medium/medium_0138.json b/polyguard-rl/data/scenarios/medium/medium_0138.json similarity index 100% rename from data/scenarios/medium/medium_0138.json rename to polyguard-rl/data/scenarios/medium/medium_0138.json diff --git a/data/scenarios/medium/medium_0139.json b/polyguard-rl/data/scenarios/medium/medium_0139.json similarity index 100% rename from data/scenarios/medium/medium_0139.json rename to polyguard-rl/data/scenarios/medium/medium_0139.json diff --git a/data/scenarios/medium/medium_0140.json b/polyguard-rl/data/scenarios/medium/medium_0140.json similarity index 
100% rename from data/scenarios/medium/medium_0140.json rename to polyguard-rl/data/scenarios/medium/medium_0140.json diff --git a/data/scenarios/medium/medium_0141.json b/polyguard-rl/data/scenarios/medium/medium_0141.json similarity index 100% rename from data/scenarios/medium/medium_0141.json rename to polyguard-rl/data/scenarios/medium/medium_0141.json diff --git a/data/scenarios/medium/medium_0142.json b/polyguard-rl/data/scenarios/medium/medium_0142.json similarity index 100% rename from data/scenarios/medium/medium_0142.json rename to polyguard-rl/data/scenarios/medium/medium_0142.json diff --git a/data/scenarios/medium/medium_0143.json b/polyguard-rl/data/scenarios/medium/medium_0143.json similarity index 100% rename from data/scenarios/medium/medium_0143.json rename to polyguard-rl/data/scenarios/medium/medium_0143.json diff --git a/data/scenarios/medium/medium_0144.json b/polyguard-rl/data/scenarios/medium/medium_0144.json similarity index 100% rename from data/scenarios/medium/medium_0144.json rename to polyguard-rl/data/scenarios/medium/medium_0144.json diff --git a/data/scenarios/medium/medium_0145.json b/polyguard-rl/data/scenarios/medium/medium_0145.json similarity index 100% rename from data/scenarios/medium/medium_0145.json rename to polyguard-rl/data/scenarios/medium/medium_0145.json diff --git a/data/scenarios/medium/medium_0146.json b/polyguard-rl/data/scenarios/medium/medium_0146.json similarity index 100% rename from data/scenarios/medium/medium_0146.json rename to polyguard-rl/data/scenarios/medium/medium_0146.json diff --git a/data/scenarios/medium/medium_0147.json b/polyguard-rl/data/scenarios/medium/medium_0147.json similarity index 100% rename from data/scenarios/medium/medium_0147.json rename to polyguard-rl/data/scenarios/medium/medium_0147.json diff --git a/data/scenarios/medium/medium_0148.json b/polyguard-rl/data/scenarios/medium/medium_0148.json similarity index 100% rename from data/scenarios/medium/medium_0148.json rename to 
polyguard-rl/data/scenarios/medium/medium_0148.json diff --git a/data/scenarios/medium/medium_0149.json b/polyguard-rl/data/scenarios/medium/medium_0149.json similarity index 100% rename from data/scenarios/medium/medium_0149.json rename to polyguard-rl/data/scenarios/medium/medium_0149.json diff --git a/data/scenarios/medium/medium_0150.json b/polyguard-rl/data/scenarios/medium/medium_0150.json similarity index 100% rename from data/scenarios/medium/medium_0150.json rename to polyguard-rl/data/scenarios/medium/medium_0150.json diff --git a/data/scenarios/medium/medium_0151.json b/polyguard-rl/data/scenarios/medium/medium_0151.json similarity index 100% rename from data/scenarios/medium/medium_0151.json rename to polyguard-rl/data/scenarios/medium/medium_0151.json diff --git a/data/scenarios/medium/medium_0152.json b/polyguard-rl/data/scenarios/medium/medium_0152.json similarity index 100% rename from data/scenarios/medium/medium_0152.json rename to polyguard-rl/data/scenarios/medium/medium_0152.json diff --git a/data/scenarios/medium/medium_0153.json b/polyguard-rl/data/scenarios/medium/medium_0153.json similarity index 100% rename from data/scenarios/medium/medium_0153.json rename to polyguard-rl/data/scenarios/medium/medium_0153.json diff --git a/data/scenarios/medium/medium_0154.json b/polyguard-rl/data/scenarios/medium/medium_0154.json similarity index 100% rename from data/scenarios/medium/medium_0154.json rename to polyguard-rl/data/scenarios/medium/medium_0154.json diff --git a/data/scenarios/medium/medium_0155.json b/polyguard-rl/data/scenarios/medium/medium_0155.json similarity index 100% rename from data/scenarios/medium/medium_0155.json rename to polyguard-rl/data/scenarios/medium/medium_0155.json diff --git a/data/scenarios/medium/medium_0156.json b/polyguard-rl/data/scenarios/medium/medium_0156.json similarity index 100% rename from data/scenarios/medium/medium_0156.json rename to polyguard-rl/data/scenarios/medium/medium_0156.json diff --git 
a/data/scenarios/medium/medium_0157.json b/polyguard-rl/data/scenarios/medium/medium_0157.json similarity index 100% rename from data/scenarios/medium/medium_0157.json rename to polyguard-rl/data/scenarios/medium/medium_0157.json diff --git a/data/scenarios/medium/medium_0158.json b/polyguard-rl/data/scenarios/medium/medium_0158.json similarity index 100% rename from data/scenarios/medium/medium_0158.json rename to polyguard-rl/data/scenarios/medium/medium_0158.json diff --git a/data/scenarios/medium/medium_0159.json b/polyguard-rl/data/scenarios/medium/medium_0159.json similarity index 100% rename from data/scenarios/medium/medium_0159.json rename to polyguard-rl/data/scenarios/medium/medium_0159.json diff --git a/data/scenarios/medium/medium_0160.json b/polyguard-rl/data/scenarios/medium/medium_0160.json similarity index 100% rename from data/scenarios/medium/medium_0160.json rename to polyguard-rl/data/scenarios/medium/medium_0160.json diff --git a/data/scenarios/medium/medium_0161.json b/polyguard-rl/data/scenarios/medium/medium_0161.json similarity index 100% rename from data/scenarios/medium/medium_0161.json rename to polyguard-rl/data/scenarios/medium/medium_0161.json diff --git a/data/scenarios/medium/medium_0162.json b/polyguard-rl/data/scenarios/medium/medium_0162.json similarity index 100% rename from data/scenarios/medium/medium_0162.json rename to polyguard-rl/data/scenarios/medium/medium_0162.json diff --git a/data/scenarios/medium/medium_0163.json b/polyguard-rl/data/scenarios/medium/medium_0163.json similarity index 100% rename from data/scenarios/medium/medium_0163.json rename to polyguard-rl/data/scenarios/medium/medium_0163.json diff --git a/data/scenarios/medium/medium_0164.json b/polyguard-rl/data/scenarios/medium/medium_0164.json similarity index 100% rename from data/scenarios/medium/medium_0164.json rename to polyguard-rl/data/scenarios/medium/medium_0164.json diff --git a/data/scenarios/medium/medium_0165.json 
b/polyguard-rl/data/scenarios/medium/medium_0165.json similarity index 100% rename from data/scenarios/medium/medium_0165.json rename to polyguard-rl/data/scenarios/medium/medium_0165.json diff --git a/data/scenarios/medium/medium_0166.json b/polyguard-rl/data/scenarios/medium/medium_0166.json similarity index 100% rename from data/scenarios/medium/medium_0166.json rename to polyguard-rl/data/scenarios/medium/medium_0166.json diff --git a/data/scenarios/medium/medium_0167.json b/polyguard-rl/data/scenarios/medium/medium_0167.json similarity index 100% rename from data/scenarios/medium/medium_0167.json rename to polyguard-rl/data/scenarios/medium/medium_0167.json diff --git a/data/scenarios/medium/medium_0168.json b/polyguard-rl/data/scenarios/medium/medium_0168.json similarity index 100% rename from data/scenarios/medium/medium_0168.json rename to polyguard-rl/data/scenarios/medium/medium_0168.json diff --git a/data/scenarios/medium/medium_0169.json b/polyguard-rl/data/scenarios/medium/medium_0169.json similarity index 100% rename from data/scenarios/medium/medium_0169.json rename to polyguard-rl/data/scenarios/medium/medium_0169.json diff --git a/data/scenarios/medium/medium_0170.json b/polyguard-rl/data/scenarios/medium/medium_0170.json similarity index 100% rename from data/scenarios/medium/medium_0170.json rename to polyguard-rl/data/scenarios/medium/medium_0170.json diff --git a/data/scenarios/medium/medium_0171.json b/polyguard-rl/data/scenarios/medium/medium_0171.json similarity index 100% rename from data/scenarios/medium/medium_0171.json rename to polyguard-rl/data/scenarios/medium/medium_0171.json diff --git a/data/scenarios/medium/medium_0172.json b/polyguard-rl/data/scenarios/medium/medium_0172.json similarity index 100% rename from data/scenarios/medium/medium_0172.json rename to polyguard-rl/data/scenarios/medium/medium_0172.json diff --git a/data/scenarios/medium/medium_0173.json b/polyguard-rl/data/scenarios/medium/medium_0173.json similarity index 
100% rename from data/scenarios/medium/medium_0173.json rename to polyguard-rl/data/scenarios/medium/medium_0173.json diff --git a/data/scenarios/medium/medium_0174.json b/polyguard-rl/data/scenarios/medium/medium_0174.json similarity index 100% rename from data/scenarios/medium/medium_0174.json rename to polyguard-rl/data/scenarios/medium/medium_0174.json diff --git a/data/scenarios/medium/medium_0175.json b/polyguard-rl/data/scenarios/medium/medium_0175.json similarity index 100% rename from data/scenarios/medium/medium_0175.json rename to polyguard-rl/data/scenarios/medium/medium_0175.json diff --git a/data/scenarios/medium/medium_0176.json b/polyguard-rl/data/scenarios/medium/medium_0176.json similarity index 100% rename from data/scenarios/medium/medium_0176.json rename to polyguard-rl/data/scenarios/medium/medium_0176.json diff --git a/data/scenarios/medium/medium_0177.json b/polyguard-rl/data/scenarios/medium/medium_0177.json similarity index 100% rename from data/scenarios/medium/medium_0177.json rename to polyguard-rl/data/scenarios/medium/medium_0177.json diff --git a/data/scenarios/medium/medium_0178.json b/polyguard-rl/data/scenarios/medium/medium_0178.json similarity index 100% rename from data/scenarios/medium/medium_0178.json rename to polyguard-rl/data/scenarios/medium/medium_0178.json diff --git a/data/scenarios/medium/medium_0179.json b/polyguard-rl/data/scenarios/medium/medium_0179.json similarity index 100% rename from data/scenarios/medium/medium_0179.json rename to polyguard-rl/data/scenarios/medium/medium_0179.json diff --git a/data/scenarios/medium/medium_0180.json b/polyguard-rl/data/scenarios/medium/medium_0180.json similarity index 100% rename from data/scenarios/medium/medium_0180.json rename to polyguard-rl/data/scenarios/medium/medium_0180.json diff --git a/data/scenarios/medium/medium_0181.json b/polyguard-rl/data/scenarios/medium/medium_0181.json similarity index 100% rename from data/scenarios/medium/medium_0181.json rename to 
polyguard-rl/data/scenarios/medium/medium_0181.json diff --git a/data/scenarios/medium/medium_0182.json b/polyguard-rl/data/scenarios/medium/medium_0182.json similarity index 100% rename from data/scenarios/medium/medium_0182.json rename to polyguard-rl/data/scenarios/medium/medium_0182.json diff --git a/data/scenarios/medium/medium_0183.json b/polyguard-rl/data/scenarios/medium/medium_0183.json similarity index 100% rename from data/scenarios/medium/medium_0183.json rename to polyguard-rl/data/scenarios/medium/medium_0183.json diff --git a/data/scenarios/medium/medium_0184.json b/polyguard-rl/data/scenarios/medium/medium_0184.json similarity index 100% rename from data/scenarios/medium/medium_0184.json rename to polyguard-rl/data/scenarios/medium/medium_0184.json diff --git a/data/scenarios/medium/medium_0185.json b/polyguard-rl/data/scenarios/medium/medium_0185.json similarity index 100% rename from data/scenarios/medium/medium_0185.json rename to polyguard-rl/data/scenarios/medium/medium_0185.json diff --git a/data/scenarios/medium/medium_0186.json b/polyguard-rl/data/scenarios/medium/medium_0186.json similarity index 100% rename from data/scenarios/medium/medium_0186.json rename to polyguard-rl/data/scenarios/medium/medium_0186.json diff --git a/data/scenarios/medium/medium_0187.json b/polyguard-rl/data/scenarios/medium/medium_0187.json similarity index 100% rename from data/scenarios/medium/medium_0187.json rename to polyguard-rl/data/scenarios/medium/medium_0187.json diff --git a/data/scenarios/medium/medium_0188.json b/polyguard-rl/data/scenarios/medium/medium_0188.json similarity index 100% rename from data/scenarios/medium/medium_0188.json rename to polyguard-rl/data/scenarios/medium/medium_0188.json diff --git a/data/scenarios/medium/medium_0189.json b/polyguard-rl/data/scenarios/medium/medium_0189.json similarity index 100% rename from data/scenarios/medium/medium_0189.json rename to polyguard-rl/data/scenarios/medium/medium_0189.json diff --git 
a/data/scenarios/medium/medium_0190.json b/polyguard-rl/data/scenarios/medium/medium_0190.json similarity index 100% rename from data/scenarios/medium/medium_0190.json rename to polyguard-rl/data/scenarios/medium/medium_0190.json diff --git a/data/scenarios/medium/medium_0191.json b/polyguard-rl/data/scenarios/medium/medium_0191.json similarity index 100% rename from data/scenarios/medium/medium_0191.json rename to polyguard-rl/data/scenarios/medium/medium_0191.json diff --git a/data/scenarios/medium/medium_0192.json b/polyguard-rl/data/scenarios/medium/medium_0192.json similarity index 100% rename from data/scenarios/medium/medium_0192.json rename to polyguard-rl/data/scenarios/medium/medium_0192.json diff --git a/data/scenarios/medium/medium_0193.json b/polyguard-rl/data/scenarios/medium/medium_0193.json similarity index 100% rename from data/scenarios/medium/medium_0193.json rename to polyguard-rl/data/scenarios/medium/medium_0193.json diff --git a/data/scenarios/medium/medium_0194.json b/polyguard-rl/data/scenarios/medium/medium_0194.json similarity index 100% rename from data/scenarios/medium/medium_0194.json rename to polyguard-rl/data/scenarios/medium/medium_0194.json diff --git a/data/scenarios/medium/medium_0195.json b/polyguard-rl/data/scenarios/medium/medium_0195.json similarity index 100% rename from data/scenarios/medium/medium_0195.json rename to polyguard-rl/data/scenarios/medium/medium_0195.json diff --git a/data/scenarios/medium/medium_0196.json b/polyguard-rl/data/scenarios/medium/medium_0196.json similarity index 100% rename from data/scenarios/medium/medium_0196.json rename to polyguard-rl/data/scenarios/medium/medium_0196.json diff --git a/data/scenarios/medium/medium_0197.json b/polyguard-rl/data/scenarios/medium/medium_0197.json similarity index 100% rename from data/scenarios/medium/medium_0197.json rename to polyguard-rl/data/scenarios/medium/medium_0197.json diff --git a/data/scenarios/medium/medium_0198.json 
b/polyguard-rl/data/scenarios/medium/medium_0198.json similarity index 100% rename from data/scenarios/medium/medium_0198.json rename to polyguard-rl/data/scenarios/medium/medium_0198.json diff --git a/data/scenarios/medium/medium_0199.json b/polyguard-rl/data/scenarios/medium/medium_0199.json similarity index 100% rename from data/scenarios/medium/medium_0199.json rename to polyguard-rl/data/scenarios/medium/medium_0199.json diff --git a/data/scenarios/scenarios_easy.jsonl b/polyguard-rl/data/scenarios/scenarios_easy.jsonl similarity index 100% rename from data/scenarios/scenarios_easy.jsonl rename to polyguard-rl/data/scenarios/scenarios_easy.jsonl diff --git a/data/scenarios/scenarios_hard.jsonl b/polyguard-rl/data/scenarios/scenarios_hard.jsonl similarity index 100% rename from data/scenarios/scenarios_hard.jsonl rename to polyguard-rl/data/scenarios/scenarios_hard.jsonl diff --git a/data/scenarios/scenarios_medium.jsonl b/polyguard-rl/data/scenarios/scenarios_medium.jsonl similarity index 100% rename from data/scenarios/scenarios_medium.jsonl rename to polyguard-rl/data/scenarios/scenarios_medium.jsonl diff --git a/docker-compose.yml b/polyguard-rl/docker-compose.yml similarity index 100% rename from docker-compose.yml rename to polyguard-rl/docker-compose.yml diff --git a/docker/space/README.md b/polyguard-rl/docker/space/README.md similarity index 100% rename from docker/space/README.md rename to polyguard-rl/docker/space/README.md diff --git a/docker/space/entrypoint.sh b/polyguard-rl/docker/space/entrypoint.sh similarity index 100% rename from docker/space/entrypoint.sh rename to polyguard-rl/docker/space/entrypoint.sh diff --git a/docker/space/nginx.conf.template b/polyguard-rl/docker/space/nginx.conf.template similarity index 100% rename from docker/space/nginx.conf.template rename to polyguard-rl/docker/space/nginx.conf.template diff --git a/docs/DEMO_RECORDING_SCRIPT.md b/polyguard-rl/docs/DEMO_RECORDING_SCRIPT.md similarity index 100% rename from 
docs/DEMO_RECORDING_SCRIPT.md rename to polyguard-rl/docs/DEMO_RECORDING_SCRIPT.md diff --git a/docs/UI Images/1.jpeg b/polyguard-rl/docs/UI Images/1.jpeg similarity index 100% rename from docs/UI Images/1.jpeg rename to polyguard-rl/docs/UI Images/1.jpeg diff --git a/docs/UI Images/2.jpeg b/polyguard-rl/docs/UI Images/2.jpeg similarity index 100% rename from docs/UI Images/2.jpeg rename to polyguard-rl/docs/UI Images/2.jpeg diff --git a/docs/UI Images/3.jpeg b/polyguard-rl/docs/UI Images/3.jpeg similarity index 100% rename from docs/UI Images/3.jpeg rename to polyguard-rl/docs/UI Images/3.jpeg diff --git a/docs/UI Images/4.jpeg b/polyguard-rl/docs/UI Images/4.jpeg similarity index 100% rename from docs/UI Images/4.jpeg rename to polyguard-rl/docs/UI Images/4.jpeg diff --git a/docs/UI Images/5.jpeg b/polyguard-rl/docs/UI Images/5.jpeg similarity index 100% rename from docs/UI Images/5.jpeg rename to polyguard-rl/docs/UI Images/5.jpeg diff --git a/docs/ablations.md b/polyguard-rl/docs/ablations.md similarity index 100% rename from docs/ablations.md rename to polyguard-rl/docs/ablations.md diff --git a/docs/agents.md b/polyguard-rl/docs/agents.md similarity index 100% rename from docs/agents.md rename to polyguard-rl/docs/agents.md diff --git a/docs/api.md b/polyguard-rl/docs/api.md similarity index 100% rename from docs/api.md rename to polyguard-rl/docs/api.md diff --git a/docs/architecture.md b/polyguard-rl/docs/architecture.md similarity index 100% rename from docs/architecture.md rename to polyguard-rl/docs/architecture.md diff --git a/docs/assets/diagrams/data_training_pipeline.png b/polyguard-rl/docs/assets/diagrams/data_training_pipeline.png similarity index 100% rename from docs/assets/diagrams/data_training_pipeline.png rename to polyguard-rl/docs/assets/diagrams/data_training_pipeline.png diff --git a/docs/assets/diagrams/deployment_topology.png b/polyguard-rl/docs/assets/diagrams/deployment_topology.png similarity index 100% rename from 
docs/assets/diagrams/deployment_topology.png rename to polyguard-rl/docs/assets/diagrams/deployment_topology.png diff --git a/polyguard-rl/docs/assets/diagrams/episode_state_machine.png b/polyguard-rl/docs/assets/diagrams/episode_state_machine.png new file mode 100644 index 0000000000000000000000000000000000000000..9fa0c4108616a918340d2df799297fb1af2a7f69 --- /dev/null +++ b/polyguard-rl/docs/assets/diagrams/episode_state_machine.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c11967ed80244ab5c95627c41e7b017aec5695fb547f379f34d75e70b13de3d6 +size 75064 diff --git a/polyguard-rl/docs/assets/diagrams/evidence_generation_flow.png b/polyguard-rl/docs/assets/diagrams/evidence_generation_flow.png new file mode 100644 index 0000000000000000000000000000000000000000..a83d5f88abdb56f1b6976c68c7c94077230f6a02 --- /dev/null +++ b/polyguard-rl/docs/assets/diagrams/evidence_generation_flow.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f197472efb402fb883a65fe4cea2db8a5c59b2c0fd65006006c7e59a1cd41b4c +size 80790 diff --git a/docs/assets/diagrams/frontend_runtime_surface.png b/polyguard-rl/docs/assets/diagrams/frontend_runtime_surface.png similarity index 100% rename from docs/assets/diagrams/frontend_runtime_surface.png rename to polyguard-rl/docs/assets/diagrams/frontend_runtime_surface.png diff --git a/docs/assets/diagrams/multi_agent_orchestration.png b/polyguard-rl/docs/assets/diagrams/multi_agent_orchestration.png similarity index 100% rename from docs/assets/diagrams/multi_agent_orchestration.png rename to polyguard-rl/docs/assets/diagrams/multi_agent_orchestration.png diff --git a/docs/assets/diagrams/reward_decomposition.png b/polyguard-rl/docs/assets/diagrams/reward_decomposition.png similarity index 100% rename from docs/assets/diagrams/reward_decomposition.png rename to polyguard-rl/docs/assets/diagrams/reward_decomposition.png diff --git a/polyguard-rl/docs/assets/diagrams/runtime_step_flow.png 
b/polyguard-rl/docs/assets/diagrams/runtime_step_flow.png new file mode 100644 index 0000000000000000000000000000000000000000..13614c34bf4b1d65f59378372c6eee612941d3e4 --- /dev/null +++ b/polyguard-rl/docs/assets/diagrams/runtime_step_flow.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf04132a22b1b612d9a59a982900e09edbc2669dbaff573ac2a26d9e5fcc1ffd +size 86395 diff --git a/docs/assets/diagrams/system_architecture.png b/polyguard-rl/docs/assets/diagrams/system_architecture.png similarity index 100% rename from docs/assets/diagrams/system_architecture.png rename to polyguard-rl/docs/assets/diagrams/system_architecture.png diff --git a/docs/dataset_report.md b/polyguard-rl/docs/dataset_report.md similarity index 100% rename from docs/dataset_report.md rename to polyguard-rl/docs/dataset_report.md diff --git a/docs/datasets.md b/polyguard-rl/docs/datasets.md similarity index 100% rename from docs/datasets.md rename to polyguard-rl/docs/datasets.md diff --git a/docs/deployment.md b/polyguard-rl/docs/deployment.md similarity index 100% rename from docs/deployment.md rename to polyguard-rl/docs/deployment.md diff --git a/docs/environment_design.md b/polyguard-rl/docs/environment_design.md similarity index 100% rename from docs/environment_design.md rename to polyguard-rl/docs/environment_design.md diff --git a/docs/evaluation.md b/polyguard-rl/docs/evaluation.md similarity index 100% rename from docs/evaluation.md rename to polyguard-rl/docs/evaluation.md diff --git a/docs/graph_models.md b/polyguard-rl/docs/graph_models.md similarity index 100% rename from docs/graph_models.md rename to polyguard-rl/docs/graph_models.md diff --git a/docs/hierarchical_rl.md b/polyguard-rl/docs/hierarchical_rl.md similarity index 100% rename from docs/hierarchical_rl.md rename to polyguard-rl/docs/hierarchical_rl.md diff --git a/docs/idea_document_traceability.md b/polyguard-rl/docs/idea_document_traceability.md similarity index 100% rename from 
docs/idea_document_traceability.md rename to polyguard-rl/docs/idea_document_traceability.md diff --git a/docs/math.md b/polyguard-rl/docs/math.md similarity index 100% rename from docs/math.md rename to polyguard-rl/docs/math.md diff --git a/docs/mathematics.md b/polyguard-rl/docs/mathematics.md similarity index 100% rename from docs/mathematics.md rename to polyguard-rl/docs/mathematics.md diff --git a/docs/old_repo_adapter_merge.md b/polyguard-rl/docs/old_repo_adapter_merge.md similarity index 100% rename from docs/old_repo_adapter_merge.md rename to polyguard-rl/docs/old_repo_adapter_merge.md diff --git a/docs/participant_guide_traceability.md b/polyguard-rl/docs/participant_guide_traceability.md similarity index 100% rename from docs/participant_guide_traceability.md rename to polyguard-rl/docs/participant_guide_traceability.md diff --git a/docs/precision_dosing.md b/polyguard-rl/docs/precision_dosing.md similarity index 100% rename from docs/precision_dosing.md rename to polyguard-rl/docs/precision_dosing.md diff --git a/docs/results/README.md b/polyguard-rl/docs/results/README.md similarity index 100% rename from docs/results/README.md rename to polyguard-rl/docs/results/README.md diff --git a/docs/results/acceptance_gate.json b/polyguard-rl/docs/results/acceptance_gate.json similarity index 100% rename from docs/results/acceptance_gate.json rename to polyguard-rl/docs/results/acceptance_gate.json diff --git a/docs/results/active_model/acceptance_gate.json b/polyguard-rl/docs/results/active_model/acceptance_gate.json similarity index 100% rename from docs/results/active_model/acceptance_gate.json rename to polyguard-rl/docs/results/active_model/acceptance_gate.json diff --git a/docs/results/active_model/active_model_manifest.json b/polyguard-rl/docs/results/active_model/active_model_manifest.json similarity index 100% rename from docs/results/active_model/active_model_manifest.json rename to polyguard-rl/docs/results/active_model/active_model_manifest.json 
diff --git a/docs/results/active_model/anti_hacking_overfit_report.json b/polyguard-rl/docs/results/active_model/anti_hacking_overfit_report.json similarity index 100% rename from docs/results/active_model/anti_hacking_overfit_report.json rename to polyguard-rl/docs/results/active_model/anti_hacking_overfit_report.json diff --git a/docs/results/active_model/baselines.json b/polyguard-rl/docs/results/active_model/baselines.json similarity index 100% rename from docs/results/active_model/baselines.json rename to polyguard-rl/docs/results/active_model/baselines.json diff --git a/docs/results/active_model/benchmark_report.json b/polyguard-rl/docs/results/active_model/benchmark_report.json similarity index 100% rename from docs/results/active_model/benchmark_report.json rename to polyguard-rl/docs/results/active_model/benchmark_report.json diff --git a/docs/results/active_model/benchmark_report.txt b/polyguard-rl/docs/results/active_model/benchmark_report.txt similarity index 100% rename from docs/results/active_model/benchmark_report.txt rename to polyguard-rl/docs/results/active_model/benchmark_report.txt diff --git a/docs/results/active_model/dose_train.json b/polyguard-rl/docs/results/active_model/dose_train.json similarity index 100% rename from docs/results/active_model/dose_train.json rename to polyguard-rl/docs/results/active_model/dose_train.json diff --git a/docs/results/active_model/dosing_grpo.json b/polyguard-rl/docs/results/active_model/dosing_grpo.json similarity index 100% rename from docs/results/active_model/dosing_grpo.json rename to polyguard-rl/docs/results/active_model/dosing_grpo.json diff --git a/docs/results/active_model/frontier_ready.json b/polyguard-rl/docs/results/active_model/frontier_ready.json similarity index 100% rename from docs/results/active_model/frontier_ready.json rename to polyguard-rl/docs/results/active_model/frontier_ready.json diff --git a/docs/results/active_model/graph_train.json 
b/polyguard-rl/docs/results/active_model/graph_train.json similarity index 100% rename from docs/results/active_model/graph_train.json rename to polyguard-rl/docs/results/active_model/graph_train.json diff --git a/docs/results/active_model/grpo_ablation_report.json b/polyguard-rl/docs/results/active_model/grpo_ablation_report.json similarity index 100% rename from docs/results/active_model/grpo_ablation_report.json rename to polyguard-rl/docs/results/active_model/grpo_ablation_report.json diff --git a/docs/results/active_model/grpo_training_cycle/grpo_trl_run.json b/polyguard-rl/docs/results/active_model/grpo_training_cycle/grpo_trl_run.json similarity index 100% rename from docs/results/active_model/grpo_training_cycle/grpo_trl_run.json rename to polyguard-rl/docs/results/active_model/grpo_training_cycle/grpo_trl_run.json diff --git a/docs/results/active_model/grpo_training_cycle/hf_training_status.json b/polyguard-rl/docs/results/active_model/grpo_training_cycle/hf_training_status.json similarity index 100% rename from docs/results/active_model/grpo_training_cycle/hf_training_status.json rename to polyguard-rl/docs/results/active_model/grpo_training_cycle/hf_training_status.json diff --git a/docs/results/active_model/grpo_trl_run.json b/polyguard-rl/docs/results/active_model/grpo_trl_run.json similarity index 100% rename from docs/results/active_model/grpo_trl_run.json rename to polyguard-rl/docs/results/active_model/grpo_trl_run.json diff --git a/docs/results/active_model/grpo_trl_run_auto.json b/polyguard-rl/docs/results/active_model/grpo_trl_run_auto.json similarity index 100% rename from docs/results/active_model/grpo_trl_run_auto.json rename to polyguard-rl/docs/results/active_model/grpo_trl_run_auto.json diff --git a/docs/results/active_model/grpo_trl_run_fallback_check.json b/polyguard-rl/docs/results/active_model/grpo_trl_run_fallback_check.json similarity index 100% rename from docs/results/active_model/grpo_trl_run_fallback_check.json rename to 
polyguard-rl/docs/results/active_model/grpo_trl_run_fallback_check.json diff --git a/docs/results/active_model/grpo_trl_run_smoke.json b/polyguard-rl/docs/results/active_model/grpo_trl_run_smoke.json similarity index 100% rename from docs/results/active_model/grpo_trl_run_smoke.json rename to polyguard-rl/docs/results/active_model/grpo_trl_run_smoke.json diff --git a/docs/results/active_model/grpo_trl_run_strict_check.json b/polyguard-rl/docs/results/active_model/grpo_trl_run_strict_check.json similarity index 100% rename from docs/results/active_model/grpo_trl_run_strict_check.json rename to polyguard-rl/docs/results/active_model/grpo_trl_run_strict_check.json diff --git a/docs/results/active_model/hf_sweep_summary.json b/polyguard-rl/docs/results/active_model/hf_sweep_summary.json similarity index 100% rename from docs/results/active_model/hf_sweep_summary.json rename to polyguard-rl/docs/results/active_model/hf_sweep_summary.json diff --git a/docs/results/active_model/hf_training_status.json b/polyguard-rl/docs/results/active_model/hf_training_status.json similarity index 100% rename from docs/results/active_model/hf_training_status.json rename to polyguard-rl/docs/results/active_model/hf_training_status.json diff --git a/docs/results/active_model/improvement_report.json b/polyguard-rl/docs/results/active_model/improvement_report.json similarity index 100% rename from docs/results/active_model/improvement_report.json rename to polyguard-rl/docs/results/active_model/improvement_report.json diff --git a/docs/results/active_model/improvement_report_benchmark.json b/polyguard-rl/docs/results/active_model/improvement_report_benchmark.json similarity index 100% rename from docs/results/active_model/improvement_report_benchmark.json rename to polyguard-rl/docs/results/active_model/improvement_report_benchmark.json diff --git a/docs/results/active_model/inference_benchmark.json b/polyguard-rl/docs/results/active_model/inference_benchmark.json similarity index 100% 
rename from docs/results/active_model/inference_benchmark.json rename to polyguard-rl/docs/results/active_model/inference_benchmark.json diff --git a/docs/results/active_model/planner_grpo.json b/polyguard-rl/docs/results/active_model/planner_grpo.json similarity index 100% rename from docs/results/active_model/planner_grpo.json rename to polyguard-rl/docs/results/active_model/planner_grpo.json diff --git a/docs/results/active_model/plot_index.json b/polyguard-rl/docs/results/active_model/plot_index.json similarity index 100% rename from docs/results/active_model/plot_index.json rename to polyguard-rl/docs/results/active_model/plot_index.json diff --git a/docs/results/active_model/postsave_inference.json b/polyguard-rl/docs/results/active_model/postsave_inference.json similarity index 100% rename from docs/results/active_model/postsave_inference.json rename to polyguard-rl/docs/results/active_model/postsave_inference.json diff --git a/docs/results/active_model/postsave_inference_smoke.json b/polyguard-rl/docs/results/active_model/postsave_inference_smoke.json similarity index 100% rename from docs/results/active_model/postsave_inference_smoke.json rename to polyguard-rl/docs/results/active_model/postsave_inference_smoke.json diff --git a/docs/results/active_model/risk_train.json b/polyguard-rl/docs/results/active_model/risk_train.json similarity index 100% rename from docs/results/active_model/risk_train.json rename to polyguard-rl/docs/results/active_model/risk_train.json diff --git a/docs/results/active_model/robustness.json b/polyguard-rl/docs/results/active_model/robustness.json similarity index 100% rename from docs/results/active_model/robustness.json rename to polyguard-rl/docs/results/active_model/robustness.json diff --git a/docs/results/active_model/sft_run.json b/polyguard-rl/docs/results/active_model/sft_run.json similarity index 100% rename from docs/results/active_model/sft_run.json rename to polyguard-rl/docs/results/active_model/sft_run.json diff 
--git a/docs/results/active_model/sft_trl_run.json b/polyguard-rl/docs/results/active_model/sft_trl_run.json similarity index 100% rename from docs/results/active_model/sft_trl_run.json rename to polyguard-rl/docs/results/active_model/sft_trl_run.json diff --git a/docs/results/active_model/supervisor_grpo.json b/polyguard-rl/docs/results/active_model/supervisor_grpo.json similarity index 100% rename from docs/results/active_model/supervisor_grpo.json rename to polyguard-rl/docs/results/active_model/supervisor_grpo.json diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json b/polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json similarity index 100% rename from docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json similarity index 100% rename from docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json rename to polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json b/polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json similarity index 100% rename from docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json rename to polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json similarity index 100% rename from 
docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json b/polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json similarity index 100% rename from docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json similarity index 100% rename from docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json rename to polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json b/polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json similarity index 100% rename from docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json rename to polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json similarity index 100% rename from docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json b/polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json 
similarity index 100% rename from docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json b/polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json similarity index 100% rename from docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json rename to polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json b/polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json similarity index 100% rename from docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json rename to polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json similarity index 100% rename from docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json diff --git a/docs/results/active_model_manifest.json b/polyguard-rl/docs/results/active_model_manifest.json similarity index 100% rename from docs/results/active_model_manifest.json rename to polyguard-rl/docs/results/active_model_manifest.json diff --git a/polyguard-rl/docs/results/anti_cheat_failure_rates.png b/polyguard-rl/docs/results/anti_cheat_failure_rates.png new file mode 100644 index 0000000000000000000000000000000000000000..7c504c44bf70b1e5366503778875e526db8df542 --- /dev/null +++ b/polyguard-rl/docs/results/anti_cheat_failure_rates.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:ad0a632699048d87865977f75522575ea22ebd1940719952b8a1e9064cfdb3b4 +size 52299 diff --git a/docs/results/anti_hacking_overfit_report.json b/polyguard-rl/docs/results/anti_hacking_overfit_report.json similarity index 100% rename from docs/results/anti_hacking_overfit_report.json rename to polyguard-rl/docs/results/anti_hacking_overfit_report.json diff --git a/polyguard-rl/docs/results/avg_process_fidelity.png b/polyguard-rl/docs/results/avg_process_fidelity.png new file mode 100644 index 0000000000000000000000000000000000000000..af8956c1da14001e096d7cabf96f511bd517c075 --- /dev/null +++ b/polyguard-rl/docs/results/avg_process_fidelity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02f7f1be2533be9d0862eb78c8948ce689128b3e0d2c72cf89719dbecbf8da04 +size 12532 diff --git a/polyguard-rl/docs/results/avg_reward.png b/polyguard-rl/docs/results/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..abf2aa56b5280d757f4a3af28aeef928ff704530 --- /dev/null +++ b/polyguard-rl/docs/results/avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e45b324d208a71eeffa0dcfe146a5a7f2e17cf5a951a173dcd68818106d774c +size 11094 diff --git a/docs/results/baselines.json b/polyguard-rl/docs/results/baselines.json similarity index 100% rename from docs/results/baselines.json rename to polyguard-rl/docs/results/baselines.json diff --git a/docs/results/benchmark_report.json b/polyguard-rl/docs/results/benchmark_report.json similarity index 100% rename from docs/results/benchmark_report.json rename to polyguard-rl/docs/results/benchmark_report.json diff --git a/docs/results/benchmark_report.txt b/polyguard-rl/docs/results/benchmark_report.txt similarity index 100% rename from docs/results/benchmark_report.txt rename to polyguard-rl/docs/results/benchmark_report.txt diff --git a/docs/results/dose_train.json b/polyguard-rl/docs/results/dose_train.json similarity 
index 100% rename from docs/results/dose_train.json rename to polyguard-rl/docs/results/dose_train.json diff --git a/docs/results/dosing_grpo.json b/polyguard-rl/docs/results/dosing_grpo.json similarity index 100% rename from docs/results/dosing_grpo.json rename to polyguard-rl/docs/results/dosing_grpo.json diff --git a/docs/results/final_submission_evidence/README.md b/polyguard-rl/docs/results/final_submission_evidence/README.md similarity index 100% rename from docs/results/final_submission_evidence/README.md rename to polyguard-rl/docs/results/final_submission_evidence/README.md diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/anti_cheat_failure_rates.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/anti_cheat_failure_rates.png new file mode 100644 index 0000000000000000000000000000000000000000..7c504c44bf70b1e5366503778875e526db8df542 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/anti_cheat_failure_rates.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad0a632699048d87865977f75522575ea22ebd1940719952b8a1e9064cfdb3b4 +size 52299 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/avg_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..abf2aa56b5280d757f4a3af28aeef928ff704530 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e45b324d208a71eeffa0dcfe146a5a7f2e17cf5a951a173dcd68818106d774c +size 11094 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_latency.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_latency.png new file mode 100644 index 
0000000000000000000000000000000000000000..90fc24b8e1c3f366404581bd4a7cd4995144948c --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_latency.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:932c29c95fbac79bfc3efb6031d8380f29f6651444e18e8603eb50be5b962a8c +size 54154 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_legality.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..1113f0ebb7243a2b03530964ec6315ec65de5c63 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e53b7fef0413492382712ca477f10c471bb26561c8db0566a283be20f2c4d9 +size 51915 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..2c8be057c8679f255a48baa579e2698510e36419 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37f7740740ecd7e509f0859ac962e045c2f0a07960d4e00ceccd1cb8281abe7 +size 58200 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_reward_delta_by_seed.png new file mode 100644 index 0000000000000000000000000000000000000000..d0b118adb6baad555b9004547dcaf13089ea35fd --- /dev/null +++ 
b/polyguard-rl/docs/results/final_submission_evidence/charts/all/basic_llm_vs_full_pipeline_reward_delta_by_seed.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e2d0f462663346ad69b09a236674983829f7e7a23706e12eed74b1901c20f0 +size 40643 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/grpo_reward_curves.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/grpo_reward_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..f3850bd6c22f350e534a1874e9e6d60f6f350450 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/grpo_reward_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d134ce16a328958cdddfb2d1273d9e8c27f72bf28a38ebfbcbd0ccdc540fe4d5 +size 18724 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/inference_latency_validity.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..3fd99ec44ebcfc8ef0319042d68ba9a05f5b6d15 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/inference_latency_validity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d2f5e08331ad38d98102bdba6cb98ce50f2a1cfe71b9b04f295417550d9ae3 +size 55565 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/inference_validity_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/inference_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..f33803d8077f18c5635493a1baafca5ec7e48a1f --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/inference_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed58b43ac3029c3bd0ee87d5f70ebd8088fdcc04ffe870a04771cbd55a49b782 +size 51943 diff --git 
a/polyguard-rl/docs/results/final_submission_evidence/charts/all/legality_rate.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..293afc9e01bcc4b9cfa048d0264b1471d8fd486b --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/legality_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd21eae41ea8c538ed698e94803df78bf0f2a13c0792b6492a2e15b4b449243c +size 10839 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/policy_ablation_avg_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/policy_ablation_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..d5cde71fda1e2b3ca6ee855665b41f49f4c80a0f --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/policy_ablation_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026c7c4f37fe864a156fd8c14fb550925aeef456966e16e41db3b3c23801baba +size 39262 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/policy_ablation_exploit_detection.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/policy_ablation_exploit_detection.png new file mode 100644 index 0000000000000000000000000000000000000000..7ae6a8fd169570681123487cdce84757f051bca9 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/policy_ablation_exploit_detection.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63c66b9dcee62af7b6a1c435fa3c12167bb266bc1d1b239c2002d8243cbcd15 +size 30647 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/policy_ablation_legality.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/policy_ablation_legality.png new file mode 100644 index 
0000000000000000000000000000000000000000..67b35e0e3ae5398409bc3d7b662ef2ceb868a490 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/policy_ablation_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c505ec81276d4a9eb9818ee33698e916148457ed24ed80b8b1f88b93cbd5e8 +size 33228 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/policy_stack_avg_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1081c33e7498b4e47e9ceb4abbff0ecacb785d --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/policy_stack_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437dc54a16cd930fe191bfb337ebb598845f7c0ceea812d936d3cc7f60593e19 +size 13709 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/primary_reward_channel_bars.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/primary_reward_channel_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..fb1f5f1772c1fdb6f5538dca8436bab767e1c9f9 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/primary_reward_channel_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350b188c89e9fac678f8874cdfb755d00e0f4c91e476b2b3038a08784dc8499b +size 51895 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_learning_rate.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..86d4106ba43336486c140a3edd042516b04089e5 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_learning_rate.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:f207bb316cc8d83f35d97de83001ae49cdded1260a4cfd457f734ca39e24b77f +size 71717 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..1c1c949c83aa2f174e141e739e92ca2f14b17bbd --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c731ca7dac821e93c9ff0a56ce40185f825bdc0e13d17985e8464c1f1e8667a9 +size 82066 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_training_loss.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..8102f2f9376d8292c356d27ca1c00beae2976442 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen-qwen2-5-3b-instruct_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b29044889c9428544d50dfb52dd1893add297b2199e628b40563c856b7cd85d3 +size 67479 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_final_sft_train_loss.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_final_sft_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..cc7255be66b67a137963413693944bf3ef06d15a --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_final_sft_train_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9e5908ebdf95fcd5724efbce57abd175d85a06bab0d132503bed15d0f4d0b1b +size 37876 diff --git 
a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_postsave_latency.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_postsave_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..eb8ac6a22484aeed8456588ac308ceab50ecfb1b --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_postsave_latency.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8523c04ac3d23be97e68ce0eba546f5a8ff69bacd81e5c00c4550924c5e507d +size 34170 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_postsave_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_postsave_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..549027011f7d355f18b8f87cb285b9952df16405 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_postsave_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45694faea376c47326806e35bb73120f22ad9355a06c74738fdd1f2243d48107 +size 40824 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_remote_completed_stage_durations.png new file mode 100644 index 0000000000000000000000000000000000000000..de01785fc0b7fbac820e4f10f0e21d4d3a8e0cbc --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_remote_completed_stage_durations.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb89e7e1f49e89953b076cf1b120fe2a4ba7291439708cffe4b54177d3b641d5 +size 44059 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_sft_runtime.png 
b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_sft_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..2542df7e152c469fe0399c1360f33edab685820b --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_1_5b_sft_runtime.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bddbd70ab3f0cb766af70dedf84da136196c93dbeeb7b9830eac9aaa7a78e421 +size 35412 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_learning_rate.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d9036a9840836250368a7993ad019e1175fa484a --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_learning_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13336a7ac896659c56b9a07cd272616a4ead67402fad5292dff8a9560a84c981 +size 71817 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_token_accuracy.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..a6337bcdf68cf0b10fa8184d6352cb7feb01beaf --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f5e5d7288978d9d536785b6f366570a00e2cb2cd7ee074411a78d01977ee78 +size 84524 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_training_loss.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..1a01a1728fe38823c531bee9b49cf156b9ba9fef --- /dev/null +++ 
b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e77776497c52e5192e9658f44da24f05ee27bb137352dfb739d98b492291fdcb +size 70407 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_vs_1_5b_sft_loss_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..208a4f1153376b021576659fdd9d4ec64f9488ea --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_vs_1_5b_sft_loss_comparison.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67da2872e4ac106622e9c46e1b09d6e69a41d65cca3ba38d34ca807c9b6b390a +size 80511 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..90d56668deedb84efd614c4cc101f989d75eba6c --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e12c9af0eaaa56065254ad49fb4988b1a4375fc823889347c29647f2fed4caee +size 91484 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_learning_rate.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..3facdc5ffea0ef2c6c064075f5077d8b57960cef --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_learning_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3699683c4759100be7d76530e9ad6fee136a2cc4aeb34972308788f87c2eb75e +size 70799 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_token_accuracy.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..6701100aaa3fd4db073a801c41988f71d4cab5d9 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e64b64dca38f903fb4a47a4f0071335cb6abe1089f3e31a365774b0722077b +size 61234 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_training_loss.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..f8da2140737f7aee30abee97a36e06008600d89d --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_1_5b_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b748f7d53e33826d069832bbca2c18edb89804cb0cc68b89b2dca039cff0b8 +size 54900 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_model_grpo_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_model_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..6d2567111051ba3a841fc3cde5b3076d788bd209 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_model_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19cfcb40ed7116e365ebc2d5b26b589a712274d697d2925cd6f2ab696f73e59 +size 50755 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_model_sft_loss.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_model_sft_loss.png 
new file mode 100644 index 0000000000000000000000000000000000000000..718bbaafed77be7c93097702d6668174747da2c5 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_model_sft_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b0dd58526f8f70f77c6aef089d258ee3dbcddd159353b2dbe746cabece78b7 +size 46793 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_model_sft_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_model_sft_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..e81bfa202cd7ab2b5b20df64d10a099b2c94e7e5 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/qwen_model_sft_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:987d3064d7d069b8165e4a68a03b3db1efa073f553861af1c745596d3f37eaf4 +size 49361 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/reward_component_bars.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..6751ab23d8c9be3b40c5c7307fa73845cf997aff --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/reward_component_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c72d41541c4ecc021764f494fa3c62f1ce32dd292dd7a7f9cc64a1b17c0a9373 +size 18820 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/sft_loss_curves.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/sft_loss_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..73657c679f411f4a68b974e34259790f151b7d29 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/sft_loss_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a7d69fe49976c0a685b85ca8e88ea8c501bd737d1578beab1a006e40f599bf1f +size 76643 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/sft_validity_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/sft_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..bcb4b3e7407c7a93a5bdb911d32fffd40e10dc48 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/sft_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c2dfea5ab9ec5eec712516ea73935973c205a3bd99eb2e77516adaa349e798 +size 50375 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/sft_vs_grpo_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/sft_vs_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..4a3b345245cec83cb42ed3e76a4e29ad58f5a492 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/sft_vs_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6e8c073db4cac6b72d15b95538c29e3f6127e2043b19a8a4394ae46ff84690 +size 55073 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/all/train_holdout_gap.png b/polyguard-rl/docs/results/final_submission_evidence/charts/all/train_holdout_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..a8b0018ecdb6ed01eeffce1fb1a2da84ae3ddd4c --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/all/train_holdout_gap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11699ac5a5e2d9180cc5268fa1bed494ca08bd7a083f446599a659063710abcb +size 49197 diff --git a/docs/results/final_submission_evidence/charts/curated/README.md b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/README.md similarity index 100% rename from docs/results/final_submission_evidence/charts/curated/README.md 
rename to polyguard-rl/docs/results/final_submission_evidence/charts/curated/README.md diff --git a/docs/results/final_submission_evidence/charts/curated/chart_index.json b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/chart_index.json similarity index 100% rename from docs/results/final_submission_evidence/charts/curated/chart_index.json rename to polyguard-rl/docs/results/final_submission_evidence/charts/curated/chart_index.json diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/inference/inference_latency_validity.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/inference/inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..3fd99ec44ebcfc8ef0319042d68ba9a05f5b6d15 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/inference/inference_latency_validity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d2f5e08331ad38d98102bdba6cb98ce50f2a1cfe71b9b04f295417550d9ae3 +size 55565 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/inference/inference_validity_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/inference/inference_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..f33803d8077f18c5635493a1baafca5ec7e48a1f --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/inference/inference_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed58b43ac3029c3bd0ee87d5f70ebd8088fdcc04ffe870a04771cbd55a49b782 +size 51943 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/model_comparison/qwen_model_grpo_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/model_comparison/qwen_model_grpo_reward.png new file mode 100644 index 
0000000000000000000000000000000000000000..6d2567111051ba3a841fc3cde5b3076d788bd209 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/model_comparison/qwen_model_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19cfcb40ed7116e365ebc2d5b26b589a712274d697d2925cd6f2ab696f73e59 +size 50755 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/model_comparison/qwen_model_sft_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/model_comparison/qwen_model_sft_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..e81bfa202cd7ab2b5b20df64d10a099b2c94e7e5 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/model_comparison/qwen_model_sft_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:987d3064d7d069b8165e4a68a03b3db1efa073f553861af1c745596d3f37eaf4 +size 49361 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/model_comparison/sft_loss_by_qwen_size.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/model_comparison/sft_loss_by_qwen_size.png new file mode 100644 index 0000000000000000000000000000000000000000..63e393aa6897a863ba00cde9fe1fdd172649afda --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/model_comparison/sft_loss_by_qwen_size.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ef0473a5388f423aad428f8730b9e2ab1419a7783f3254b6197104cd86755b6 +size 39545 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/model_comparison/sft_vs_grpo_reward_by_model.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/model_comparison/sft_vs_grpo_reward_by_model.png new file mode 100644 index 0000000000000000000000000000000000000000..52856df1a884c36c21c15e06a183b8cefac53304 --- /dev/null +++ 
b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/model_comparison/sft_vs_grpo_reward_by_model.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60510358fa7669b601e03a3c2718b9f770e97d48aab8b33475411cde2a907027 +size 41342 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/policy_ablation/policy_ablation_legality.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/policy_ablation/policy_ablation_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..67b35e0e3ae5398409bc3d7b662ef2ceb868a490 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/policy_ablation/policy_ablation_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c505ec81276d4a9eb9818ee33698e916148457ed24ed80b8b1f88b93cbd5e8 +size 33228 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/policy_ablation/policy_ablation_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/policy_ablation/policy_ablation_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..d5cde71fda1e2b3ca6ee855665b41f49f4c80a0f --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/policy_ablation/policy_ablation_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026c7c4f37fe864a156fd8c14fb550925aeef456966e16e41db3b3c23801baba +size 39262 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/product_over_basic_llm/basic_llm_vs_full_pipeline_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/product_over_basic_llm/basic_llm_vs_full_pipeline_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..2c8be057c8679f255a48baa579e2698510e36419 --- /dev/null +++ 
b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/product_over_basic_llm/basic_llm_vs_full_pipeline_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37f7740740ecd7e509f0859ac962e045c2f0a07960d4e00ceccd1cb8281abe7 +size 58200 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/product_over_basic_llm/reward_delta_by_seed.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/product_over_basic_llm/reward_delta_by_seed.png new file mode 100644 index 0000000000000000000000000000000000000000..d0b118adb6baad555b9004547dcaf13089ea35fd --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/product_over_basic_llm/reward_delta_by_seed.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e2d0f462663346ad69b09a236674983829f7e7a23706e12eed74b1901c20f0 +size 40643 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/reward_and_safety/anti_cheat_failure_rates.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/reward_and_safety/anti_cheat_failure_rates.png new file mode 100644 index 0000000000000000000000000000000000000000..7c504c44bf70b1e5366503778875e526db8df542 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/reward_and_safety/anti_cheat_failure_rates.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad0a632699048d87865977f75522575ea22ebd1940719952b8a1e9064cfdb3b4 +size 52299 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/reward_and_safety/primary_reward_channel_bars.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/reward_and_safety/primary_reward_channel_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..fb1f5f1772c1fdb6f5538dca8436bab767e1c9f9 --- /dev/null +++ 
b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/reward_and_safety/primary_reward_channel_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350b188c89e9fac678f8874cdfb755d00e0f4c91e476b2b3038a08784dc8499b +size 51895 diff --git a/docs/results/final_submission_evidence/charts/curated/reward_and_safety/reward_component_bars.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/reward_and_safety/reward_component_bars.png similarity index 100% rename from docs/results/final_submission_evidence/charts/curated/reward_and_safety/reward_component_bars.png rename to polyguard-rl/docs/results/final_submission_evidence/charts/curated/reward_and_safety/reward_component_bars.png diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/reward_and_safety/train_holdout_gap.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/reward_and_safety/train_holdout_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..a8b0018ecdb6ed01eeffce1fb1a2da84ae3ddd4c --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/reward_and_safety/train_holdout_gap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11699ac5a5e2d9180cc5268fa1bed494ca08bd7a083f446599a659063710abcb +size 49197 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_kl_curve.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_kl_curve.png new file mode 100644 index 0000000000000000000000000000000000000000..1dab0ea134ed5f0a9af4897fe2fcfb7423ecba0f --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_kl_curve.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af687f6953c97c38661a7609bcb8468ebc821711baf44586d91504ff88cc75b3 +size 62997 diff --git 
a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_loss_curve.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_loss_curve.png new file mode 100644 index 0000000000000000000000000000000000000000..d9b7daa0acd1027c915fdf0b99825bccb5d0411f --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_loss_curve.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc7280059099832e9c08e852926deabf0fce466ce3432572ef112ec56fa826d7 +size 56508 diff --git a/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_reward_curve.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_reward_curve.png similarity index 100% rename from docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_reward_curve.png rename to polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_grpo_reward_curve.png diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_sft_training_loss.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..8102f2f9376d8292c356d27ca1c00beae2976442 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/qwen_3b_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b29044889c9428544d50dfb52dd1893add297b2199e628b40563c856b7cd85d3 +size 67479 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/sft_loss_curves_all_models.png b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/sft_loss_curves_all_models.png new file mode 100644 index 
0000000000000000000000000000000000000000..73657c679f411f4a68b974e34259790f151b7d29 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/curated/training/sft_loss_curves_all_models.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7d69fe49976c0a685b85ca8e88ea8c501bd737d1578beab1a006e40f599bf1f +size 76643 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/00_sft_vs_grpo_reward_by_model.png b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/00_sft_vs_grpo_reward_by_model.png new file mode 100644 index 0000000000000000000000000000000000000000..52856df1a884c36c21c15e06a183b8cefac53304 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/00_sft_vs_grpo_reward_by_model.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60510358fa7669b601e03a3c2718b9f770e97d48aab8b33475411cde2a907027 +size 41342 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/01_basic_llm_vs_full_pipeline_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/01_basic_llm_vs_full_pipeline_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..2c8be057c8679f255a48baa579e2698510e36419 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/01_basic_llm_vs_full_pipeline_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37f7740740ecd7e509f0859ac962e045c2f0a07960d4e00ceccd1cb8281abe7 +size 58200 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/02_reward_delta_by_seed.png b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/02_reward_delta_by_seed.png new file mode 100644 index 0000000000000000000000000000000000000000..d0b118adb6baad555b9004547dcaf13089ea35fd --- /dev/null +++ 
b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/02_reward_delta_by_seed.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e2d0f462663346ad69b09a236674983829f7e7a23706e12eed74b1901c20f0 +size 40643 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/03_policy_ablation_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/03_policy_ablation_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..d5cde71fda1e2b3ca6ee855665b41f49f4c80a0f --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/03_policy_ablation_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026c7c4f37fe864a156fd8c14fb550925aeef456966e16e41db3b3c23801baba +size 39262 diff --git a/docs/results/final_submission_evidence/charts/frontpage/04_reward_components.png b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/04_reward_components.png similarity index 100% rename from docs/results/final_submission_evidence/charts/frontpage/04_reward_components.png rename to polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/04_reward_components.png diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/05_train_holdout_gap.png b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/05_train_holdout_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..a8b0018ecdb6ed01eeffce1fb1a2da84ae3ddd4c --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/05_train_holdout_gap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11699ac5a5e2d9180cc5268fa1bed494ca08bd7a083f446599a659063710abcb +size 49197 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/06_inference_latency_validity.png 
b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/06_inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..3fd99ec44ebcfc8ef0319042d68ba9a05f5b6d15 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/06_inference_latency_validity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d2f5e08331ad38d98102bdba6cb98ce50f2a1cfe71b9b04f295417550d9ae3 +size 55565 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/07_sft_vs_grpo_reward.png b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/07_sft_vs_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..4a3b345245cec83cb42ed3e76a4e29ad58f5a492 --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/07_sft_vs_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6e8c073db4cac6b72d15b95538c29e3f6127e2043b19a8a4394ae46ff84690 +size 55073 diff --git a/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/08_sft_loss_by_model.png b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/08_sft_loss_by_model.png new file mode 100644 index 0000000000000000000000000000000000000000..63e393aa6897a863ba00cde9fe1fdd172649afda --- /dev/null +++ b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/08_sft_loss_by_model.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ef0473a5388f423aad428f8730b9e2ab1419a7783f3254b6197104cd86755b6 +size 39545 diff --git a/docs/results/final_submission_evidence/charts/frontpage/09_qwen_3b_grpo_reward_curve.png b/polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/09_qwen_3b_grpo_reward_curve.png similarity index 100% rename from docs/results/final_submission_evidence/charts/frontpage/09_qwen_3b_grpo_reward_curve.png rename to 
polyguard-rl/docs/results/final_submission_evidence/charts/frontpage/09_qwen_3b_grpo_reward_curve.png diff --git a/docs/results/final_submission_evidence/charts/stale_superseded/README.md b/polyguard-rl/docs/results/final_submission_evidence/charts/stale_superseded/README.md similarity index 100% rename from docs/results/final_submission_evidence/charts/stale_superseded/README.md rename to polyguard-rl/docs/results/final_submission_evidence/charts/stale_superseded/README.md diff --git a/docs/results/final_submission_evidence/charts/stale_superseded/superseded_chart_index.json b/polyguard-rl/docs/results/final_submission_evidence/charts/stale_superseded/superseded_chart_index.json similarity index 100% rename from docs/results/final_submission_evidence/charts/stale_superseded/superseded_chart_index.json rename to polyguard-rl/docs/results/final_submission_evidence/charts/stale_superseded/superseded_chart_index.json diff --git a/docs/results/final_submission_evidence/manifest.json b/polyguard-rl/docs/results/final_submission_evidence/manifest.json similarity index 100% rename from docs/results/final_submission_evidence/manifest.json rename to polyguard-rl/docs/results/final_submission_evidence/manifest.json diff --git a/docs/results/final_submission_evidence/reports/action_traces.jsonl b/polyguard-rl/docs/results/final_submission_evidence/reports/action_traces.jsonl similarity index 100% rename from docs/results/final_submission_evidence/reports/action_traces.jsonl rename to polyguard-rl/docs/results/final_submission_evidence/reports/action_traces.jsonl diff --git a/docs/results/final_submission_evidence/reports/basic_llm_failure_cases.md b/polyguard-rl/docs/results/final_submission_evidence/reports/basic_llm_failure_cases.md similarity index 100% rename from docs/results/final_submission_evidence/reports/basic_llm_failure_cases.md rename to polyguard-rl/docs/results/final_submission_evidence/reports/basic_llm_failure_cases.md diff --git 
a/docs/results/final_submission_evidence/reports/basic_llm_vs_polyguard_report.json b/polyguard-rl/docs/results/final_submission_evidence/reports/basic_llm_vs_polyguard_report.json similarity index 100% rename from docs/results/final_submission_evidence/reports/basic_llm_vs_polyguard_report.json rename to polyguard-rl/docs/results/final_submission_evidence/reports/basic_llm_vs_polyguard_report.json diff --git a/docs/results/final_submission_evidence/reports/grpo_ablation_report.json b/polyguard-rl/docs/results/final_submission_evidence/reports/grpo_ablation_report.json similarity index 100% rename from docs/results/final_submission_evidence/reports/grpo_ablation_report.json rename to polyguard-rl/docs/results/final_submission_evidence/reports/grpo_ablation_report.json diff --git a/docs/results/final_submission_evidence/reports/grpo_trl_run.json b/polyguard-rl/docs/results/final_submission_evidence/reports/grpo_trl_run.json similarity index 100% rename from docs/results/final_submission_evidence/reports/grpo_trl_run.json rename to polyguard-rl/docs/results/final_submission_evidence/reports/grpo_trl_run.json diff --git a/docs/results/final_submission_evidence/reports/policy_ablation_report.json b/polyguard-rl/docs/results/final_submission_evidence/reports/policy_ablation_report.json similarity index 100% rename from docs/results/final_submission_evidence/reports/policy_ablation_report.json rename to polyguard-rl/docs/results/final_submission_evidence/reports/policy_ablation_report.json diff --git a/docs/results/final_submission_evidence/reports/postsave_inference_grpo.json b/polyguard-rl/docs/results/final_submission_evidence/reports/postsave_inference_grpo.json similarity index 100% rename from docs/results/final_submission_evidence/reports/postsave_inference_grpo.json rename to polyguard-rl/docs/results/final_submission_evidence/reports/postsave_inference_grpo.json diff --git a/docs/results/final_submission_evidence/reports/submission_summary.json 
b/polyguard-rl/docs/results/final_submission_evidence/reports/submission_summary.json similarity index 100% rename from docs/results/final_submission_evidence/reports/submission_summary.json rename to polyguard-rl/docs/results/final_submission_evidence/reports/submission_summary.json diff --git a/docs/results/frontier_ready.json b/polyguard-rl/docs/results/frontier_ready.json similarity index 100% rename from docs/results/frontier_ready.json rename to polyguard-rl/docs/results/frontier_ready.json diff --git a/docs/results/graph_train.json b/polyguard-rl/docs/results/graph_train.json similarity index 100% rename from docs/results/graph_train.json rename to polyguard-rl/docs/results/graph_train.json diff --git a/docs/results/grpo_ablation_report.json b/polyguard-rl/docs/results/grpo_ablation_report.json similarity index 100% rename from docs/results/grpo_ablation_report.json rename to polyguard-rl/docs/results/grpo_ablation_report.json diff --git a/polyguard-rl/docs/results/grpo_reward_curves.png b/polyguard-rl/docs/results/grpo_reward_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..f3850bd6c22f350e534a1874e9e6d60f6f350450 --- /dev/null +++ b/polyguard-rl/docs/results/grpo_reward_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d134ce16a328958cdddfb2d1273d9e8c27f72bf28a38ebfbcbd0ccdc540fe4d5 +size 18724 diff --git a/polyguard-rl/docs/results/grpo_training_cycle/avg_process_fidelity.png b/polyguard-rl/docs/results/grpo_training_cycle/avg_process_fidelity.png new file mode 100644 index 0000000000000000000000000000000000000000..af8956c1da14001e096d7cabf96f511bd517c075 --- /dev/null +++ b/polyguard-rl/docs/results/grpo_training_cycle/avg_process_fidelity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02f7f1be2533be9d0862eb78c8948ce689128b3e0d2c72cf89719dbecbf8da04 +size 12532 diff --git a/polyguard-rl/docs/results/grpo_training_cycle/avg_reward.png 
b/polyguard-rl/docs/results/grpo_training_cycle/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..abf2aa56b5280d757f4a3af28aeef928ff704530 --- /dev/null +++ b/polyguard-rl/docs/results/grpo_training_cycle/avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e45b324d208a71eeffa0dcfe146a5a7f2e17cf5a951a173dcd68818106d774c +size 11094 diff --git a/docs/results/grpo_training_cycle/grpo_trl_run.json b/polyguard-rl/docs/results/grpo_training_cycle/grpo_trl_run.json similarity index 100% rename from docs/results/grpo_training_cycle/grpo_trl_run.json rename to polyguard-rl/docs/results/grpo_training_cycle/grpo_trl_run.json diff --git a/docs/results/grpo_training_cycle/hf_training_status.json b/polyguard-rl/docs/results/grpo_training_cycle/hf_training_status.json similarity index 100% rename from docs/results/grpo_training_cycle/hf_training_status.json rename to polyguard-rl/docs/results/grpo_training_cycle/hf_training_status.json diff --git a/polyguard-rl/docs/results/grpo_training_cycle/legality_rate.png b/polyguard-rl/docs/results/grpo_training_cycle/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..293afc9e01bcc4b9cfa048d0264b1471d8fd486b --- /dev/null +++ b/polyguard-rl/docs/results/grpo_training_cycle/legality_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd21eae41ea8c538ed698e94803df78bf0f2a13c0792b6492a2e15b4b449243c +size 10839 diff --git a/polyguard-rl/docs/results/grpo_training_cycle/policy_stack_avg_reward.png b/polyguard-rl/docs/results/grpo_training_cycle/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1081c33e7498b4e47e9ceb4abbff0ecacb785d --- /dev/null +++ b/polyguard-rl/docs/results/grpo_training_cycle/policy_stack_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437dc54a16cd930fe191bfb337ebb598845f7c0ceea812d936d3cc7f60593e19 
+size 13709 diff --git a/polyguard-rl/docs/results/grpo_training_cycle/success_rate.png b/polyguard-rl/docs/results/grpo_training_cycle/success_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d48cf79006c0bc7241782a29d010e95bc524069f --- /dev/null +++ b/polyguard-rl/docs/results/grpo_training_cycle/success_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb49f19b04948dbe3dcac9b2469e93e6e07167fd297587228d70232e7aa41ff5 +size 11233 diff --git a/docs/results/grpo_trl_run.json b/polyguard-rl/docs/results/grpo_trl_run.json similarity index 100% rename from docs/results/grpo_trl_run.json rename to polyguard-rl/docs/results/grpo_trl_run.json diff --git a/docs/results/grpo_trl_run_auto.json b/polyguard-rl/docs/results/grpo_trl_run_auto.json similarity index 100% rename from docs/results/grpo_trl_run_auto.json rename to polyguard-rl/docs/results/grpo_trl_run_auto.json diff --git a/docs/results/grpo_trl_run_fallback_check.json b/polyguard-rl/docs/results/grpo_trl_run_fallback_check.json similarity index 100% rename from docs/results/grpo_trl_run_fallback_check.json rename to polyguard-rl/docs/results/grpo_trl_run_fallback_check.json diff --git a/docs/results/grpo_trl_run_smoke.json b/polyguard-rl/docs/results/grpo_trl_run_smoke.json similarity index 100% rename from docs/results/grpo_trl_run_smoke.json rename to polyguard-rl/docs/results/grpo_trl_run_smoke.json diff --git a/docs/results/grpo_trl_run_strict_check.json b/polyguard-rl/docs/results/grpo_trl_run_strict_check.json similarity index 100% rename from docs/results/grpo_trl_run_strict_check.json rename to polyguard-rl/docs/results/grpo_trl_run_strict_check.json diff --git a/docs/results/hf_space_verification.json b/polyguard-rl/docs/results/hf_space_verification.json similarity index 100% rename from docs/results/hf_space_verification.json rename to polyguard-rl/docs/results/hf_space_verification.json diff --git a/docs/results/hf_sweep_summary.json 
b/polyguard-rl/docs/results/hf_sweep_summary.json similarity index 100% rename from docs/results/hf_sweep_summary.json rename to polyguard-rl/docs/results/hf_sweep_summary.json diff --git a/docs/results/hf_training_status.json b/polyguard-rl/docs/results/hf_training_status.json similarity index 100% rename from docs/results/hf_training_status.json rename to polyguard-rl/docs/results/hf_training_status.json diff --git a/docs/results/improvement_report.json b/polyguard-rl/docs/results/improvement_report.json similarity index 100% rename from docs/results/improvement_report.json rename to polyguard-rl/docs/results/improvement_report.json diff --git a/docs/results/improvement_report_benchmark.json b/polyguard-rl/docs/results/improvement_report_benchmark.json similarity index 100% rename from docs/results/improvement_report_benchmark.json rename to polyguard-rl/docs/results/improvement_report_benchmark.json diff --git a/docs/results/inference_benchmark.json b/polyguard-rl/docs/results/inference_benchmark.json similarity index 100% rename from docs/results/inference_benchmark.json rename to polyguard-rl/docs/results/inference_benchmark.json diff --git a/polyguard-rl/docs/results/inference_latency_validity.png b/polyguard-rl/docs/results/inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..3fd99ec44ebcfc8ef0319042d68ba9a05f5b6d15 --- /dev/null +++ b/polyguard-rl/docs/results/inference_latency_validity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d2f5e08331ad38d98102bdba6cb98ce50f2a1cfe71b9b04f295417550d9ae3 +size 55565 diff --git a/polyguard-rl/docs/results/inference_validity_reward.png b/polyguard-rl/docs/results/inference_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..f33803d8077f18c5635493a1baafca5ec7e48a1f --- /dev/null +++ b/polyguard-rl/docs/results/inference_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ed58b43ac3029c3bd0ee87d5f70ebd8088fdcc04ffe870a04771cbd55a49b782 +size 51943 diff --git a/polyguard-rl/docs/results/legality_rate.png b/polyguard-rl/docs/results/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..293afc9e01bcc4b9cfa048d0264b1471d8fd486b --- /dev/null +++ b/polyguard-rl/docs/results/legality_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd21eae41ea8c538ed698e94803df78bf0f2a13c0792b6492a2e15b4b449243c +size 10839 diff --git a/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/artifact_repo_listing.json b/polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/artifact_repo_listing.json similarity index 100% rename from docs/results/model_improvement_evidence/qwen_0_5b_1_5b/artifact_repo_listing.json rename to polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/artifact_repo_listing.json diff --git a/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/basic_llm_vs_polyguard_report.json b/polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/basic_llm_vs_polyguard_report.json similarity index 100% rename from docs/results/model_improvement_evidence/qwen_0_5b_1_5b/basic_llm_vs_polyguard_report.json rename to polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/basic_llm_vs_polyguard_report.json diff --git a/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/chart_index.json b/polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/chart_index.json similarity index 100% rename from docs/results/model_improvement_evidence/qwen_0_5b_1_5b/chart_index.json rename to polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/chart_index.json diff --git a/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/evidence_matrix.json b/polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/evidence_matrix.json similarity index 100% rename from 
docs/results/model_improvement_evidence/qwen_0_5b_1_5b/evidence_matrix.json rename to polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/evidence_matrix.json diff --git a/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/hf_status_snapshot.json b/polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/hf_status_snapshot.json similarity index 100% rename from docs/results/model_improvement_evidence/qwen_0_5b_1_5b/hf_status_snapshot.json rename to polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/hf_status_snapshot.json diff --git a/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/manifest.json b/polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/manifest.json similarity index 100% rename from docs/results/model_improvement_evidence/qwen_0_5b_1_5b/manifest.json rename to polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/manifest.json diff --git a/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/model_improvement_report.json b/polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/model_improvement_report.json similarity index 100% rename from docs/results/model_improvement_evidence/qwen_0_5b_1_5b/model_improvement_report.json rename to polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/model_improvement_report.json diff --git a/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/policy_ablation_report.json b/polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/policy_ablation_report.json similarity index 100% rename from docs/results/model_improvement_evidence/qwen_0_5b_1_5b/policy_ablation_report.json rename to polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/policy_ablation_report.json diff --git a/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/remote_stage_records.json b/polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/remote_stage_records.json similarity index 100% rename 
from docs/results/model_improvement_evidence/qwen_0_5b_1_5b/remote_stage_records.json rename to polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/remote_stage_records.json diff --git a/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/submission_summary.json b/polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/submission_summary.json similarity index 100% rename from docs/results/model_improvement_evidence/qwen_0_5b_1_5b/submission_summary.json rename to polyguard-rl/docs/results/model_improvement_evidence/qwen_0_5b_1_5b/submission_summary.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/README.md b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/README.md similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/README.md rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/README.md diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/chart_index.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/chart_index.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/chart_index.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/chart_index.json diff --git a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/grpo_training/grpo_reward_curves.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/grpo_training/grpo_reward_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..f3850bd6c22f350e534a1874e9e6d60f6f350450 --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/grpo_training/grpo_reward_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d134ce16a328958cdddfb2d1273d9e8c27f72bf28a38ebfbcbd0ccdc540fe4d5 +size 18724 diff --git 
a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/inference/inference_latency_validity.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/inference/inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..3fd99ec44ebcfc8ef0319042d68ba9a05f5b6d15 --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/inference/inference_latency_validity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d2f5e08331ad38d98102bdba6cb98ce50f2a1cfe71b9b04f295417550d9ae3 +size 55565 diff --git a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_grpo_reward.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..6d2567111051ba3a841fc3cde5b3076d788bd209 --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19cfcb40ed7116e365ebc2d5b26b589a712274d697d2925cd6f2ab696f73e59 +size 50755 diff --git a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_sft_loss.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_sft_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..718bbaafed77be7c93097702d6668174747da2c5 --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_sft_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b0dd58526f8f70f77c6aef089d258ee3dbcddd159353b2dbe746cabece78b7 +size 46793 diff --git 
a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_sft_reward.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_sft_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..e81bfa202cd7ab2b5b20df64d10a099b2c94e7e5 --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/model_comparison/qwen_model_sft_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:987d3064d7d069b8165e4a68a03b3db1efa073f553861af1c745596d3f37eaf4 +size 49361 diff --git a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/overfit_checks/train_holdout_gap.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/overfit_checks/train_holdout_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..a8b0018ecdb6ed01eeffce1fb1a2da84ae3ddd4c --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/overfit_checks/train_holdout_gap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11699ac5a5e2d9180cc5268fa1bed494ca08bd7a083f446599a659063710abcb +size 49197 diff --git a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_ablation_avg_reward.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_ablation_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..d5cde71fda1e2b3ca6ee855665b41f49f4c80a0f --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_ablation_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026c7c4f37fe864a156fd8c14fb550925aeef456966e16e41db3b3c23801baba +size 39262 diff --git 
a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_ablation_legality.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_ablation_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..67b35e0e3ae5398409bc3d7b662ef2ceb868a490 --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_ablation_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c505ec81276d4a9eb9818ee33698e916148457ed24ed80b8b1f88b93cbd5e8 +size 33228 diff --git a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_stack_avg_reward.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1081c33e7498b4e47e9ceb4abbff0ecacb785d --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/policy_ablation/policy_stack_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437dc54a16cd930fe191bfb337ebb598845f7c0ceea812d936d3cc7f60593e19 +size 13709 diff --git a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_legality.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..1113f0ebb7243a2b03530964ec6315ec65de5c63 --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e53b7fef0413492382712ca477f10c471bb26561c8db0566a283be20f2c4d9 +size 
51915 diff --git a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_reward.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..2c8be057c8679f255a48baa579e2698510e36419 --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37f7740740ecd7e509f0859ac962e045c2f0a07960d4e00ceccd1cb8281abe7 +size 58200 diff --git a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_reward_delta_by_seed.png new file mode 100644 index 0000000000000000000000000000000000000000..d0b118adb6baad555b9004547dcaf13089ea35fd --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/product_over_basic_llm/basic_llm_vs_full_pipeline_reward_delta_by_seed.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e2d0f462663346ad69b09a236674983829f7e7a23706e12eed74b1901c20f0 +size 40643 diff --git a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/reward_function/primary_reward_channel_bars.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/reward_function/primary_reward_channel_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..fb1f5f1772c1fdb6f5538dca8436bab767e1c9f9 --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/reward_function/primary_reward_channel_bars.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:350b188c89e9fac678f8874cdfb755d00e0f4c91e476b2b3038a08784dc8499b +size 51895 diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/reward_function/reward_component_bars.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/reward_function/reward_component_bars.png similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/reward_function/reward_component_bars.png rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/reward_function/reward_component_bars.png diff --git a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/safeguards/anti_cheat_failure_rates.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/safeguards/anti_cheat_failure_rates.png new file mode 100644 index 0000000000000000000000000000000000000000..7c504c44bf70b1e5366503778875e526db8df542 --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/safeguards/anti_cheat_failure_rates.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad0a632699048d87865977f75522575ea22ebd1940719952b8a1e9064cfdb3b4 +size 52299 diff --git a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/sft_vs_grpo/sft_vs_grpo_reward.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/sft_vs_grpo/sft_vs_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..4a3b345245cec83cb42ed3e76a4e29ad58f5a492 --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/sft_vs_grpo/sft_vs_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6e8c073db4cac6b72d15b95538c29e3f6127e2043b19a8a4394ae46ff84690 +size 55073 diff --git 
a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_accuracy/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_accuracy/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..54dbe92d3a51ba9cc8883e782d1a5654cb9bd21e --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_accuracy/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:827b7584252d1e1fec71a719c107680ff762e5a5fdbe4eed6538a0794f8b5cf2 +size 72990 diff --git a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_0_5b_sft_training_loss.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_0_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..1a01a1728fe38823c531bee9b49cf156b9ba9fef --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_0_5b_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e77776497c52e5192e9658f44da24f05ee27bb137352dfb739d98b492291fdcb +size 70407 diff --git a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_0_5b_vs_1_5b_sft_loss_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..4373c02c5fee262fbb3ceab3d953f0be01b83691 --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_0_5b_vs_1_5b_sft_loss_comparison.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:73c01ac6fd578a4c19dbe5116ae2be54bb527a787947d79e29222896323203ba +size 62864 diff --git a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_1_5b_sft_training_loss.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_1_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..f8da2140737f7aee30abee97a36e06008600d89d --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_loss/qwen_1_5b_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b748f7d53e33826d069832bbca2c18edb89804cb0cc68b89b2dca039cff0b8 +size 54900 diff --git a/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_runtime/qwen_0_5b_1_5b_sft_runtime.png b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_runtime/qwen_0_5b_1_5b_sft_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..fc3a14cfc74ed4f75cc8a7fa336235d3207e55b2 --- /dev/null +++ b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/charts/training_runtime/qwen_0_5b_1_5b_sft_runtime.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f792ccfb0694c6df64a0f01950b09a85258c25237c97c2e6e3d124253c42a9c +size 30813 diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/manifest.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/manifest.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/manifest.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/manifest.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/action_traces.jsonl b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/action_traces.jsonl similarity index 100% rename from 
docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/action_traces.jsonl rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/action_traces.jsonl diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/artifact_repo_listing.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/artifact_repo_listing.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/artifact_repo_listing.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/artifact_repo_listing.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/baseline_vs_trained_cases.md b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/baseline_vs_trained_cases.md similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/baseline_vs_trained_cases.md rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/baseline_vs_trained_cases.md diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/basic_llm_failure_cases.md b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/basic_llm_failure_cases.md similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/basic_llm_failure_cases.md rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/basic_llm_failure_cases.md diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/basic_llm_vs_polyguard_report.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/basic_llm_vs_polyguard_report.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/basic_llm_vs_polyguard_report.json rename to 
polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/basic_llm_vs_polyguard_report.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/evidence_matrix.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/evidence_matrix.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/evidence_matrix.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/evidence_matrix.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/hf_status_snapshot.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/hf_status_snapshot.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/hf_status_snapshot.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/hf_status_snapshot.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/manifest.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/manifest.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/manifest.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/manifest.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/model_improvement_report.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/model_improvement_report.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/model_improvement_report.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/model_improvement_report.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/policy_ablation_report.json 
b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/policy_ablation_report.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/policy_ablation_report.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/policy_ablation_report.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/remote_stage_records.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/remote_stage_records.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/remote_stage_records.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/remote_stage_records.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json diff --git 
a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json rename 
to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json 
b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/submission_summary.json b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/submission_summary.json similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/submission_summary.json rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/reports/submission_summary.json diff --git a/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/traces/action_traces.jsonl b/polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/traces/action_traces.jsonl similarity index 100% rename from docs/results/model_improvement_evidence_qwen_0_5b_1_5b/traces/action_traces.jsonl rename to polyguard-rl/docs/results/model_improvement_evidence_qwen_0_5b_1_5b/traces/action_traces.jsonl diff --git a/docs/results/planner_grpo.json b/polyguard-rl/docs/results/planner_grpo.json similarity index 100% rename from docs/results/planner_grpo.json rename to polyguard-rl/docs/results/planner_grpo.json diff --git a/docs/results/plot_index.json b/polyguard-rl/docs/results/plot_index.json similarity index 100% rename from docs/results/plot_index.json rename to polyguard-rl/docs/results/plot_index.json diff --git a/polyguard-rl/docs/results/policy_stack_avg_reward.png b/polyguard-rl/docs/results/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1081c33e7498b4e47e9ceb4abbff0ecacb785d --- /dev/null +++ b/polyguard-rl/docs/results/policy_stack_avg_reward.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:437dc54a16cd930fe191bfb337ebb598845f7c0ceea812d936d3cc7f60593e19 +size 13709 diff --git a/docs/results/postsave_inference.json b/polyguard-rl/docs/results/postsave_inference.json similarity index 100% rename from docs/results/postsave_inference.json rename to polyguard-rl/docs/results/postsave_inference.json diff --git a/docs/results/postsave_inference_smoke.json b/polyguard-rl/docs/results/postsave_inference_smoke.json similarity index 100% rename from docs/results/postsave_inference_smoke.json rename to polyguard-rl/docs/results/postsave_inference_smoke.json diff --git a/docs/results/qwen_completed_runs/README.md b/polyguard-rl/docs/results/qwen_completed_runs/README.md similarity index 100% rename from docs/results/qwen_completed_runs/README.md rename to polyguard-rl/docs/results/qwen_completed_runs/README.md diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_latency.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..be7e308acf74757bb7bb36bdd1dba04d57a70ea1 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_latency.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11cedb9e4d520d67f67969b2e7124883889950888e180858c2c84157554cc89a +size 58853 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_legality.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..1113f0ebb7243a2b03530964ec6315ec65de5c63 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:13e53b7fef0413492382712ca477f10c471bb26561c8db0566a283be20f2c4d9 +size 51915 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..2c8be057c8679f255a48baa579e2698510e36419 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37f7740740ecd7e509f0859ac962e045c2f0a07960d4e00ceccd1cb8281abe7 +size 58200 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png new file mode 100644 index 0000000000000000000000000000000000000000..d0b118adb6baad555b9004547dcaf13089ea35fd --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e2d0f462663346ad69b09a236674983829f7e7a23706e12eed74b1901c20f0 +size 40643 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/policy_ablation_avg_reward.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/policy_ablation_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..d5cde71fda1e2b3ca6ee855665b41f49f4c80a0f --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/policy_ablation_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026c7c4f37fe864a156fd8c14fb550925aeef456966e16e41db3b3c23801baba +size 39262 diff --git 
a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/policy_ablation_exploit_detection.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/policy_ablation_exploit_detection.png new file mode 100644 index 0000000000000000000000000000000000000000..7ae6a8fd169570681123487cdce84757f051bca9 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/policy_ablation_exploit_detection.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63c66b9dcee62af7b6a1c435fa3c12167bb266bc1d1b239c2002d8243cbcd15 +size 30647 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/policy_ablation_legality.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/policy_ablation_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..67b35e0e3ae5398409bc3d7b662ef2ceb868a490 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/policy_ablation_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c505ec81276d4a9eb9818ee33698e916148457ed24ed80b8b1f88b93cbd5e8 +size 33228 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/primary_reward_channel_bars.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/primary_reward_channel_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..fb1f5f1772c1fdb6f5538dca8436bab767e1c9f9 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/primary_reward_channel_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350b188c89e9fac678f8874cdfb755d00e0f4c91e476b2b3038a08784dc8499b +size 51895 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png new file mode 100644 index 
0000000000000000000000000000000000000000..ab2682c3e2cbf2a3c4338f6c02e0b42768a9b583 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a38ac3a639dd23250d7b11412643f43971c9ca60842b569b79a79d70aceeac71 +size 35813 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_latency.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..b719773814c253c870ac8cba1d7aef285d0eb862 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_latency.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b670cfff8fbce3f23f0f67df84d75370745cb51298ae0a8da3458ba55bb8986 +size 30067 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_reward.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..86822aa56253de8959c09699e31b29405dbdb5d1 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4027700596bc53c29e6c8fe5058dbc84f2452c6e4299869ba3a41614a56b974f +size 37401 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png new file mode 100644 index 0000000000000000000000000000000000000000..af5b44b88c7ecc050085c11062326ffe6c28f869 --- /dev/null +++ 
b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4944f9b34e912cb5d7be323452b661c761b2e431c01e59cb23f858e2187f2df9 +size 20369 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_sft_runtime.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_sft_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..fc3a14cfc74ed4f75cc8a7fa336235d3207e55b2 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_sft_runtime.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f792ccfb0694c6df64a0f01950b09a85258c25237c97c2e6e3d124253c42a9c +size 30813 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_learning_rate.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d9036a9840836250368a7993ad019e1175fa484a --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_learning_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13336a7ac896659c56b9a07cd272616a4ead67402fad5292dff8a9560a84c981 +size 71817 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_token_accuracy.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..a6337bcdf68cf0b10fa8184d6352cb7feb01beaf --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f5e5d7288978d9d536785b6f366570a00e2cb2cd7ee074411a78d01977ee78 +size 
84524 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_training_loss.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..1a01a1728fe38823c531bee9b49cf156b9ba9fef --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e77776497c52e5192e9658f44da24f05ee27bb137352dfb739d98b492291fdcb +size 70407 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..4373c02c5fee262fbb3ceab3d953f0be01b83691 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73c01ac6fd578a4c19dbe5116ae2be54bb527a787947d79e29222896323203ba +size 62864 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..54dbe92d3a51ba9cc8883e782d1a5654cb9bd21e --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:827b7584252d1e1fec71a719c107680ff762e5a5fdbe4eed6538a0794f8b5cf2 +size 72990 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_learning_rate.png 
b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..3facdc5ffea0ef2c6c064075f5077d8b57960cef --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_learning_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3699683c4759100be7d76530e9ad6fee136a2cc4aeb34972308788f87c2eb75e +size 70799 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_token_accuracy.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..6701100aaa3fd4db073a801c41988f71d4cab5d9 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e64b64dca38f903fb4a47a4f0071335cb6abe1089f3e31a365774b0722077b +size 61234 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_training_loss.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..f8da2140737f7aee30abee97a36e06008600d89d --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b748f7d53e33826d069832bbca2c18edb89804cb0cc68b89b2dca039cff0b8 +size 54900 diff --git a/docs/results/qwen_completed_runs/charts/generated/reward_component_bars.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/generated/reward_component_bars.png similarity index 100% rename from docs/results/qwen_completed_runs/charts/generated/reward_component_bars.png rename to 
polyguard-rl/docs/results/qwen_completed_runs/charts/generated/reward_component_bars.png diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/anti_cheat_failure_rates.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/anti_cheat_failure_rates.png new file mode 100644 index 0000000000000000000000000000000000000000..7c504c44bf70b1e5366503778875e526db8df542 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/anti_cheat_failure_rates.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad0a632699048d87865977f75522575ea22ebd1940719952b8a1e9064cfdb3b4 +size 52299 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/avg_reward.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..abf2aa56b5280d757f4a3af28aeef928ff704530 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e45b324d208a71eeffa0dcfe146a5a7f2e17cf5a951a173dcd68818106d774c +size 11094 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/grpo_reward_curves.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/grpo_reward_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..f3850bd6c22f350e534a1874e9e6d60f6f350450 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/grpo_reward_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d134ce16a328958cdddfb2d1273d9e8c27f72bf28a38ebfbcbd0ccdc540fe4d5 +size 18724 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/inference_latency_validity.png 
b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..3fd99ec44ebcfc8ef0319042d68ba9a05f5b6d15 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/inference_latency_validity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d2f5e08331ad38d98102bdba6cb98ce50f2a1cfe71b9b04f295417550d9ae3 +size 55565 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/inference_validity_reward.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/inference_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..f33803d8077f18c5635493a1baafca5ec7e48a1f --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/inference_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed58b43ac3029c3bd0ee87d5f70ebd8088fdcc04ffe870a04771cbd55a49b782 +size 51943 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/legality_rate.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..293afc9e01bcc4b9cfa048d0264b1471d8fd486b --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/legality_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd21eae41ea8c538ed698e94803df78bf0f2a13c0792b6492a2e15b4b449243c +size 10839 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/policy_stack_avg_reward.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/policy_stack_avg_reward.png new file mode 100644 index 
0000000000000000000000000000000000000000..7d1081c33e7498b4e47e9ceb4abbff0ecacb785d --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/policy_stack_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437dc54a16cd930fe191bfb337ebb598845f7c0ceea812d936d3cc7f60593e19 +size 13709 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_grpo_reward.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..6d2567111051ba3a841fc3cde5b3076d788bd209 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19cfcb40ed7116e365ebc2d5b26b589a712274d697d2925cd6f2ab696f73e59 +size 50755 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_loss.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..718bbaafed77be7c93097702d6668174747da2c5 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b0dd58526f8f70f77c6aef089d258ee3dbcddd159353b2dbe746cabece78b7 +size 46793 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_reward.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..e81bfa202cd7ab2b5b20df64d10a099b2c94e7e5 --- /dev/null +++ 
b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:987d3064d7d069b8165e4a68a03b3db1efa073f553861af1c745596d3f37eaf4 +size 49361 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/reward_component_bars.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..6751ab23d8c9be3b40c5c7307fa73845cf997aff --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/reward_component_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c72d41541c4ecc021764f494fa3c62f1ce32dd292dd7a7f9cc64a1b17c0a9373 +size 18820 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/sft_loss_curves.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/sft_loss_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..73657c679f411f4a68b974e34259790f151b7d29 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/sft_loss_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7d69fe49976c0a685b85ca8e88ea8c501bd737d1578beab1a006e40f599bf1f +size 76643 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/sft_validity_reward.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/sft_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..bcb4b3e7407c7a93a5bdb911d32fffd40e10dc48 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/sft_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:54c2dfea5ab9ec5eec712516ea73935973c205a3bd99eb2e77516adaa349e798 +size 50375 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/sft_vs_grpo_reward.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/sft_vs_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..4a3b345245cec83cb42ed3e76a4e29ad58f5a492 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/sft_vs_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6e8c073db4cac6b72d15b95538c29e3f6127e2043b19a8a4394ae46ff84690 +size 55073 diff --git a/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/train_holdout_gap.png b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/train_holdout_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..a8b0018ecdb6ed01eeffce1fb1a2da84ae3ddd4c --- /dev/null +++ b/polyguard-rl/docs/results/qwen_completed_runs/charts/local_available_combined/train_holdout_gap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11699ac5a5e2d9180cc5268fa1bed494ca08bd7a083f446599a659063710abcb +size 49197 diff --git a/docs/results/qwen_completed_runs/manifests/qwen_0_5b_1_5b_summary.json b/polyguard-rl/docs/results/qwen_completed_runs/manifests/qwen_0_5b_1_5b_summary.json similarity index 100% rename from docs/results/qwen_completed_runs/manifests/qwen_0_5b_1_5b_summary.json rename to polyguard-rl/docs/results/qwen_completed_runs/manifests/qwen_0_5b_1_5b_summary.json diff --git a/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/postsave_inference_sft.json b/polyguard-rl/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/postsave_inference_sft.json similarity index 100% rename from 
docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/postsave_inference_sft.json rename to polyguard-rl/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/postsave_inference_sft.json diff --git a/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/run_metadata.json b/polyguard-rl/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/run_metadata.json similarity index 100% rename from docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/run_metadata.json rename to polyguard-rl/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/run_metadata.json diff --git a/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_history.json b/polyguard-rl/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_history.json similarity index 100% rename from docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_history.json rename to polyguard-rl/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_history.json diff --git a/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_trl_run.json b/polyguard-rl/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_trl_run.json similarity index 100% rename from docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_trl_run.json rename to polyguard-rl/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_trl_run.json diff --git a/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/postsave_inference_sft.json b/polyguard-rl/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/postsave_inference_sft.json similarity index 100% rename from docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/postsave_inference_sft.json rename to 
polyguard-rl/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/postsave_inference_sft.json diff --git a/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/run_metadata.json b/polyguard-rl/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/run_metadata.json similarity index 100% rename from docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/run_metadata.json rename to polyguard-rl/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/run_metadata.json diff --git a/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_history.json b/polyguard-rl/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_history.json similarity index 100% rename from docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_history.json rename to polyguard-rl/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_history.json diff --git a/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_trl_run.json b/polyguard-rl/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_trl_run.json similarity index 100% rename from docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_trl_run.json rename to polyguard-rl/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_trl_run.json diff --git a/docs/results/qwen_completed_runs/reports/remote_status/live_hf_status_snapshot.json b/polyguard-rl/docs/results/qwen_completed_runs/reports/remote_status/live_hf_status_snapshot.json similarity index 100% rename from docs/results/qwen_completed_runs/reports/remote_status/live_hf_status_snapshot.json rename to polyguard-rl/docs/results/qwen_completed_runs/reports/remote_status/live_hf_status_snapshot.json diff --git a/docs/results/qwen_completed_runs/reports/remote_status/qwen_0_5b_1_5b_remote_stage_durations.json 
b/polyguard-rl/docs/results/qwen_completed_runs/reports/remote_status/qwen_0_5b_1_5b_remote_stage_durations.json similarity index 100% rename from docs/results/qwen_completed_runs/reports/remote_status/qwen_0_5b_1_5b_remote_stage_durations.json rename to polyguard-rl/docs/results/qwen_completed_runs/reports/remote_status/qwen_0_5b_1_5b_remote_stage_durations.json diff --git a/docs/results/qwen_completed_runs/reports/remote_status/qwen_0_5b_completed_commands.json b/polyguard-rl/docs/results/qwen_completed_runs/reports/remote_status/qwen_0_5b_completed_commands.json similarity index 100% rename from docs/results/qwen_completed_runs/reports/remote_status/qwen_0_5b_completed_commands.json rename to polyguard-rl/docs/results/qwen_completed_runs/reports/remote_status/qwen_0_5b_completed_commands.json diff --git a/docs/results/qwen_completed_runs/reports/remote_status/qwen_1_5b_completed_commands.json b/polyguard-rl/docs/results/qwen_completed_runs/reports/remote_status/qwen_1_5b_completed_commands.json similarity index 100% rename from docs/results/qwen_completed_runs/reports/remote_status/qwen_1_5b_completed_commands.json rename to polyguard-rl/docs/results/qwen_completed_runs/reports/remote_status/qwen_1_5b_completed_commands.json diff --git a/polyguard-rl/docs/results/qwen_model_grpo_reward.png b/polyguard-rl/docs/results/qwen_model_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..6d2567111051ba3a841fc3cde5b3076d788bd209 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_model_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19cfcb40ed7116e365ebc2d5b26b589a712274d697d2925cd6f2ab696f73e59 +size 50755 diff --git a/polyguard-rl/docs/results/qwen_model_sft_loss.png b/polyguard-rl/docs/results/qwen_model_sft_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..718bbaafed77be7c93097702d6668174747da2c5 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_model_sft_loss.png @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b0dd58526f8f70f77c6aef089d258ee3dbcddd159353b2dbe746cabece78b7 +size 46793 diff --git a/polyguard-rl/docs/results/qwen_model_sft_reward.png b/polyguard-rl/docs/results/qwen_model_sft_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..e81bfa202cd7ab2b5b20df64d10a099b2c94e7e5 --- /dev/null +++ b/polyguard-rl/docs/results/qwen_model_sft_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:987d3064d7d069b8165e4a68a03b3db1efa073f553861af1c745596d3f37eaf4 +size 49361 diff --git a/polyguard-rl/docs/results/reward_component_bars.png b/polyguard-rl/docs/results/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..6751ab23d8c9be3b40c5c7307fa73845cf997aff --- /dev/null +++ b/polyguard-rl/docs/results/reward_component_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c72d41541c4ecc021764f494fa3c62f1ce32dd292dd7a7f9cc64a1b17c0a9373 +size 18820 diff --git a/docs/results/risk_train.json b/polyguard-rl/docs/results/risk_train.json similarity index 100% rename from docs/results/risk_train.json rename to polyguard-rl/docs/results/risk_train.json diff --git a/docs/results/robustness.json b/polyguard-rl/docs/results/robustness.json similarity index 100% rename from docs/results/robustness.json rename to polyguard-rl/docs/results/robustness.json diff --git a/polyguard-rl/docs/results/sft_loss_curves.png b/polyguard-rl/docs/results/sft_loss_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..73657c679f411f4a68b974e34259790f151b7d29 --- /dev/null +++ b/polyguard-rl/docs/results/sft_loss_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7d69fe49976c0a685b85ca8e88ea8c501bd737d1578beab1a006e40f599bf1f +size 76643 diff --git a/docs/results/sft_run.json b/polyguard-rl/docs/results/sft_run.json similarity index 100% rename from 
docs/results/sft_run.json rename to polyguard-rl/docs/results/sft_run.json diff --git a/docs/results/sft_trl_run.json b/polyguard-rl/docs/results/sft_trl_run.json similarity index 100% rename from docs/results/sft_trl_run.json rename to polyguard-rl/docs/results/sft_trl_run.json diff --git a/polyguard-rl/docs/results/sft_validity_reward.png b/polyguard-rl/docs/results/sft_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..bcb4b3e7407c7a93a5bdb911d32fffd40e10dc48 --- /dev/null +++ b/polyguard-rl/docs/results/sft_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c2dfea5ab9ec5eec712516ea73935973c205a3bd99eb2e77516adaa349e798 +size 50375 diff --git a/polyguard-rl/docs/results/sft_vs_grpo_reward.png b/polyguard-rl/docs/results/sft_vs_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..4a3b345245cec83cb42ed3e76a4e29ad58f5a492 --- /dev/null +++ b/polyguard-rl/docs/results/sft_vs_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6e8c073db4cac6b72d15b95538c29e3f6127e2043b19a8a4394ae46ff84690 +size 55073 diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/artifact_repo_listing.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/artifact_repo_listing.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/artifact_repo_listing.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/artifact_repo_listing.json diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_latency.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..be7e308acf74757bb7bb36bdd1dba04d57a70ea1 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_latency.png @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11cedb9e4d520d67f67969b2e7124883889950888e180858c2c84157554cc89a +size 58853 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_legality.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..1113f0ebb7243a2b03530964ec6315ec65de5c63 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e53b7fef0413492382712ca477f10c471bb26561c8db0566a283be20f2c4d9 +size 51915 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..2c8be057c8679f255a48baa579e2698510e36419 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37f7740740ecd7e509f0859ac962e045c2f0a07960d4e00ceccd1cb8281abe7 +size 58200 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png new file mode 100644 index 0000000000000000000000000000000000000000..d0b118adb6baad555b9004547dcaf13089ea35fd --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e2d0f462663346ad69b09a236674983829f7e7a23706e12eed74b1901c20f0 +size 40643 diff --git 
a/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_polyguard_report.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_polyguard_report.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_polyguard_report.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_polyguard_report.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/hf_status_snapshot.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/hf_status_snapshot.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/hf_status_snapshot.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/hf_status_snapshot.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/manifest.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/manifest.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/manifest.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/manifest.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/mirrored_files.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/mirrored_files.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/mirrored_files.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/mirrored_files.json diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_avg_reward.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..d5cde71fda1e2b3ca6ee855665b41f49f4c80a0f --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:026c7c4f37fe864a156fd8c14fb550925aeef456966e16e41db3b3c23801baba +size 39262 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_exploit_detection.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_exploit_detection.png new file mode 100644 index 0000000000000000000000000000000000000000..7ae6a8fd169570681123487cdce84757f051bca9 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_exploit_detection.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63c66b9dcee62af7b6a1c435fa3c12167bb266bc1d1b239c2002d8243cbcd15 +size 30647 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_legality.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..67b35e0e3ae5398409bc3d7b662ef2ceb868a490 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c505ec81276d4a9eb9818ee33698e916148457ed24ed80b8b1f88b93cbd5e8 +size 33228 diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_report.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_report.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_report.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_report.json diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/primary_reward_channel_bars.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/primary_reward_channel_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..fb1f5f1772c1fdb6f5538dca8436bab767e1c9f9 --- /dev/null +++ 
b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/primary_reward_channel_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350b188c89e9fac678f8874cdfb755d00e0f4c91e476b2b3038a08784dc8499b +size 51895 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_final_sft_train_loss.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_final_sft_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..ab2682c3e2cbf2a3c4338f6c02e0b42768a9b583 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_final_sft_train_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a38ac3a639dd23250d7b11412643f43971c9ca60842b569b79a79d70aceeac71 +size 35813 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_latency.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..b719773814c253c870ac8cba1d7aef285d0eb862 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_latency.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b670cfff8fbce3f23f0f67df84d75370745cb51298ae0a8da3458ba55bb8986 +size 30067 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_reward.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..86822aa56253de8959c09699e31b29405dbdb5d1 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4027700596bc53c29e6c8fe5058dbc84f2452c6e4299869ba3a41614a56b974f 
+size 37401 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_remote_completed_stage_durations.png new file mode 100644 index 0000000000000000000000000000000000000000..af5b44b88c7ecc050085c11062326ffe6c28f869 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_remote_completed_stage_durations.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4944f9b34e912cb5d7be323452b661c761b2e431c01e59cb23f858e2187f2df9 +size 20369 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_sft_runtime.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_sft_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..fc3a14cfc74ed4f75cc8a7fa336235d3207e55b2 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_sft_runtime.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f792ccfb0694c6df64a0f01950b09a85258c25237c97c2e6e3d124253c42a9c +size 30813 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_learning_rate.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d9036a9840836250368a7993ad019e1175fa484a --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_learning_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13336a7ac896659c56b9a07cd272616a4ead67402fad5292dff8a9560a84c981 +size 71817 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_token_accuracy.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_token_accuracy.png new file 
mode 100644 index 0000000000000000000000000000000000000000..a6337bcdf68cf0b10fa8184d6352cb7feb01beaf --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f5e5d7288978d9d536785b6f366570a00e2cb2cd7ee074411a78d01977ee78 +size 84524 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_training_loss.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..1a01a1728fe38823c531bee9b49cf156b9ba9fef --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e77776497c52e5192e9658f44da24f05ee27bb137352dfb739d98b492291fdcb +size 70407 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..4373c02c5fee262fbb3ceab3d953f0be01b83691 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73c01ac6fd578a4c19dbe5116ae2be54bb527a787947d79e29222896323203ba +size 62864 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..54dbe92d3a51ba9cc8883e782d1a5654cb9bd21e --- /dev/null +++ 
b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:827b7584252d1e1fec71a719c107680ff762e5a5fdbe4eed6538a0794f8b5cf2 +size 72990 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_learning_rate.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..3facdc5ffea0ef2c6c064075f5077d8b57960cef --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_learning_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3699683c4759100be7d76530e9ad6fee136a2cc4aeb34972308788f87c2eb75e +size 70799 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_token_accuracy.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..6701100aaa3fd4db073a801c41988f71d4cab5d9 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e64b64dca38f903fb4a47a4f0071335cb6abe1089f3e31a365774b0722077b +size 61234 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_training_loss.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..f8da2140737f7aee30abee97a36e06008600d89d --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b748f7d53e33826d069832bbca2c18edb89804cb0cc68b89b2dca039cff0b8 +size 54900 diff --git 
a/docs/results/submission_evidence/qwen_0_5b_1_5b/remote_stage_records.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/remote_stage_records.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/remote_stage_records.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/remote_stage_records.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/availability.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/availability.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/availability.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/availability.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json similarity index 100% rename 
from docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/availability.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/availability.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/availability.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/availability.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json similarity index 100% rename from 
docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/submission_summary.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/submission_summary.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b/submission_summary.json rename to 
polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b/submission_summary.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/artifact_repo_listing.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/artifact_repo_listing.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/artifact_repo_listing.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/artifact_repo_listing.json diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_latency.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..9df59e960afd8c26012728a235c82a462f350898 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_latency.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb038b95e95a64fa617aef1ed28a0a6bf9e63f24f3201a87f4498d1daa820260 +size 60005 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_legality.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..1113f0ebb7243a2b03530964ec6315ec65de5c63 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e53b7fef0413492382712ca477f10c471bb26561c8db0566a283be20f2c4d9 +size 51915 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward.png new file mode 100644 index 
0000000000000000000000000000000000000000..2c8be057c8679f255a48baa579e2698510e36419 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37f7740740ecd7e509f0859ac962e045c2f0a07960d4e00ceccd1cb8281abe7 +size 58200 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png new file mode 100644 index 0000000000000000000000000000000000000000..d0b118adb6baad555b9004547dcaf13089ea35fd --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e2d0f462663346ad69b09a236674983829f7e7a23706e12eed74b1901c20f0 +size 40643 diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_polyguard_report.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_polyguard_report.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_polyguard_report.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_polyguard_report.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/hf_status_snapshot.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/hf_status_snapshot.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/hf_status_snapshot.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/hf_status_snapshot.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/manifest.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/manifest.json similarity index 100% rename 
from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/manifest.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/manifest.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/mirrored_files.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/mirrored_files.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/mirrored_files.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/mirrored_files.json diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_avg_reward.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..d5cde71fda1e2b3ca6ee855665b41f49f4c80a0f --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026c7c4f37fe864a156fd8c14fb550925aeef456966e16e41db3b3c23801baba +size 39262 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_exploit_detection.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_exploit_detection.png new file mode 100644 index 0000000000000000000000000000000000000000..7ae6a8fd169570681123487cdce84757f051bca9 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_exploit_detection.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63c66b9dcee62af7b6a1c435fa3c12167bb266bc1d1b239c2002d8243cbcd15 +size 30647 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_legality.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_legality.png new file mode 100644 index 
0000000000000000000000000000000000000000..67b35e0e3ae5398409bc3d7b662ef2ceb868a490 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c505ec81276d4a9eb9818ee33698e916148457ed24ed80b8b1f88b93cbd5e8 +size 33228 diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_report.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_report.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_report.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_report.json diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/primary_reward_channel_bars.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/primary_reward_channel_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..fb1f5f1772c1fdb6f5538dca8436bab767e1c9f9 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/primary_reward_channel_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350b188c89e9fac678f8874cdfb755d00e0f4c91e476b2b3038a08784dc8499b +size 51895 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_learning_rate.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..86d4106ba43336486c140a3edd042516b04089e5 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_learning_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f207bb316cc8d83f35d97de83001ae49cdded1260a4cfd457f734ca39e24b77f +size 71717 diff --git 
a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..1c1c949c83aa2f174e141e739e92ca2f14b17bbd --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c731ca7dac821e93c9ff0a56ce40185f825bdc0e13d17985e8464c1f1e8667a9 +size 82066 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_training_loss.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..8102f2f9376d8292c356d27ca1c00beae2976442 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b29044889c9428544d50dfb52dd1893add297b2199e628b40563c856b7cd85d3 +size 67479 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_final_sft_train_loss.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_final_sft_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..cc7255be66b67a137963413693944bf3ef06d15a --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_final_sft_train_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9e5908ebdf95fcd5724efbce57abd175d85a06bab0d132503bed15d0f4d0b1b +size 37876 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_latency.png 
b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..38fad7e8d4fb833dbaf2630eada1e3562c7393a6 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_latency.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef8ab98e3c6c502932b249333681e7d561cbd6a7a2126f3ef6b4ff22cf536468 +size 33096 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_reward.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..768c4232fb3881416f28bbc55128e23531aee652 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21270e57aed31ad8f986908bbef375463644cfec49bd622cb68bd5fa20ae5184 +size 40711 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_remote_completed_stage_durations.png new file mode 100644 index 0000000000000000000000000000000000000000..de01785fc0b7fbac820e4f10f0e21d4d3a8e0cbc --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_remote_completed_stage_durations.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb89e7e1f49e89953b076cf1b120fe2a4ba7291439708cffe4b54177d3b641d5 +size 44059 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_sft_runtime.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_sft_runtime.png new file mode 100644 index 
0000000000000000000000000000000000000000..2542df7e152c469fe0399c1360f33edab685820b --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_sft_runtime.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bddbd70ab3f0cb766af70dedf84da136196c93dbeeb7b9830eac9aaa7a78e421 +size 35412 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_learning_rate.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d9036a9840836250368a7993ad019e1175fa484a --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_learning_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13336a7ac896659c56b9a07cd272616a4ead67402fad5292dff8a9560a84c981 +size 71817 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_token_accuracy.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..a6337bcdf68cf0b10fa8184d6352cb7feb01beaf --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f5e5d7288978d9d536785b6f366570a00e2cb2cd7ee074411a78d01977ee78 +size 84524 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_training_loss.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..1a01a1728fe38823c531bee9b49cf156b9ba9fef --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_training_loss.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:e77776497c52e5192e9658f44da24f05ee27bb137352dfb739d98b492291fdcb +size 70407 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..208a4f1153376b021576659fdd9d4ec64f9488ea --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67da2872e4ac106622e9c46e1b09d6e69a41d65cca3ba38d34ca807c9b6b390a +size 80511 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..90d56668deedb84efd614c4cc101f989d75eba6c --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e12c9af0eaaa56065254ad49fb4988b1a4375fc823889347c29647f2fed4caee +size 91484 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_learning_rate.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..3facdc5ffea0ef2c6c064075f5077d8b57960cef --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_learning_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3699683c4759100be7d76530e9ad6fee136a2cc4aeb34972308788f87c2eb75e +size 70799 diff --git 
a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_token_accuracy.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..6701100aaa3fd4db073a801c41988f71d4cab5d9 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e64b64dca38f903fb4a47a4f0071335cb6abe1089f3e31a365774b0722077b +size 61234 diff --git a/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_training_loss.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..f8da2140737f7aee30abee97a36e06008600d89d --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b748f7d53e33826d069832bbca2c18edb89804cb0cc68b89b2dca039cff0b8 +size 54900 diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/remote_stage_records.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/remote_stage_records.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/remote_stage_records.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/remote_stage_records.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png diff --git 
a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/availability.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/availability.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/availability.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/availability.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json diff --git 
a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/availability.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/availability.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/availability.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/availability.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json diff --git 
a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/availability.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/availability.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/availability.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/availability.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json diff --git 
a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/run_metadata.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/run_metadata.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/run_metadata.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/run_metadata.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_history.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_history.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_history.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_history.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/submission_summary.json b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/submission_summary.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/submission_summary.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/submission_summary.json diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/training_space_runtime_status.json 
b/polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/training_space_runtime_status.json similarity index 100% rename from docs/results/submission_evidence/qwen_0_5b_1_5b_3b/training_space_runtime_status.json rename to polyguard-rl/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/training_space_runtime_status.json diff --git a/docs/results/submission_evidence/qwen_3b_continuation/training_space_runtime_status.json b/polyguard-rl/docs/results/submission_evidence/qwen_3b_continuation/training_space_runtime_status.json similarity index 100% rename from docs/results/submission_evidence/qwen_3b_continuation/training_space_runtime_status.json rename to polyguard-rl/docs/results/submission_evidence/qwen_3b_continuation/training_space_runtime_status.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/README.md b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/README.md similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/README.md rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/README.md diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_latency.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..be7e308acf74757bb7bb36bdd1dba04d57a70ea1 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_latency.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11cedb9e4d520d67f67969b2e7124883889950888e180858c2c84157554cc89a +size 58853 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_legality.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_legality.png new file mode 
100644 index 0000000000000000000000000000000000000000..1113f0ebb7243a2b03530964ec6315ec65de5c63 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e53b7fef0413492382712ca477f10c471bb26561c8db0566a283be20f2c4d9 +size 51915 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..2c8be057c8679f255a48baa579e2698510e36419 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37f7740740ecd7e509f0859ac962e045c2f0a07960d4e00ceccd1cb8281abe7 +size 58200 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png new file mode 100644 index 0000000000000000000000000000000000000000..d0b118adb6baad555b9004547dcaf13089ea35fd --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e2d0f462663346ad69b09a236674983829f7e7a23706e12eed74b1901c20f0 +size 40643 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_avg_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_avg_reward.png new file mode 100644 index 
0000000000000000000000000000000000000000..d5cde71fda1e2b3ca6ee855665b41f49f4c80a0f --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026c7c4f37fe864a156fd8c14fb550925aeef456966e16e41db3b3c23801baba +size 39262 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_exploit_detection.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_exploit_detection.png new file mode 100644 index 0000000000000000000000000000000000000000..7ae6a8fd169570681123487cdce84757f051bca9 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_exploit_detection.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63c66b9dcee62af7b6a1c435fa3c12167bb266bc1d1b239c2002d8243cbcd15 +size 30647 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_legality.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..67b35e0e3ae5398409bc3d7b662ef2ceb868a490 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c505ec81276d4a9eb9818ee33698e916148457ed24ed80b8b1f88b93cbd5e8 +size 33228 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/primary_reward_channel_bars.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/primary_reward_channel_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..fb1f5f1772c1fdb6f5538dca8436bab767e1c9f9 --- /dev/null +++ 
b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/primary_reward_channel_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350b188c89e9fac678f8874cdfb755d00e0f4c91e476b2b3038a08784dc8499b +size 51895 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..ab2682c3e2cbf2a3c4338f6c02e0b42768a9b583 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a38ac3a639dd23250d7b11412643f43971c9ca60842b569b79a79d70aceeac71 +size 35813 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..b719773814c253c870ac8cba1d7aef285d0eb862 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b670cfff8fbce3f23f0f67df84d75370745cb51298ae0a8da3458ba55bb8986 +size 30067 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..86822aa56253de8959c09699e31b29405dbdb5d1 --- /dev/null +++ 
b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4027700596bc53c29e6c8fe5058dbc84f2452c6e4299869ba3a41614a56b974f +size 37401 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png new file mode 100644 index 0000000000000000000000000000000000000000..af5b44b88c7ecc050085c11062326ffe6c28f869 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4944f9b34e912cb5d7be323452b661c761b2e431c01e59cb23f858e2187f2df9 +size 20369 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..fc3a14cfc74ed4f75cc8a7fa336235d3207e55b2 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f792ccfb0694c6df64a0f01950b09a85258c25237c97c2e6e3d124253c42a9c +size 30813 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_learning_rate.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d9036a9840836250368a7993ad019e1175fa484a --- /dev/null +++ 
b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_learning_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13336a7ac896659c56b9a07cd272616a4ead67402fad5292dff8a9560a84c981 +size 71817 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_token_accuracy.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..a6337bcdf68cf0b10fa8184d6352cb7feb01beaf --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f5e5d7288978d9d536785b6f366570a00e2cb2cd7ee074411a78d01977ee78 +size 84524 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_training_loss.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..1a01a1728fe38823c531bee9b49cf156b9ba9fef --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e77776497c52e5192e9658f44da24f05ee27bb137352dfb739d98b492291fdcb +size 70407 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..4373c02c5fee262fbb3ceab3d953f0be01b83691 --- /dev/null +++ 
b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73c01ac6fd578a4c19dbe5116ae2be54bb527a787947d79e29222896323203ba +size 62864 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..54dbe92d3a51ba9cc8883e782d1a5654cb9bd21e --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:827b7584252d1e1fec71a719c107680ff762e5a5fdbe4eed6538a0794f8b5cf2 +size 72990 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_learning_rate.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..3facdc5ffea0ef2c6c064075f5077d8b57960cef --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_learning_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3699683c4759100be7d76530e9ad6fee136a2cc4aeb34972308788f87c2eb75e +size 70799 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_token_accuracy.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..6701100aaa3fd4db073a801c41988f71d4cab5d9 --- /dev/null +++ 
b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e64b64dca38f903fb4a47a4f0071335cb6abe1089f3e31a365774b0722077b +size 61234 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_training_loss.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..f8da2140737f7aee30abee97a36e06008600d89d --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b748f7d53e33826d069832bbca2c18edb89804cb0cc68b89b2dca039cff0b8 +size 54900 diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/reward_component_bars.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/reward_component_bars.png similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/reward_component_bars.png rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/reward_component_bars.png diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/anti_cheat_failure_rates.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/anti_cheat_failure_rates.png new file mode 100644 index 0000000000000000000000000000000000000000..7c504c44bf70b1e5366503778875e526db8df542 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/anti_cheat_failure_rates.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad0a632699048d87865977f75522575ea22ebd1940719952b8a1e9064cfdb3b4 +size 52299 diff 
--git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/avg_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..abf2aa56b5280d757f4a3af28aeef928ff704530 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e45b324d208a71eeffa0dcfe146a5a7f2e17cf5a951a173dcd68818106d774c +size 11094 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/grpo_reward_curves.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/grpo_reward_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..f3850bd6c22f350e534a1874e9e6d60f6f350450 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/grpo_reward_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d134ce16a328958cdddfb2d1273d9e8c27f72bf28a38ebfbcbd0ccdc540fe4d5 +size 18724 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/inference_latency_validity.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..3fd99ec44ebcfc8ef0319042d68ba9a05f5b6d15 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/inference_latency_validity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d2f5e08331ad38d98102bdba6cb98ce50f2a1cfe71b9b04f295417550d9ae3 +size 55565 diff --git 
a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/inference_validity_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/inference_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..f33803d8077f18c5635493a1baafca5ec7e48a1f --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/inference_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed58b43ac3029c3bd0ee87d5f70ebd8088fdcc04ffe870a04771cbd55a49b782 +size 51943 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/legality_rate.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..293afc9e01bcc4b9cfa048d0264b1471d8fd486b --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/legality_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd21eae41ea8c538ed698e94803df78bf0f2a13c0792b6492a2e15b4b449243c +size 10839 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/policy_stack_avg_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1081c33e7498b4e47e9ceb4abbff0ecacb785d --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/policy_stack_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437dc54a16cd930fe191bfb337ebb598845f7c0ceea812d936d3cc7f60593e19 +size 13709 diff --git 
a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_grpo_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..6d2567111051ba3a841fc3cde5b3076d788bd209 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19cfcb40ed7116e365ebc2d5b26b589a712274d697d2925cd6f2ab696f73e59 +size 50755 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_loss.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..718bbaafed77be7c93097702d6668174747da2c5 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b0dd58526f8f70f77c6aef089d258ee3dbcddd159353b2dbe746cabece78b7 +size 46793 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..e81bfa202cd7ab2b5b20df64d10a099b2c94e7e5 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:987d3064d7d069b8165e4a68a03b3db1efa073f553861af1c745596d3f37eaf4 +size 49361 diff --git 
a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/reward_component_bars.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..6751ab23d8c9be3b40c5c7307fa73845cf997aff --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/reward_component_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c72d41541c4ecc021764f494fa3c62f1ce32dd292dd7a7f9cc64a1b17c0a9373 +size 18820 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_loss_curves.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_loss_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..73657c679f411f4a68b974e34259790f151b7d29 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_loss_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7d69fe49976c0a685b85ca8e88ea8c501bd737d1578beab1a006e40f599bf1f +size 76643 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_validity_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..bcb4b3e7407c7a93a5bdb911d32fffd40e10dc48 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c2dfea5ab9ec5eec712516ea73935973c205a3bd99eb2e77516adaa349e798 +size 50375 diff --git 
a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_vs_grpo_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_vs_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..4a3b345245cec83cb42ed3e76a4e29ad58f5a492 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_vs_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6e8c073db4cac6b72d15b95538c29e3f6127e2043b19a8a4394ae46ff84690 +size 55073 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/train_holdout_gap.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/train_holdout_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..a8b0018ecdb6ed01eeffce1fb1a2da84ae3ddd4c --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/train_holdout_gap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11699ac5a5e2d9180cc5268fa1bed494ca08bd7a083f446599a659063710abcb +size 49197 diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/manifest.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/manifest.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/manifest.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/manifest.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/README.md b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/README.md similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/README.md rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/README.md diff --git 
a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/action_traces.jsonl b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/action_traces.jsonl similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/action_traces.jsonl rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/action_traces.jsonl diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/artifact_repo_listing.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/artifact_repo_listing.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/artifact_repo_listing.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/artifact_repo_listing.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/basic_llm_failure_cases.md b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/basic_llm_failure_cases.md similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/basic_llm_failure_cases.md rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/basic_llm_failure_cases.md diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/basic_llm_vs_polyguard_report.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/basic_llm_vs_polyguard_report.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/basic_llm_vs_polyguard_report.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/basic_llm_vs_polyguard_report.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/hf_status_snapshot.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/hf_status_snapshot.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/hf_status_snapshot.json rename to 
polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/hf_status_snapshot.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/manifest.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/manifest.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/manifest.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/manifest.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/policy_ablation_report.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/policy_ablation_report.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/policy_ablation_report.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/policy_ablation_report.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/remote_stage_records.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/remote_stage_records.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/remote_stage_records.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/remote_stage_records.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json 
b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json diff --git 
a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json rename to 
polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/submission_summary.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/submission_summary.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/reports/submission_summary.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/submission_summary.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/submission_summary.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/submission_summary.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/submission_summary.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/submission_summary.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/traces/action_traces.jsonl b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/traces/action_traces.jsonl similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b/traces/action_traces.jsonl rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/traces/action_traces.jsonl diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/README.md b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/README.md similarity index 100% rename from 
docs/results/submission_evidence_qwen_0_5b_1_5b_3b/README.md rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/README.md diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_latency.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..90fc24b8e1c3f366404581bd4a7cd4995144948c --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_latency.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:932c29c95fbac79bfc3efb6031d8380f29f6651444e18e8603eb50be5b962a8c +size 54154 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_legality.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..1113f0ebb7243a2b03530964ec6315ec65de5c63 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e53b7fef0413492382712ca477f10c471bb26561c8db0566a283be20f2c4d9 +size 51915 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..2c8be057c8679f255a48baa579e2698510e36419 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:a37f7740740ecd7e509f0859ac962e045c2f0a07960d4e00ceccd1cb8281abe7 +size 58200 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png new file mode 100644 index 0000000000000000000000000000000000000000..d0b118adb6baad555b9004547dcaf13089ea35fd --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e2d0f462663346ad69b09a236674983829f7e7a23706e12eed74b1901c20f0 +size 40643 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_avg_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..d5cde71fda1e2b3ca6ee855665b41f49f4c80a0f --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026c7c4f37fe864a156fd8c14fb550925aeef456966e16e41db3b3c23801baba +size 39262 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_exploit_detection.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_exploit_detection.png new file mode 100644 index 0000000000000000000000000000000000000000..7ae6a8fd169570681123487cdce84757f051bca9 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_exploit_detection.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:e63c66b9dcee62af7b6a1c435fa3c12167bb266bc1d1b239c2002d8243cbcd15 +size 30647 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_legality.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..67b35e0e3ae5398409bc3d7b662ef2ceb868a490 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c505ec81276d4a9eb9818ee33698e916148457ed24ed80b8b1f88b93cbd5e8 +size 33228 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/primary_reward_channel_bars.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/primary_reward_channel_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..fb1f5f1772c1fdb6f5538dca8436bab767e1c9f9 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/primary_reward_channel_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350b188c89e9fac678f8874cdfb755d00e0f4c91e476b2b3038a08784dc8499b +size 51895 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_learning_rate.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..86d4106ba43336486c140a3edd042516b04089e5 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_learning_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f207bb316cc8d83f35d97de83001ae49cdded1260a4cfd457f734ca39e24b77f +size 71717 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..1c1c949c83aa2f174e141e739e92ca2f14b17bbd --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c731ca7dac821e93c9ff0a56ce40185f825bdc0e13d17985e8464c1f1e8667a9 +size 82066 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_training_loss.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..8102f2f9376d8292c356d27ca1c00beae2976442 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b29044889c9428544d50dfb52dd1893add297b2199e628b40563c856b7cd85d3 +size 67479 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..cc7255be66b67a137963413693944bf3ef06d15a --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:a9e5908ebdf95fcd5724efbce57abd175d85a06bab0d132503bed15d0f4d0b1b +size 37876 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..eb8ac6a22484aeed8456588ac308ceab50ecfb1b --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8523c04ac3d23be97e68ce0eba546f5a8ff69bacd81e5c00c4550924c5e507d +size 34170 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..549027011f7d355f18b8f87cb285b9952df16405 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45694faea376c47326806e35bb73120f22ad9355a06c74738fdd1f2243d48107 +size 40824 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png new file mode 100644 index 0000000000000000000000000000000000000000..de01785fc0b7fbac820e4f10f0e21d4d3a8e0cbc --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:bb89e7e1f49e89953b076cf1b120fe2a4ba7291439708cffe4b54177d3b641d5 +size 44059 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..2542df7e152c469fe0399c1360f33edab685820b --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bddbd70ab3f0cb766af70dedf84da136196c93dbeeb7b9830eac9aaa7a78e421 +size 35412 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_learning_rate.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d9036a9840836250368a7993ad019e1175fa484a --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_learning_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13336a7ac896659c56b9a07cd272616a4ead67402fad5292dff8a9560a84c981 +size 71817 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_token_accuracy.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..a6337bcdf68cf0b10fa8184d6352cb7feb01beaf --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b5f5e5d7288978d9d536785b6f366570a00e2cb2cd7ee074411a78d01977ee78 +size 84524 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_training_loss.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..1a01a1728fe38823c531bee9b49cf156b9ba9fef --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e77776497c52e5192e9658f44da24f05ee27bb137352dfb739d98b492291fdcb +size 70407 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..208a4f1153376b021576659fdd9d4ec64f9488ea --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67da2872e4ac106622e9c46e1b09d6e69a41d65cca3ba38d34ca807c9b6b390a +size 80511 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..90d56668deedb84efd614c4cc101f989d75eba6c --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e12c9af0eaaa56065254ad49fb4988b1a4375fc823889347c29647f2fed4caee +size 91484 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_learning_rate.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..3facdc5ffea0ef2c6c064075f5077d8b57960cef --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_learning_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3699683c4759100be7d76530e9ad6fee136a2cc4aeb34972308788f87c2eb75e +size 70799 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_token_accuracy.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..6701100aaa3fd4db073a801c41988f71d4cab5d9 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e64b64dca38f903fb4a47a4f0071335cb6abe1089f3e31a365774b0722077b +size 61234 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_training_loss.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..f8da2140737f7aee30abee97a36e06008600d89d --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b748f7d53e33826d069832bbca2c18edb89804cb0cc68b89b2dca039cff0b8 +size 54900 diff --git 
a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/reward_component_bars.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/reward_component_bars.png similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/reward_component_bars.png rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/reward_component_bars.png diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/anti_cheat_failure_rates.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/anti_cheat_failure_rates.png new file mode 100644 index 0000000000000000000000000000000000000000..7c504c44bf70b1e5366503778875e526db8df542 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/anti_cheat_failure_rates.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad0a632699048d87865977f75522575ea22ebd1940719952b8a1e9064cfdb3b4 +size 52299 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/avg_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..abf2aa56b5280d757f4a3af28aeef928ff704530 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e45b324d208a71eeffa0dcfe146a5a7f2e17cf5a951a173dcd68818106d774c +size 11094 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/grpo_reward_curves.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/grpo_reward_curves.png 
new file mode 100644 index 0000000000000000000000000000000000000000..f3850bd6c22f350e534a1874e9e6d60f6f350450 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/grpo_reward_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d134ce16a328958cdddfb2d1273d9e8c27f72bf28a38ebfbcbd0ccdc540fe4d5 +size 18724 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/inference_latency_validity.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..3fd99ec44ebcfc8ef0319042d68ba9a05f5b6d15 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/inference_latency_validity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d2f5e08331ad38d98102bdba6cb98ce50f2a1cfe71b9b04f295417550d9ae3 +size 55565 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/inference_validity_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/inference_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..f33803d8077f18c5635493a1baafca5ec7e48a1f --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/inference_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed58b43ac3029c3bd0ee87d5f70ebd8088fdcc04ffe870a04771cbd55a49b782 +size 51943 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/legality_rate.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/legality_rate.png new file 
mode 100644 index 0000000000000000000000000000000000000000..293afc9e01bcc4b9cfa048d0264b1471d8fd486b --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/legality_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd21eae41ea8c538ed698e94803df78bf0f2a13c0792b6492a2e15b4b449243c +size 10839 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/policy_stack_avg_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1081c33e7498b4e47e9ceb4abbff0ecacb785d --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/policy_stack_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437dc54a16cd930fe191bfb337ebb598845f7c0ceea812d936d3cc7f60593e19 +size 13709 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_grpo_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..6d2567111051ba3a841fc3cde5b3076d788bd209 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19cfcb40ed7116e365ebc2d5b26b589a712274d697d2925cd6f2ab696f73e59 +size 50755 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_loss.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_loss.png new file mode 100644 index 
0000000000000000000000000000000000000000..718bbaafed77be7c93097702d6668174747da2c5 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b0dd58526f8f70f77c6aef089d258ee3dbcddd159353b2dbe746cabece78b7 +size 46793 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..e81bfa202cd7ab2b5b20df64d10a099b2c94e7e5 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:987d3064d7d069b8165e4a68a03b3db1efa073f553861af1c745596d3f37eaf4 +size 49361 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/reward_component_bars.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..6751ab23d8c9be3b40c5c7307fa73845cf997aff --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/reward_component_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c72d41541c4ecc021764f494fa3c62f1ce32dd292dd7a7f9cc64a1b17c0a9373 +size 18820 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_loss_curves.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_loss_curves.png new file mode 100644 index 
0000000000000000000000000000000000000000..73657c679f411f4a68b974e34259790f151b7d29 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_loss_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7d69fe49976c0a685b85ca8e88ea8c501bd737d1578beab1a006e40f599bf1f +size 76643 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_validity_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..bcb4b3e7407c7a93a5bdb911d32fffd40e10dc48 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c2dfea5ab9ec5eec712516ea73935973c205a3bd99eb2e77516adaa349e798 +size 50375 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_vs_grpo_reward.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_vs_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..4a3b345245cec83cb42ed3e76a4e29ad58f5a492 --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_vs_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6e8c073db4cac6b72d15b95538c29e3f6127e2043b19a8a4394ae46ff84690 +size 55073 diff --git a/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/train_holdout_gap.png b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/train_holdout_gap.png new file mode 100644 index 
0000000000000000000000000000000000000000..a8b0018ecdb6ed01eeffce1fb1a2da84ae3ddd4c --- /dev/null +++ b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/train_holdout_gap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11699ac5a5e2d9180cc5268fa1bed494ca08bd7a083f446599a659063710abcb +size 49197 diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/manifest.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/manifest.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/manifest.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/manifest.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/README.md b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/README.md similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/README.md rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/README.md diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/action_traces.jsonl b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/action_traces.jsonl similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/action_traces.jsonl rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/action_traces.jsonl diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/artifact_repo_listing.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/artifact_repo_listing.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/artifact_repo_listing.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/artifact_repo_listing.json diff --git 
a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/basic_llm_failure_cases.md b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/basic_llm_failure_cases.md similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/basic_llm_failure_cases.md rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/basic_llm_failure_cases.md diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/basic_llm_vs_polyguard_report.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/basic_llm_vs_polyguard_report.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/basic_llm_vs_polyguard_report.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/basic_llm_vs_polyguard_report.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/hf_status_snapshot.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/hf_status_snapshot.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/hf_status_snapshot.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/hf_status_snapshot.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/manifest.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/manifest.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/manifest.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/manifest.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/policy_ablation_report.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/policy_ablation_report.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/policy_ablation_report.json 
rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/policy_ablation_report.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/remote_stage_records.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/remote_stage_records.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/remote_stage_records.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/remote_stage_records.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json similarity index 100% rename from 
docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json 
b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json diff --git 
a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/availability.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/availability.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/availability.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/availability.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/grpo_ablation_report.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/grpo_ablation_report.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/grpo_ablation_report.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/grpo_ablation_report.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/grpo_history.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/grpo_history.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/grpo_history.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/grpo_history.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/grpo_reward_components.jsonl b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/grpo_reward_components.jsonl similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/grpo_reward_components.jsonl rename to 
polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/grpo_reward_components.jsonl diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/grpo_trl_run.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/grpo_trl_run.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/grpo_trl_run.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/grpo_trl_run.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/postsave_inference_grpo.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/postsave_inference_grpo.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/postsave_inference_grpo.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/postsave_inference_grpo.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/run_metadata.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/run_metadata.json 
similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/run_metadata.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/run_metadata.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/sft_history.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/sft_history.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/sft_history.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/sft_history.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/submission_summary.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/submission_summary.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/submission_summary.json rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/submission_summary.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/submission_summary.json b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/submission_summary.json similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/submission_summary.json rename 
to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/submission_summary.json diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/traces/action_traces.jsonl b/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/traces/action_traces.jsonl similarity index 100% rename from docs/results/submission_evidence_qwen_0_5b_1_5b_3b/traces/action_traces.jsonl rename to polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/traces/action_traces.jsonl diff --git a/polyguard-rl/docs/results/success_rate.png b/polyguard-rl/docs/results/success_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d48cf79006c0bc7241782a29d010e95bc524069f --- /dev/null +++ b/polyguard-rl/docs/results/success_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb49f19b04948dbe3dcac9b2469e93e6e07167fd297587228d70232e7aa41ff5 +size 11233 diff --git a/docs/results/supervisor_grpo.json b/polyguard-rl/docs/results/supervisor_grpo.json similarity index 100% rename from docs/results/supervisor_grpo.json rename to polyguard-rl/docs/results/supervisor_grpo.json diff --git a/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json b/polyguard-rl/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json similarity index 100% rename from docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json diff --git a/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/polyguard-rl/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json similarity index 100% rename from docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json rename to polyguard-rl/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json diff --git a/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json 
b/polyguard-rl/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json similarity index 100% rename from docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json rename to polyguard-rl/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json diff --git a/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json similarity index 100% rename from docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json diff --git a/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json b/polyguard-rl/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json similarity index 100% rename from docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json diff --git a/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/polyguard-rl/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json similarity index 100% rename from docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json rename to polyguard-rl/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json diff --git a/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json b/polyguard-rl/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json similarity index 100% rename from docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json rename to polyguard-rl/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json diff --git a/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json similarity index 100% rename from docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json rename to 
polyguard-rl/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json diff --git a/docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_history.json b/polyguard-rl/docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_history.json similarity index 100% rename from docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_history.json rename to polyguard-rl/docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_history.json diff --git a/docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_trl_run.json b/polyguard-rl/docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_trl_run.json similarity index 100% rename from docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_trl_run.json rename to polyguard-rl/docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_trl_run.json diff --git a/docs/results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json b/polyguard-rl/docs/results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json similarity index 100% rename from docs/results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json rename to polyguard-rl/docs/results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json diff --git a/docs/results/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json b/polyguard-rl/docs/results/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json similarity index 100% rename from docs/results/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json rename to polyguard-rl/docs/results/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json diff --git a/docs/results/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json b/polyguard-rl/docs/results/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json similarity index 100% rename from docs/results/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json rename to polyguard-rl/docs/results/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json diff --git a/docs/results/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json b/polyguard-rl/docs/results/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json similarity index 100% rename from 
docs/results/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json rename to polyguard-rl/docs/results/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json diff --git a/polyguard-rl/docs/results/train_holdout_gap.png b/polyguard-rl/docs/results/train_holdout_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..a8b0018ecdb6ed01eeffce1fb1a2da84ae3ddd4c --- /dev/null +++ b/polyguard-rl/docs/results/train_holdout_gap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11699ac5a5e2d9180cc5268fa1bed494ca08bd7a083f446599a659063710abcb +size 49197 diff --git a/docs/reward_design.md b/polyguard-rl/docs/reward_design.md similarity index 100% rename from docs/reward_design.md rename to polyguard-rl/docs/reward_design.md diff --git a/docs/roadmap.md b/polyguard-rl/docs/roadmap.md similarity index 100% rename from docs/roadmap.md rename to polyguard-rl/docs/roadmap.md diff --git a/docs/safety.md b/polyguard-rl/docs/safety.md similarity index 100% rename from docs/safety.md rename to polyguard-rl/docs/safety.md diff --git a/docs/submission_artifacts.md b/polyguard-rl/docs/submission_artifacts.md similarity index 100% rename from docs/submission_artifacts.md rename to polyguard-rl/docs/submission_artifacts.md diff --git a/docs/submission_checklist.md b/polyguard-rl/docs/submission_checklist.md similarity index 100% rename from docs/submission_checklist.md rename to polyguard-rl/docs/submission_checklist.md diff --git a/docs/training.md b/polyguard-rl/docs/training.md similarity index 100% rename from docs/training.md rename to polyguard-rl/docs/training.md diff --git a/docs/ui.md b/polyguard-rl/docs/ui.md similarity index 100% rename from docs/ui.md rename to polyguard-rl/docs/ui.md diff --git a/models.py b/polyguard-rl/models.py similarity index 100% rename from models.py rename to polyguard-rl/models.py diff --git a/notebooks/01_data_building.ipynb b/polyguard-rl/notebooks/01_data_building.ipynb similarity index 100% rename from 
notebooks/01_data_building.ipynb rename to polyguard-rl/notebooks/01_data_building.ipynb diff --git a/notebooks/02_knowledge_graph.ipynb b/polyguard-rl/notebooks/02_knowledge_graph.ipynb similarity index 100% rename from notebooks/02_knowledge_graph.ipynb rename to polyguard-rl/notebooks/02_knowledge_graph.ipynb diff --git a/notebooks/03_risk_models.ipynb b/polyguard-rl/notebooks/03_risk_models.ipynb similarity index 100% rename from notebooks/03_risk_models.ipynb rename to polyguard-rl/notebooks/03_risk_models.ipynb diff --git a/notebooks/04_environment_validation.ipynb b/polyguard-rl/notebooks/04_environment_validation.ipynb similarity index 100% rename from notebooks/04_environment_validation.ipynb rename to polyguard-rl/notebooks/04_environment_validation.ipynb diff --git a/notebooks/05_sft_debug.ipynb b/polyguard-rl/notebooks/05_sft_debug.ipynb similarity index 100% rename from notebooks/05_sft_debug.ipynb rename to polyguard-rl/notebooks/05_sft_debug.ipynb diff --git a/notebooks/06_grpo_debug.ipynb b/polyguard-rl/notebooks/06_grpo_debug.ipynb similarity index 100% rename from notebooks/06_grpo_debug.ipynb rename to polyguard-rl/notebooks/06_grpo_debug.ipynb diff --git a/notebooks/07_policy_analysis.ipynb b/polyguard-rl/notebooks/07_policy_analysis.ipynb similarity index 100% rename from notebooks/07_policy_analysis.ipynb rename to polyguard-rl/notebooks/07_policy_analysis.ipynb diff --git a/notebooks/08_dosing_analysis.ipynb b/polyguard-rl/notebooks/08_dosing_analysis.ipynb similarity index 100% rename from notebooks/08_dosing_analysis.ipynb rename to polyguard-rl/notebooks/08_dosing_analysis.ipynb diff --git a/notebooks/09_training_loop.ipynb b/polyguard-rl/notebooks/09_training_loop.ipynb similarity index 100% rename from notebooks/09_training_loop.ipynb rename to polyguard-rl/notebooks/09_training_loop.ipynb diff --git a/openenv.yaml b/polyguard-rl/openenv.yaml similarity index 100% rename from openenv.yaml rename to polyguard-rl/openenv.yaml diff --git 
a/polyguard-rl/outputs/README.md b/polyguard-rl/outputs/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c04cb758742693ff03ced24cb50724eb5437ab8e --- /dev/null +++ b/polyguard-rl/outputs/README.md @@ -0,0 +1,11 @@ +# Local run outputs (not in Git) + +Training sweeps, plots, and copied **active-model reports** land here. + +After `python scripts/install_hf_active_bundle.py` (without `--no-reports`), you should have: + +- **`reports/active_model/`** — JSON reports mirrored from the HF bundle (eval, GRPO/SFT summaries, sweep metadata, etc.). + +The manifest at `checkpoints/active/active_model_manifest.json` still lists the same report paths relative to the repo root (`outputs/reports/active_model/...`). + +Smoke summaries for docs may also be mirrored under `docs/results/` (those *are* tracked in Git). diff --git a/polyguard_rl.egg-info/PKG-INFO b/polyguard-rl/polyguard_rl.egg-info/PKG-INFO similarity index 100% rename from polyguard_rl.egg-info/PKG-INFO rename to polyguard-rl/polyguard_rl.egg-info/PKG-INFO diff --git a/polyguard_rl.egg-info/SOURCES.txt b/polyguard-rl/polyguard_rl.egg-info/SOURCES.txt similarity index 100% rename from polyguard_rl.egg-info/SOURCES.txt rename to polyguard-rl/polyguard_rl.egg-info/SOURCES.txt diff --git a/polyguard_rl.egg-info/dependency_links.txt b/polyguard-rl/polyguard_rl.egg-info/dependency_links.txt similarity index 100% rename from polyguard_rl.egg-info/dependency_links.txt rename to polyguard-rl/polyguard_rl.egg-info/dependency_links.txt diff --git a/polyguard_rl.egg-info/entry_points.txt b/polyguard-rl/polyguard_rl.egg-info/entry_points.txt similarity index 100% rename from polyguard_rl.egg-info/entry_points.txt rename to polyguard-rl/polyguard_rl.egg-info/entry_points.txt diff --git a/polyguard_rl.egg-info/requires.txt b/polyguard-rl/polyguard_rl.egg-info/requires.txt similarity index 100% rename from polyguard_rl.egg-info/requires.txt rename to polyguard-rl/polyguard_rl.egg-info/requires.txt diff 
--git a/polyguard_rl.egg-info/top_level.txt b/polyguard-rl/polyguard_rl.egg-info/top_level.txt similarity index 100% rename from polyguard_rl.egg-info/top_level.txt rename to polyguard-rl/polyguard_rl.egg-info/top_level.txt diff --git a/pyproject.toml b/polyguard-rl/pyproject.toml similarity index 100% rename from pyproject.toml rename to polyguard-rl/pyproject.toml diff --git a/requirements-space.txt b/polyguard-rl/requirements-space.txt similarity index 100% rename from requirements-space.txt rename to polyguard-rl/requirements-space.txt diff --git a/requirements.txt b/polyguard-rl/requirements.txt similarity index 100% rename from requirements.txt rename to polyguard-rl/requirements.txt diff --git a/scripts/acceptance_gate.py b/polyguard-rl/scripts/acceptance_gate.py old mode 100644 new mode 100755 similarity index 100% rename from scripts/acceptance_gate.py rename to polyguard-rl/scripts/acceptance_gate.py diff --git a/scripts/activate_sweep_model.py b/polyguard-rl/scripts/activate_sweep_model.py similarity index 100% rename from scripts/activate_sweep_model.py rename to polyguard-rl/scripts/activate_sweep_model.py diff --git a/scripts/benchmark_inference.py b/polyguard-rl/scripts/benchmark_inference.py similarity index 100% rename from scripts/benchmark_inference.py rename to polyguard-rl/scripts/benchmark_inference.py diff --git a/scripts/bootstrap_data.py b/polyguard-rl/scripts/bootstrap_data.py old mode 100644 new mode 100755 similarity index 100% rename from scripts/bootstrap_data.py rename to polyguard-rl/scripts/bootstrap_data.py diff --git a/scripts/bootstrap_frontend.sh b/polyguard-rl/scripts/bootstrap_frontend.sh old mode 100644 new mode 100755 similarity index 100% rename from scripts/bootstrap_frontend.sh rename to polyguard-rl/scripts/bootstrap_frontend.sh diff --git a/scripts/bootstrap_openenv.sh b/polyguard-rl/scripts/bootstrap_openenv.sh old mode 100644 new mode 100755 similarity index 100% rename from scripts/bootstrap_openenv.sh rename to 
polyguard-rl/scripts/bootstrap_openenv.sh diff --git a/scripts/bootstrap_venv.sh b/polyguard-rl/scripts/bootstrap_venv.sh old mode 100644 new mode 100755 similarity index 100% rename from scripts/bootstrap_venv.sh rename to polyguard-rl/scripts/bootstrap_venv.sh diff --git a/scripts/build_drug_knowledge.py b/polyguard-rl/scripts/build_drug_knowledge.py similarity index 100% rename from scripts/build_drug_knowledge.py rename to polyguard-rl/scripts/build_drug_knowledge.py diff --git a/scripts/build_improvement_evidence_bundle.py b/polyguard-rl/scripts/build_improvement_evidence_bundle.py similarity index 100% rename from scripts/build_improvement_evidence_bundle.py rename to polyguard-rl/scripts/build_improvement_evidence_bundle.py diff --git a/scripts/build_retrieval_index.py b/polyguard-rl/scripts/build_retrieval_index.py similarity index 100% rename from scripts/build_retrieval_index.py rename to polyguard-rl/scripts/build_retrieval_index.py diff --git a/scripts/build_scenarios.py b/polyguard-rl/scripts/build_scenarios.py similarity index 100% rename from scripts/build_scenarios.py rename to polyguard-rl/scripts/build_scenarios.py diff --git a/scripts/build_synthetic_patients.py b/polyguard-rl/scripts/build_synthetic_patients.py similarity index 100% rename from scripts/build_synthetic_patients.py rename to polyguard-rl/scripts/build_synthetic_patients.py diff --git a/scripts/build_training_corpus.py b/polyguard-rl/scripts/build_training_corpus.py similarity index 100% rename from scripts/build_training_corpus.py rename to polyguard-rl/scripts/build_training_corpus.py diff --git a/scripts/crawl_labels_and_guidelines.py b/polyguard-rl/scripts/crawl_labels_and_guidelines.py similarity index 100% rename from scripts/crawl_labels_and_guidelines.py rename to polyguard-rl/scripts/crawl_labels_and_guidelines.py diff --git a/scripts/deploy_evidence_space.py b/polyguard-rl/scripts/deploy_evidence_space.py similarity index 100% rename from scripts/deploy_evidence_space.py 
rename to polyguard-rl/scripts/deploy_evidence_space.py diff --git a/scripts/deploy_final_artifact_space.py b/polyguard-rl/scripts/deploy_final_artifact_space.py similarity index 100% rename from scripts/deploy_final_artifact_space.py rename to polyguard-rl/scripts/deploy_final_artifact_space.py diff --git a/scripts/deploy_space.sh b/polyguard-rl/scripts/deploy_space.sh old mode 100644 new mode 100755 similarity index 100% rename from scripts/deploy_space.sh rename to polyguard-rl/scripts/deploy_space.sh diff --git a/scripts/deploy_space_api.py b/polyguard-rl/scripts/deploy_space_api.py similarity index 100% rename from scripts/deploy_space_api.py rename to polyguard-rl/scripts/deploy_space_api.py diff --git a/scripts/deploy_training_space.py b/polyguard-rl/scripts/deploy_training_space.py similarity index 100% rename from scripts/deploy_training_space.py rename to polyguard-rl/scripts/deploy_training_space.py diff --git a/scripts/evaluate_all.py b/polyguard-rl/scripts/evaluate_all.py similarity index 100% rename from scripts/evaluate_all.py rename to polyguard-rl/scripts/evaluate_all.py diff --git a/scripts/evaluate_baselines.py b/polyguard-rl/scripts/evaluate_baselines.py similarity index 100% rename from scripts/evaluate_baselines.py rename to polyguard-rl/scripts/evaluate_baselines.py diff --git a/scripts/evaluate_compare_runs.py b/polyguard-rl/scripts/evaluate_compare_runs.py similarity index 100% rename from scripts/evaluate_compare_runs.py rename to polyguard-rl/scripts/evaluate_compare_runs.py diff --git a/scripts/evaluate_policy_ablations.py b/polyguard-rl/scripts/evaluate_policy_ablations.py similarity index 100% rename from scripts/evaluate_policy_ablations.py rename to polyguard-rl/scripts/evaluate_policy_ablations.py diff --git a/scripts/export_adapters.py b/polyguard-rl/scripts/export_adapters.py similarity index 100% rename from scripts/export_adapters.py rename to polyguard-rl/scripts/export_adapters.py diff --git 
a/scripts/generate_hf_training_report.py b/polyguard-rl/scripts/generate_hf_training_report.py similarity index 100% rename from scripts/generate_hf_training_report.py rename to polyguard-rl/scripts/generate_hf_training_report.py diff --git a/scripts/generate_sft_data.py b/polyguard-rl/scripts/generate_sft_data.py similarity index 100% rename from scripts/generate_sft_data.py rename to polyguard-rl/scripts/generate_sft_data.py diff --git a/scripts/generate_submission_evidence.py b/polyguard-rl/scripts/generate_submission_evidence.py similarity index 100% rename from scripts/generate_submission_evidence.py rename to polyguard-rl/scripts/generate_submission_evidence.py diff --git a/scripts/ingest_open_drug_sources.py b/polyguard-rl/scripts/ingest_open_drug_sources.py similarity index 100% rename from scripts/ingest_open_drug_sources.py rename to polyguard-rl/scripts/ingest_open_drug_sources.py diff --git a/scripts/install_hf_active_bundle.py b/polyguard-rl/scripts/install_hf_active_bundle.py similarity index 100% rename from scripts/install_hf_active_bundle.py rename to polyguard-rl/scripts/install_hf_active_bundle.py diff --git a/scripts/merge_adapters_safe.py b/polyguard-rl/scripts/merge_adapters_safe.py similarity index 100% rename from scripts/merge_adapters_safe.py rename to polyguard-rl/scripts/merge_adapters_safe.py diff --git a/scripts/monitor_training_space_status.py b/polyguard-rl/scripts/monitor_training_space_status.py similarity index 100% rename from scripts/monitor_training_space_status.py rename to polyguard-rl/scripts/monitor_training_space_status.py diff --git a/scripts/package_active_model_bundle.py b/polyguard-rl/scripts/package_active_model_bundle.py similarity index 100% rename from scripts/package_active_model_bundle.py rename to polyguard-rl/scripts/package_active_model_bundle.py diff --git a/scripts/pause_training_space.py b/polyguard-rl/scripts/pause_training_space.py similarity index 100% rename from scripts/pause_training_space.py rename 
to polyguard-rl/scripts/pause_training_space.py diff --git a/scripts/preprocess_mock_ehr.py b/polyguard-rl/scripts/preprocess_mock_ehr.py similarity index 100% rename from scripts/preprocess_mock_ehr.py rename to polyguard-rl/scripts/preprocess_mock_ehr.py diff --git a/scripts/pull_submission_evidence.py b/polyguard-rl/scripts/pull_submission_evidence.py similarity index 100% rename from scripts/pull_submission_evidence.py rename to polyguard-rl/scripts/pull_submission_evidence.py diff --git a/scripts/pull_sweep_artifacts.py b/polyguard-rl/scripts/pull_sweep_artifacts.py similarity index 100% rename from scripts/pull_sweep_artifacts.py rename to polyguard-rl/scripts/pull_sweep_artifacts.py diff --git a/scripts/pull_training_artifacts.py b/polyguard-rl/scripts/pull_training_artifacts.py similarity index 100% rename from scripts/pull_training_artifacts.py rename to polyguard-rl/scripts/pull_training_artifacts.py diff --git a/scripts/render_diagram_images.py b/polyguard-rl/scripts/render_diagram_images.py similarity index 100% rename from scripts/render_diagram_images.py rename to polyguard-rl/scripts/render_diagram_images.py diff --git a/scripts/run_all_local.sh b/polyguard-rl/scripts/run_all_local.sh old mode 100644 new mode 100755 similarity index 100% rename from scripts/run_all_local.sh rename to polyguard-rl/scripts/run_all_local.sh diff --git a/scripts/run_api_local.sh b/polyguard-rl/scripts/run_api_local.sh old mode 100644 new mode 100755 similarity index 100% rename from scripts/run_api_local.sh rename to polyguard-rl/scripts/run_api_local.sh diff --git a/scripts/run_env_local.sh b/polyguard-rl/scripts/run_env_local.sh old mode 100644 new mode 100755 similarity index 100% rename from scripts/run_env_local.sh rename to polyguard-rl/scripts/run_env_local.sh diff --git a/scripts/run_robustness_suite.py b/polyguard-rl/scripts/run_robustness_suite.py similarity index 100% rename from scripts/run_robustness_suite.py rename to 
polyguard-rl/scripts/run_robustness_suite.py diff --git a/scripts/run_ui_local.sh b/polyguard-rl/scripts/run_ui_local.sh old mode 100644 new mode 100755 similarity index 100% rename from scripts/run_ui_local.sh rename to polyguard-rl/scripts/run_ui_local.sh diff --git a/scripts/smoke_test_all.sh b/polyguard-rl/scripts/smoke_test_all.sh old mode 100644 new mode 100755 similarity index 100% rename from scripts/smoke_test_all.sh rename to polyguard-rl/scripts/smoke_test_all.sh diff --git a/scripts/test_inference_postsave.py b/polyguard-rl/scripts/test_inference_postsave.py similarity index 100% rename from scripts/test_inference_postsave.py rename to polyguard-rl/scripts/test_inference_postsave.py diff --git a/scripts/train_dose_model.py b/polyguard-rl/scripts/train_dose_model.py similarity index 100% rename from scripts/train_dose_model.py rename to polyguard-rl/scripts/train_dose_model.py diff --git a/scripts/train_graph_model.py b/polyguard-rl/scripts/train_graph_model.py similarity index 100% rename from scripts/train_graph_model.py rename to polyguard-rl/scripts/train_graph_model.py diff --git a/scripts/train_grpo_dosing.py b/polyguard-rl/scripts/train_grpo_dosing.py similarity index 100% rename from scripts/train_grpo_dosing.py rename to polyguard-rl/scripts/train_grpo_dosing.py diff --git a/scripts/train_grpo_planner.py b/polyguard-rl/scripts/train_grpo_planner.py old mode 100644 new mode 100755 similarity index 100% rename from scripts/train_grpo_planner.py rename to polyguard-rl/scripts/train_grpo_planner.py diff --git a/scripts/train_grpo_policy.py b/polyguard-rl/scripts/train_grpo_policy.py similarity index 100% rename from scripts/train_grpo_policy.py rename to polyguard-rl/scripts/train_grpo_policy.py diff --git a/scripts/train_grpo_supervisor.py b/polyguard-rl/scripts/train_grpo_supervisor.py similarity index 100% rename from scripts/train_grpo_supervisor.py rename to polyguard-rl/scripts/train_grpo_supervisor.py diff --git a/scripts/train_grpo_trl.py 
b/polyguard-rl/scripts/train_grpo_trl.py similarity index 100% rename from scripts/train_grpo_trl.py rename to polyguard-rl/scripts/train_grpo_trl.py diff --git a/scripts/train_risk_model.py b/polyguard-rl/scripts/train_risk_model.py similarity index 100% rename from scripts/train_risk_model.py rename to polyguard-rl/scripts/train_risk_model.py diff --git a/scripts/train_sft.py b/polyguard-rl/scripts/train_sft.py similarity index 100% rename from scripts/train_sft.py rename to polyguard-rl/scripts/train_sft.py diff --git a/scripts/train_sft_trl.py b/polyguard-rl/scripts/train_sft_trl.py similarity index 100% rename from scripts/train_sft_trl.py rename to polyguard-rl/scripts/train_sft_trl.py diff --git a/scripts/validate_submission_links.py b/polyguard-rl/scripts/validate_submission_links.py similarity index 100% rename from scripts/validate_submission_links.py rename to polyguard-rl/scripts/validate_submission_links.py diff --git a/scripts/venv_utils.sh b/polyguard-rl/scripts/venv_utils.sh old mode 100644 new mode 100755 similarity index 100% rename from scripts/venv_utils.sh rename to polyguard-rl/scripts/venv_utils.sh diff --git a/server/__init__.py b/polyguard-rl/server/__init__.py similarity index 100% rename from server/__init__.py rename to polyguard-rl/server/__init__.py diff --git a/server/app.py b/polyguard-rl/server/app.py similarity index 100% rename from server/app.py rename to polyguard-rl/server/app.py diff --git a/submission_bundle/README.md b/polyguard-rl/submission_bundle/README.md similarity index 100% rename from submission_bundle/README.md rename to polyguard-rl/submission_bundle/README.md diff --git a/submission_bundle/grpo_training_cycle/docs_results/README.md b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/README.md similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/README.md rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/README.md diff --git 
a/submission_bundle/grpo_training_cycle/docs_results/acceptance_gate.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/acceptance_gate.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/acceptance_gate.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/acceptance_gate.json diff --git a/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/avg_process_fidelity.png b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/avg_process_fidelity.png new file mode 100644 index 0000000000000000000000000000000000000000..af8956c1da14001e096d7cabf96f511bd517c075 --- /dev/null +++ b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/avg_process_fidelity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02f7f1be2533be9d0862eb78c8948ce689128b3e0d2c72cf89719dbecbf8da04 +size 12532 diff --git a/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/avg_reward.png b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..abf2aa56b5280d757f4a3af28aeef928ff704530 --- /dev/null +++ b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e45b324d208a71eeffa0dcfe146a5a7f2e17cf5a951a173dcd68818106d774c +size 11094 diff --git a/submission_bundle/grpo_training_cycle/docs_results/baselines.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/baselines.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/baselines.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/baselines.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/benchmark_report.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/benchmark_report.json similarity index 100% 
rename from submission_bundle/grpo_training_cycle/docs_results/benchmark_report.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/benchmark_report.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/benchmark_report.txt b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/benchmark_report.txt similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/benchmark_report.txt rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/benchmark_report.txt diff --git a/submission_bundle/grpo_training_cycle/docs_results/dose_train.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/dose_train.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/dose_train.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/dose_train.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/dosing_grpo.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/dosing_grpo.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/dosing_grpo.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/dosing_grpo.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/frontier_ready.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/frontier_ready.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/frontier_ready.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/frontier_ready.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/graph_train.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/graph_train.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/graph_train.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/graph_train.json diff 
--git a/submission_bundle/grpo_training_cycle/docs_results/grpo_ablation_report.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/grpo_ablation_report.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/grpo_ablation_report.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/grpo_ablation_report.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_auto.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_auto.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_auto.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_auto.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_fallback_check.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_fallback_check.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_fallback_check.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_fallback_check.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_smoke.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_smoke.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_smoke.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_smoke.json diff --git 
a/submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_strict_check.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_strict_check.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_strict_check.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/grpo_trl_run_strict_check.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/hf_space_verification.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/hf_space_verification.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/hf_space_verification.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/hf_space_verification.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/hf_training_status.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/hf_training_status.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/hf_training_status.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/hf_training_status.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/improvement_report.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/improvement_report.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/improvement_report.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/improvement_report.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/improvement_report_benchmark.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/improvement_report_benchmark.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/improvement_report_benchmark.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/improvement_report_benchmark.json diff 
--git a/submission_bundle/grpo_training_cycle/docs_results/inference_benchmark.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/inference_benchmark.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/inference_benchmark.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/inference_benchmark.json diff --git a/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/legality_rate.png b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..293afc9e01bcc4b9cfa048d0264b1471d8fd486b --- /dev/null +++ b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/legality_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd21eae41ea8c538ed698e94803df78bf0f2a13c0792b6492a2e15b4b449243c +size 10839 diff --git a/submission_bundle/grpo_training_cycle/docs_results/planner_grpo.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/planner_grpo.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/planner_grpo.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/planner_grpo.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/plot_index.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/plot_index.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/plot_index.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/plot_index.json diff --git a/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/policy_stack_avg_reward.png b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1081c33e7498b4e47e9ceb4abbff0ecacb785d --- /dev/null +++ 
b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/policy_stack_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437dc54a16cd930fe191bfb337ebb598845f7c0ceea812d936d3cc7f60593e19 +size 13709 diff --git a/submission_bundle/grpo_training_cycle/docs_results/postsave_inference.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/postsave_inference.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/postsave_inference.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/postsave_inference.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/risk_train.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/risk_train.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/risk_train.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/risk_train.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/robustness.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/robustness.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/robustness.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/robustness.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/sft_run.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/sft_run.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/sft_run.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/sft_run.json diff --git a/submission_bundle/grpo_training_cycle/docs_results/sft_trl_run.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/sft_trl_run.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/sft_trl_run.json rename to 
polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/sft_trl_run.json diff --git a/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/success_rate.png b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/success_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d48cf79006c0bc7241782a29d010e95bc524069f --- /dev/null +++ b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/success_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb49f19b04948dbe3dcac9b2469e93e6e07167fd297587228d70232e7aa41ff5 +size 11233 diff --git a/submission_bundle/grpo_training_cycle/docs_results/supervisor_grpo.json b/polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/supervisor_grpo.json similarity index 100% rename from submission_bundle/grpo_training_cycle/docs_results/supervisor_grpo.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/docs_results/supervisor_grpo.json diff --git a/polyguard-rl/submission_bundle/grpo_training_cycle/plots/avg_process_fidelity.png b/polyguard-rl/submission_bundle/grpo_training_cycle/plots/avg_process_fidelity.png new file mode 100644 index 0000000000000000000000000000000000000000..af8956c1da14001e096d7cabf96f511bd517c075 --- /dev/null +++ b/polyguard-rl/submission_bundle/grpo_training_cycle/plots/avg_process_fidelity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02f7f1be2533be9d0862eb78c8948ce689128b3e0d2c72cf89719dbecbf8da04 +size 12532 diff --git a/polyguard-rl/submission_bundle/grpo_training_cycle/plots/avg_reward.png b/polyguard-rl/submission_bundle/grpo_training_cycle/plots/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..abf2aa56b5280d757f4a3af28aeef928ff704530 --- /dev/null +++ b/polyguard-rl/submission_bundle/grpo_training_cycle/plots/avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1e45b324d208a71eeffa0dcfe146a5a7f2e17cf5a951a173dcd68818106d774c +size 11094 diff --git a/polyguard-rl/submission_bundle/grpo_training_cycle/plots/legality_rate.png b/polyguard-rl/submission_bundle/grpo_training_cycle/plots/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..293afc9e01bcc4b9cfa048d0264b1471d8fd486b --- /dev/null +++ b/polyguard-rl/submission_bundle/grpo_training_cycle/plots/legality_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd21eae41ea8c538ed698e94803df78bf0f2a13c0792b6492a2e15b4b449243c +size 10839 diff --git a/polyguard-rl/submission_bundle/grpo_training_cycle/plots/policy_stack_avg_reward.png b/polyguard-rl/submission_bundle/grpo_training_cycle/plots/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1081c33e7498b4e47e9ceb4abbff0ecacb785d --- /dev/null +++ b/polyguard-rl/submission_bundle/grpo_training_cycle/plots/policy_stack_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437dc54a16cd930fe191bfb337ebb598845f7c0ceea812d936d3cc7f60593e19 +size 13709 diff --git a/polyguard-rl/submission_bundle/grpo_training_cycle/plots/success_rate.png b/polyguard-rl/submission_bundle/grpo_training_cycle/plots/success_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d48cf79006c0bc7241782a29d010e95bc524069f --- /dev/null +++ b/polyguard-rl/submission_bundle/grpo_training_cycle/plots/success_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb49f19b04948dbe3dcac9b2469e93e6e07167fd297587228d70232e7aa41ff5 +size 11233 diff --git a/submission_bundle/grpo_training_cycle/reports/baselines.json b/polyguard-rl/submission_bundle/grpo_training_cycle/reports/baselines.json similarity index 100% rename from submission_bundle/grpo_training_cycle/reports/baselines.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/reports/baselines.json 
diff --git a/submission_bundle/grpo_training_cycle/reports/benchmark_report.json b/polyguard-rl/submission_bundle/grpo_training_cycle/reports/benchmark_report.json similarity index 100% rename from submission_bundle/grpo_training_cycle/reports/benchmark_report.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/reports/benchmark_report.json diff --git a/submission_bundle/grpo_training_cycle/reports/benchmark_report.txt b/polyguard-rl/submission_bundle/grpo_training_cycle/reports/benchmark_report.txt similarity index 100% rename from submission_bundle/grpo_training_cycle/reports/benchmark_report.txt rename to polyguard-rl/submission_bundle/grpo_training_cycle/reports/benchmark_report.txt diff --git a/submission_bundle/grpo_training_cycle/reports/grpo_ablation_report.json b/polyguard-rl/submission_bundle/grpo_training_cycle/reports/grpo_ablation_report.json similarity index 100% rename from submission_bundle/grpo_training_cycle/reports/grpo_ablation_report.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/reports/grpo_ablation_report.json diff --git a/submission_bundle/grpo_training_cycle/reports/grpo_trl_run.json b/polyguard-rl/submission_bundle/grpo_training_cycle/reports/grpo_trl_run.json similarity index 100% rename from submission_bundle/grpo_training_cycle/reports/grpo_trl_run.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/reports/grpo_trl_run.json diff --git a/submission_bundle/grpo_training_cycle/reports/hf_training_status.json b/polyguard-rl/submission_bundle/grpo_training_cycle/reports/hf_training_status.json similarity index 100% rename from submission_bundle/grpo_training_cycle/reports/hf_training_status.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/reports/hf_training_status.json diff --git a/submission_bundle/grpo_training_cycle/reports/improvement_report.json b/polyguard-rl/submission_bundle/grpo_training_cycle/reports/improvement_report.json similarity index 100% rename from 
submission_bundle/grpo_training_cycle/reports/improvement_report.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/reports/improvement_report.json diff --git a/submission_bundle/grpo_training_cycle/reports/plot_index.json b/polyguard-rl/submission_bundle/grpo_training_cycle/reports/plot_index.json similarity index 100% rename from submission_bundle/grpo_training_cycle/reports/plot_index.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/reports/plot_index.json diff --git a/submission_bundle/grpo_training_cycle/reports/postsave_inference.json b/polyguard-rl/submission_bundle/grpo_training_cycle/reports/postsave_inference.json similarity index 100% rename from submission_bundle/grpo_training_cycle/reports/postsave_inference.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/reports/postsave_inference.json diff --git a/submission_bundle/grpo_training_cycle/reports/sft_trl_run.json b/polyguard-rl/submission_bundle/grpo_training_cycle/reports/sft_trl_run.json similarity index 100% rename from submission_bundle/grpo_training_cycle/reports/sft_trl_run.json rename to polyguard-rl/submission_bundle/grpo_training_cycle/reports/sft_trl_run.json diff --git a/submission_bundle/local_results/docs_results/README.md b/polyguard-rl/submission_bundle/local_results/docs_results/README.md similarity index 100% rename from submission_bundle/local_results/docs_results/README.md rename to polyguard-rl/submission_bundle/local_results/docs_results/README.md diff --git a/submission_bundle/local_results/docs_results/acceptance_gate.json b/polyguard-rl/submission_bundle/local_results/docs_results/acceptance_gate.json similarity index 100% rename from submission_bundle/local_results/docs_results/acceptance_gate.json rename to polyguard-rl/submission_bundle/local_results/docs_results/acceptance_gate.json diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/anti_cheat_failure_rates.png 
b/polyguard-rl/submission_bundle/local_results/docs_results/anti_cheat_failure_rates.png new file mode 100644 index 0000000000000000000000000000000000000000..5a667f30979c0e4ee981b89910fe48b9d5f64587 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/anti_cheat_failure_rates.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1599181b9f2a499b59facf5629c0a413268a48d8a201662995e94a5f9411948a +size 50051 diff --git a/submission_bundle/local_results/docs_results/anti_hacking_overfit_report.json b/polyguard-rl/submission_bundle/local_results/docs_results/anti_hacking_overfit_report.json similarity index 100% rename from submission_bundle/local_results/docs_results/anti_hacking_overfit_report.json rename to polyguard-rl/submission_bundle/local_results/docs_results/anti_hacking_overfit_report.json diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/avg_process_fidelity.png b/polyguard-rl/submission_bundle/local_results/docs_results/avg_process_fidelity.png new file mode 100644 index 0000000000000000000000000000000000000000..af8956c1da14001e096d7cabf96f511bd517c075 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/avg_process_fidelity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02f7f1be2533be9d0862eb78c8948ce689128b3e0d2c72cf89719dbecbf8da04 +size 12532 diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/avg_reward.png b/polyguard-rl/submission_bundle/local_results/docs_results/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..abf2aa56b5280d757f4a3af28aeef928ff704530 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e45b324d208a71eeffa0dcfe146a5a7f2e17cf5a951a173dcd68818106d774c +size 11094 diff --git a/submission_bundle/local_results/docs_results/baselines.json 
b/polyguard-rl/submission_bundle/local_results/docs_results/baselines.json similarity index 100% rename from submission_bundle/local_results/docs_results/baselines.json rename to polyguard-rl/submission_bundle/local_results/docs_results/baselines.json diff --git a/submission_bundle/local_results/docs_results/benchmark_report.json b/polyguard-rl/submission_bundle/local_results/docs_results/benchmark_report.json similarity index 100% rename from submission_bundle/local_results/docs_results/benchmark_report.json rename to polyguard-rl/submission_bundle/local_results/docs_results/benchmark_report.json diff --git a/submission_bundle/local_results/docs_results/benchmark_report.txt b/polyguard-rl/submission_bundle/local_results/docs_results/benchmark_report.txt similarity index 100% rename from submission_bundle/local_results/docs_results/benchmark_report.txt rename to polyguard-rl/submission_bundle/local_results/docs_results/benchmark_report.txt diff --git a/submission_bundle/local_results/docs_results/dose_train.json b/polyguard-rl/submission_bundle/local_results/docs_results/dose_train.json similarity index 100% rename from submission_bundle/local_results/docs_results/dose_train.json rename to polyguard-rl/submission_bundle/local_results/docs_results/dose_train.json diff --git a/submission_bundle/local_results/docs_results/dosing_grpo.json b/polyguard-rl/submission_bundle/local_results/docs_results/dosing_grpo.json similarity index 100% rename from submission_bundle/local_results/docs_results/dosing_grpo.json rename to polyguard-rl/submission_bundle/local_results/docs_results/dosing_grpo.json diff --git a/submission_bundle/local_results/docs_results/frontier_ready.json b/polyguard-rl/submission_bundle/local_results/docs_results/frontier_ready.json similarity index 100% rename from submission_bundle/local_results/docs_results/frontier_ready.json rename to polyguard-rl/submission_bundle/local_results/docs_results/frontier_ready.json diff --git 
a/submission_bundle/local_results/docs_results/graph_train.json b/polyguard-rl/submission_bundle/local_results/docs_results/graph_train.json similarity index 100% rename from submission_bundle/local_results/docs_results/graph_train.json rename to polyguard-rl/submission_bundle/local_results/docs_results/graph_train.json diff --git a/submission_bundle/local_results/docs_results/grpo_ablation_report.json b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_ablation_report.json similarity index 100% rename from submission_bundle/local_results/docs_results/grpo_ablation_report.json rename to polyguard-rl/submission_bundle/local_results/docs_results/grpo_ablation_report.json diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/grpo_reward_curves.png b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_reward_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..38c93fc5e74f14d64778bf3fc94463f312e3d6e6 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_reward_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5233693b37cb7132d11d7d550072b99fc5202d2a0a7304aaffb07bbaf74c84a +size 19699 diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/grpo_training_cycle/avg_process_fidelity.png b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_training_cycle/avg_process_fidelity.png new file mode 100644 index 0000000000000000000000000000000000000000..af8956c1da14001e096d7cabf96f511bd517c075 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_training_cycle/avg_process_fidelity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02f7f1be2533be9d0862eb78c8948ce689128b3e0d2c72cf89719dbecbf8da04 +size 12532 diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/grpo_training_cycle/avg_reward.png 
b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_training_cycle/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..abf2aa56b5280d757f4a3af28aeef928ff704530 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_training_cycle/avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e45b324d208a71eeffa0dcfe146a5a7f2e17cf5a951a173dcd68818106d774c +size 11094 diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/grpo_training_cycle/legality_rate.png b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_training_cycle/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..293afc9e01bcc4b9cfa048d0264b1471d8fd486b --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_training_cycle/legality_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd21eae41ea8c538ed698e94803df78bf0f2a13c0792b6492a2e15b4b449243c +size 10839 diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/grpo_training_cycle/policy_stack_avg_reward.png b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_training_cycle/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1081c33e7498b4e47e9ceb4abbff0ecacb785d --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_training_cycle/policy_stack_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437dc54a16cd930fe191bfb337ebb598845f7c0ceea812d936d3cc7f60593e19 +size 13709 diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/grpo_training_cycle/success_rate.png b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_training_cycle/success_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d48cf79006c0bc7241782a29d010e95bc524069f --- /dev/null +++ 
b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_training_cycle/success_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb49f19b04948dbe3dcac9b2469e93e6e07167fd297587228d70232e7aa41ff5 +size 11233 diff --git a/submission_bundle/local_results/docs_results/grpo_trl_run.json b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_trl_run.json similarity index 100% rename from submission_bundle/local_results/docs_results/grpo_trl_run.json rename to polyguard-rl/submission_bundle/local_results/docs_results/grpo_trl_run.json diff --git a/submission_bundle/local_results/docs_results/grpo_trl_run_auto.json b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_trl_run_auto.json similarity index 100% rename from submission_bundle/local_results/docs_results/grpo_trl_run_auto.json rename to polyguard-rl/submission_bundle/local_results/docs_results/grpo_trl_run_auto.json diff --git a/submission_bundle/local_results/docs_results/grpo_trl_run_fallback_check.json b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_trl_run_fallback_check.json similarity index 100% rename from submission_bundle/local_results/docs_results/grpo_trl_run_fallback_check.json rename to polyguard-rl/submission_bundle/local_results/docs_results/grpo_trl_run_fallback_check.json diff --git a/submission_bundle/local_results/docs_results/grpo_trl_run_smoke.json b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_trl_run_smoke.json similarity index 100% rename from submission_bundle/local_results/docs_results/grpo_trl_run_smoke.json rename to polyguard-rl/submission_bundle/local_results/docs_results/grpo_trl_run_smoke.json diff --git a/submission_bundle/local_results/docs_results/grpo_trl_run_strict_check.json b/polyguard-rl/submission_bundle/local_results/docs_results/grpo_trl_run_strict_check.json similarity index 100% rename from submission_bundle/local_results/docs_results/grpo_trl_run_strict_check.json rename to 
polyguard-rl/submission_bundle/local_results/docs_results/grpo_trl_run_strict_check.json diff --git a/submission_bundle/local_results/docs_results/hf_space_verification.json b/polyguard-rl/submission_bundle/local_results/docs_results/hf_space_verification.json similarity index 100% rename from submission_bundle/local_results/docs_results/hf_space_verification.json rename to polyguard-rl/submission_bundle/local_results/docs_results/hf_space_verification.json diff --git a/submission_bundle/local_results/docs_results/hf_sweep_summary.json b/polyguard-rl/submission_bundle/local_results/docs_results/hf_sweep_summary.json similarity index 100% rename from submission_bundle/local_results/docs_results/hf_sweep_summary.json rename to polyguard-rl/submission_bundle/local_results/docs_results/hf_sweep_summary.json diff --git a/submission_bundle/local_results/docs_results/hf_training_status.json b/polyguard-rl/submission_bundle/local_results/docs_results/hf_training_status.json similarity index 100% rename from submission_bundle/local_results/docs_results/hf_training_status.json rename to polyguard-rl/submission_bundle/local_results/docs_results/hf_training_status.json diff --git a/submission_bundle/local_results/docs_results/improvement_report.json b/polyguard-rl/submission_bundle/local_results/docs_results/improvement_report.json similarity index 100% rename from submission_bundle/local_results/docs_results/improvement_report.json rename to polyguard-rl/submission_bundle/local_results/docs_results/improvement_report.json diff --git a/submission_bundle/local_results/docs_results/improvement_report_benchmark.json b/polyguard-rl/submission_bundle/local_results/docs_results/improvement_report_benchmark.json similarity index 100% rename from submission_bundle/local_results/docs_results/improvement_report_benchmark.json rename to polyguard-rl/submission_bundle/local_results/docs_results/improvement_report_benchmark.json diff --git 
a/submission_bundle/local_results/docs_results/inference_benchmark.json b/polyguard-rl/submission_bundle/local_results/docs_results/inference_benchmark.json similarity index 100% rename from submission_bundle/local_results/docs_results/inference_benchmark.json rename to polyguard-rl/submission_bundle/local_results/docs_results/inference_benchmark.json diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/inference_latency_validity.png b/polyguard-rl/submission_bundle/local_results/docs_results/inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..a86ea68fe05da60d6c6322a6eb1fa1fb60a788dc --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/inference_latency_validity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:116b66e6d10c5f7e05dcf959f88265b502bb81e44c871d976b185d328cd2ccba +size 53622 diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/inference_validity_reward.png b/polyguard-rl/submission_bundle/local_results/docs_results/inference_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..026a26ad75ae34d513b7c1affbb83daf6675fd7c --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/inference_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9ba7cb41ca42268f40219ef7556d8b3e61156af3bc8c97dfed4c7b6b4bc39e9 +size 49225 diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/legality_rate.png b/polyguard-rl/submission_bundle/local_results/docs_results/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..293afc9e01bcc4b9cfa048d0264b1471d8fd486b --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/legality_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd21eae41ea8c538ed698e94803df78bf0f2a13c0792b6492a2e15b4b449243c +size 10839 diff 
--git a/submission_bundle/local_results/docs_results/planner_grpo.json b/polyguard-rl/submission_bundle/local_results/docs_results/planner_grpo.json similarity index 100% rename from submission_bundle/local_results/docs_results/planner_grpo.json rename to polyguard-rl/submission_bundle/local_results/docs_results/planner_grpo.json diff --git a/submission_bundle/local_results/docs_results/plot_index.json b/polyguard-rl/submission_bundle/local_results/docs_results/plot_index.json similarity index 100% rename from submission_bundle/local_results/docs_results/plot_index.json rename to polyguard-rl/submission_bundle/local_results/docs_results/plot_index.json diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/policy_stack_avg_reward.png b/polyguard-rl/submission_bundle/local_results/docs_results/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1081c33e7498b4e47e9ceb4abbff0ecacb785d --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/policy_stack_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437dc54a16cd930fe191bfb337ebb598845f7c0ceea812d936d3cc7f60593e19 +size 13709 diff --git a/submission_bundle/local_results/docs_results/postsave_inference.json b/polyguard-rl/submission_bundle/local_results/docs_results/postsave_inference.json similarity index 100% rename from submission_bundle/local_results/docs_results/postsave_inference.json rename to polyguard-rl/submission_bundle/local_results/docs_results/postsave_inference.json diff --git a/submission_bundle/local_results/docs_results/postsave_inference_smoke.json b/polyguard-rl/submission_bundle/local_results/docs_results/postsave_inference_smoke.json similarity index 100% rename from submission_bundle/local_results/docs_results/postsave_inference_smoke.json rename to polyguard-rl/submission_bundle/local_results/docs_results/postsave_inference_smoke.json diff --git 
a/polyguard-rl/submission_bundle/local_results/docs_results/qwen_model_grpo_reward.png b/polyguard-rl/submission_bundle/local_results/docs_results/qwen_model_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..8a0d04f71a614b34ebf457f2568ea9c455c4e2cc --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/qwen_model_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2af6f1dce9ab1a81ac026e281438d9a40bee41aaea6eed0190df75033ec6e873 +size 45366 diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/qwen_model_sft_loss.png b/polyguard-rl/submission_bundle/local_results/docs_results/qwen_model_sft_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..02106a69703a6545da9753f86d425f25b9af3256 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/qwen_model_sft_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04fbf956625fe63139c7341686dee2fe928e4a908380beebe06653f4b532b937 +size 40728 diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/qwen_model_sft_reward.png b/polyguard-rl/submission_bundle/local_results/docs_results/qwen_model_sft_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..35a0cca6ff890447edc7063f56b3c947ae418cbe --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/qwen_model_sft_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd868c27b014c51f4b972045f5d1156e6debb21d21835b6e3da4dbab7f5f6d77 +size 44621 diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/reward_component_bars.png b/polyguard-rl/submission_bundle/local_results/docs_results/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..713b1354077916a5ded3a1fb4684b75cca0d4d70 --- /dev/null +++ 
b/polyguard-rl/submission_bundle/local_results/docs_results/reward_component_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e45b33c89bbb501551ab6ebdd6ff958994d00259e8bfd04f19a6affdb28a66 +size 22230 diff --git a/submission_bundle/local_results/docs_results/risk_train.json b/polyguard-rl/submission_bundle/local_results/docs_results/risk_train.json similarity index 100% rename from submission_bundle/local_results/docs_results/risk_train.json rename to polyguard-rl/submission_bundle/local_results/docs_results/risk_train.json diff --git a/submission_bundle/local_results/docs_results/robustness.json b/polyguard-rl/submission_bundle/local_results/docs_results/robustness.json similarity index 100% rename from submission_bundle/local_results/docs_results/robustness.json rename to polyguard-rl/submission_bundle/local_results/docs_results/robustness.json diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/sft_loss_curves.png b/polyguard-rl/submission_bundle/local_results/docs_results/sft_loss_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..b24b4375e766ffec8647952af25b6baed4bc0abf --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/sft_loss_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa82325064843def8e501725340f29b5e0782825dcce9fba5349fd6e68b559e +size 76094 diff --git a/submission_bundle/local_results/docs_results/sft_run.json b/polyguard-rl/submission_bundle/local_results/docs_results/sft_run.json similarity index 100% rename from submission_bundle/local_results/docs_results/sft_run.json rename to polyguard-rl/submission_bundle/local_results/docs_results/sft_run.json diff --git a/submission_bundle/local_results/docs_results/sft_trl_run.json b/polyguard-rl/submission_bundle/local_results/docs_results/sft_trl_run.json similarity index 100% rename from submission_bundle/local_results/docs_results/sft_trl_run.json rename to 
polyguard-rl/submission_bundle/local_results/docs_results/sft_trl_run.json diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/sft_validity_reward.png b/polyguard-rl/submission_bundle/local_results/docs_results/sft_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..8e95b1dd424b9e8bc4d2842b0f26508b1096b1b5 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/sft_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:986742eddcce5b63a43a0618a24f161034d22c4818156f32a055f30abecbb019 +size 46841 diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/sft_vs_grpo_reward.png b/polyguard-rl/submission_bundle/local_results/docs_results/sft_vs_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..a6b7230c05cdacd4e39c1612af3f65c1c70f4d64 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/sft_vs_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce92f65e018f8374be0e4db966d2ad33c9f86ee143e6386c14a6f055177d5a25 +size 50183 diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/success_rate.png b/polyguard-rl/submission_bundle/local_results/docs_results/success_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d48cf79006c0bc7241782a29d010e95bc524069f --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/success_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb49f19b04948dbe3dcac9b2469e93e6e07167fd297587228d70232e7aa41ff5 +size 11233 diff --git a/submission_bundle/local_results/docs_results/supervisor_grpo.json b/polyguard-rl/submission_bundle/local_results/docs_results/supervisor_grpo.json similarity index 100% rename from submission_bundle/local_results/docs_results/supervisor_grpo.json rename to 
polyguard-rl/submission_bundle/local_results/docs_results/supervisor_grpo.json diff --git a/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json b/polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json similarity index 100% rename from submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json diff --git a/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json similarity index 100% rename from submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json rename to polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json diff --git a/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json b/polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json similarity index 100% rename from submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json rename to polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json diff --git a/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json similarity index 100% rename from submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json rename to 
polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json diff --git a/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json b/polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json similarity index 100% rename from submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json diff --git a/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json similarity index 100% rename from submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json rename to polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json diff --git a/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json b/polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json similarity index 100% rename from submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json rename to polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json diff --git a/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json b/polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json similarity index 100% rename from submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json rename to 
polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json diff --git a/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json b/polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json similarity index 100% rename from submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json rename to polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json diff --git a/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json b/polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json similarity index 100% rename from submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json rename to polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json diff --git a/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json b/polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json similarity index 100% rename from submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json rename to polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json diff --git a/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json b/polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json similarity index 100% rename from submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json rename to 
polyguard-rl/submission_bundle/local_results/docs_results/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json diff --git a/polyguard-rl/submission_bundle/local_results/docs_results/train_holdout_gap.png b/polyguard-rl/submission_bundle/local_results/docs_results/train_holdout_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..d2f671922080bd4787c7b9f16f4526cfc2e20b94 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/docs_results/train_holdout_gap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202c512e64e66edf886f78e400795acca9a01b0832ce00ee14e95ebd1effa77b +size 45030 diff --git a/polyguard-rl/submission_bundle/local_results/plots/anti_cheat_failure_rates.png b/polyguard-rl/submission_bundle/local_results/plots/anti_cheat_failure_rates.png new file mode 100644 index 0000000000000000000000000000000000000000..5a667f30979c0e4ee981b89910fe48b9d5f64587 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/anti_cheat_failure_rates.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1599181b9f2a499b59facf5629c0a413268a48d8a201662995e94a5f9411948a +size 50051 diff --git a/polyguard-rl/submission_bundle/local_results/plots/avg_process_fidelity.png b/polyguard-rl/submission_bundle/local_results/plots/avg_process_fidelity.png new file mode 100644 index 0000000000000000000000000000000000000000..af8956c1da14001e096d7cabf96f511bd517c075 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/avg_process_fidelity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02f7f1be2533be9d0862eb78c8948ce689128b3e0d2c72cf89719dbecbf8da04 +size 12532 diff --git a/polyguard-rl/submission_bundle/local_results/plots/avg_reward.png b/polyguard-rl/submission_bundle/local_results/plots/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..abf2aa56b5280d757f4a3af28aeef928ff704530 --- /dev/null +++ 
b/polyguard-rl/submission_bundle/local_results/plots/avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e45b324d208a71eeffa0dcfe146a5a7f2e17cf5a951a173dcd68818106d774c +size 11094 diff --git a/polyguard-rl/submission_bundle/local_results/plots/grpo_reward_curves.png b/polyguard-rl/submission_bundle/local_results/plots/grpo_reward_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..38c93fc5e74f14d64778bf3fc94463f312e3d6e6 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/grpo_reward_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5233693b37cb7132d11d7d550072b99fc5202d2a0a7304aaffb07bbaf74c84a +size 19699 diff --git a/polyguard-rl/submission_bundle/local_results/plots/grpo_training_cycle/avg_process_fidelity.png b/polyguard-rl/submission_bundle/local_results/plots/grpo_training_cycle/avg_process_fidelity.png new file mode 100644 index 0000000000000000000000000000000000000000..af8956c1da14001e096d7cabf96f511bd517c075 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/grpo_training_cycle/avg_process_fidelity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02f7f1be2533be9d0862eb78c8948ce689128b3e0d2c72cf89719dbecbf8da04 +size 12532 diff --git a/polyguard-rl/submission_bundle/local_results/plots/grpo_training_cycle/avg_reward.png b/polyguard-rl/submission_bundle/local_results/plots/grpo_training_cycle/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..abf2aa56b5280d757f4a3af28aeef928ff704530 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/grpo_training_cycle/avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e45b324d208a71eeffa0dcfe146a5a7f2e17cf5a951a173dcd68818106d774c +size 11094 diff --git a/polyguard-rl/submission_bundle/local_results/plots/grpo_training_cycle/legality_rate.png 
b/polyguard-rl/submission_bundle/local_results/plots/grpo_training_cycle/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..293afc9e01bcc4b9cfa048d0264b1471d8fd486b --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/grpo_training_cycle/legality_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd21eae41ea8c538ed698e94803df78bf0f2a13c0792b6492a2e15b4b449243c +size 10839 diff --git a/polyguard-rl/submission_bundle/local_results/plots/grpo_training_cycle/policy_stack_avg_reward.png b/polyguard-rl/submission_bundle/local_results/plots/grpo_training_cycle/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1081c33e7498b4e47e9ceb4abbff0ecacb785d --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/grpo_training_cycle/policy_stack_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437dc54a16cd930fe191bfb337ebb598845f7c0ceea812d936d3cc7f60593e19 +size 13709 diff --git a/polyguard-rl/submission_bundle/local_results/plots/grpo_training_cycle/success_rate.png b/polyguard-rl/submission_bundle/local_results/plots/grpo_training_cycle/success_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d48cf79006c0bc7241782a29d010e95bc524069f --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/grpo_training_cycle/success_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb49f19b04948dbe3dcac9b2469e93e6e07167fd297587228d70232e7aa41ff5 +size 11233 diff --git a/polyguard-rl/submission_bundle/local_results/plots/inference_latency_validity.png b/polyguard-rl/submission_bundle/local_results/plots/inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..a86ea68fe05da60d6c6322a6eb1fa1fb60a788dc --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/inference_latency_validity.png 
@@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:116b66e6d10c5f7e05dcf959f88265b502bb81e44c871d976b185d328cd2ccba +size 53622 diff --git a/polyguard-rl/submission_bundle/local_results/plots/inference_validity_reward.png b/polyguard-rl/submission_bundle/local_results/plots/inference_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..026a26ad75ae34d513b7c1affbb83daf6675fd7c --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/inference_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9ba7cb41ca42268f40219ef7556d8b3e61156af3bc8c97dfed4c7b6b4bc39e9 +size 49225 diff --git a/polyguard-rl/submission_bundle/local_results/plots/legality_rate.png b/polyguard-rl/submission_bundle/local_results/plots/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..293afc9e01bcc4b9cfa048d0264b1471d8fd486b --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/legality_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd21eae41ea8c538ed698e94803df78bf0f2a13c0792b6492a2e15b4b449243c +size 10839 diff --git a/polyguard-rl/submission_bundle/local_results/plots/policy_stack_avg_reward.png b/polyguard-rl/submission_bundle/local_results/plots/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1081c33e7498b4e47e9ceb4abbff0ecacb785d --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/policy_stack_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437dc54a16cd930fe191bfb337ebb598845f7c0ceea812d936d3cc7f60593e19 +size 13709 diff --git a/polyguard-rl/submission_bundle/local_results/plots/qwen_model_grpo_reward.png b/polyguard-rl/submission_bundle/local_results/plots/qwen_model_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..8a0d04f71a614b34ebf457f2568ea9c455c4e2cc 
--- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/qwen_model_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2af6f1dce9ab1a81ac026e281438d9a40bee41aaea6eed0190df75033ec6e873 +size 45366 diff --git a/polyguard-rl/submission_bundle/local_results/plots/qwen_model_sft_loss.png b/polyguard-rl/submission_bundle/local_results/plots/qwen_model_sft_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..02106a69703a6545da9753f86d425f25b9af3256 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/qwen_model_sft_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04fbf956625fe63139c7341686dee2fe928e4a908380beebe06653f4b532b937 +size 40728 diff --git a/polyguard-rl/submission_bundle/local_results/plots/qwen_model_sft_reward.png b/polyguard-rl/submission_bundle/local_results/plots/qwen_model_sft_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..35a0cca6ff890447edc7063f56b3c947ae418cbe --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/qwen_model_sft_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd868c27b014c51f4b972045f5d1156e6debb21d21835b6e3da4dbab7f5f6d77 +size 44621 diff --git a/polyguard-rl/submission_bundle/local_results/plots/reward_component_bars.png b/polyguard-rl/submission_bundle/local_results/plots/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..713b1354077916a5ded3a1fb4684b75cca0d4d70 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/reward_component_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e45b33c89bbb501551ab6ebdd6ff958994d00259e8bfd04f19a6affdb28a66 +size 22230 diff --git a/polyguard-rl/submission_bundle/local_results/plots/sft_loss_curves.png b/polyguard-rl/submission_bundle/local_results/plots/sft_loss_curves.png new file mode 100644 index 
0000000000000000000000000000000000000000..b24b4375e766ffec8647952af25b6baed4bc0abf --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/sft_loss_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa82325064843def8e501725340f29b5e0782825dcce9fba5349fd6e68b559e +size 76094 diff --git a/polyguard-rl/submission_bundle/local_results/plots/sft_validity_reward.png b/polyguard-rl/submission_bundle/local_results/plots/sft_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..8e95b1dd424b9e8bc4d2842b0f26508b1096b1b5 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/sft_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:986742eddcce5b63a43a0618a24f161034d22c4818156f32a055f30abecbb019 +size 46841 diff --git a/polyguard-rl/submission_bundle/local_results/plots/sft_vs_grpo_reward.png b/polyguard-rl/submission_bundle/local_results/plots/sft_vs_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..a6b7230c05cdacd4e39c1612af3f65c1c70f4d64 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/sft_vs_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce92f65e018f8374be0e4db966d2ad33c9f86ee143e6386c14a6f055177d5a25 +size 50183 diff --git a/polyguard-rl/submission_bundle/local_results/plots/success_rate.png b/polyguard-rl/submission_bundle/local_results/plots/success_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d48cf79006c0bc7241782a29d010e95bc524069f --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/success_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb49f19b04948dbe3dcac9b2469e93e6e07167fd297587228d70232e7aa41ff5 +size 11233 diff --git a/polyguard-rl/submission_bundle/local_results/plots/train_holdout_gap.png 
b/polyguard-rl/submission_bundle/local_results/plots/train_holdout_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..d2f671922080bd4787c7b9f16f4526cfc2e20b94 --- /dev/null +++ b/polyguard-rl/submission_bundle/local_results/plots/train_holdout_gap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202c512e64e66edf886f78e400795acca9a01b0832ce00ee14e95ebd1effa77b +size 45030 diff --git a/submission_bundle/local_results/reports/acceptance_gate.json b/polyguard-rl/submission_bundle/local_results/reports/acceptance_gate.json similarity index 100% rename from submission_bundle/local_results/reports/acceptance_gate.json rename to polyguard-rl/submission_bundle/local_results/reports/acceptance_gate.json diff --git a/submission_bundle/local_results/reports/anti_hacking_overfit_report.json b/polyguard-rl/submission_bundle/local_results/reports/anti_hacking_overfit_report.json similarity index 100% rename from submission_bundle/local_results/reports/anti_hacking_overfit_report.json rename to polyguard-rl/submission_bundle/local_results/reports/anti_hacking_overfit_report.json diff --git a/submission_bundle/local_results/reports/baselines.json b/polyguard-rl/submission_bundle/local_results/reports/baselines.json similarity index 100% rename from submission_bundle/local_results/reports/baselines.json rename to polyguard-rl/submission_bundle/local_results/reports/baselines.json diff --git a/submission_bundle/local_results/reports/benchmark_report.json b/polyguard-rl/submission_bundle/local_results/reports/benchmark_report.json similarity index 100% rename from submission_bundle/local_results/reports/benchmark_report.json rename to polyguard-rl/submission_bundle/local_results/reports/benchmark_report.json diff --git a/submission_bundle/local_results/reports/benchmark_report.txt b/polyguard-rl/submission_bundle/local_results/reports/benchmark_report.txt similarity index 100% rename from 
submission_bundle/local_results/reports/benchmark_report.txt rename to polyguard-rl/submission_bundle/local_results/reports/benchmark_report.txt diff --git a/submission_bundle/local_results/reports/dose_train.json b/polyguard-rl/submission_bundle/local_results/reports/dose_train.json similarity index 100% rename from submission_bundle/local_results/reports/dose_train.json rename to polyguard-rl/submission_bundle/local_results/reports/dose_train.json diff --git a/submission_bundle/local_results/reports/dosing_grpo.json b/polyguard-rl/submission_bundle/local_results/reports/dosing_grpo.json similarity index 100% rename from submission_bundle/local_results/reports/dosing_grpo.json rename to polyguard-rl/submission_bundle/local_results/reports/dosing_grpo.json diff --git a/submission_bundle/local_results/reports/frontier_ready.json b/polyguard-rl/submission_bundle/local_results/reports/frontier_ready.json similarity index 100% rename from submission_bundle/local_results/reports/frontier_ready.json rename to polyguard-rl/submission_bundle/local_results/reports/frontier_ready.json diff --git a/submission_bundle/local_results/reports/graph_train.json b/polyguard-rl/submission_bundle/local_results/reports/graph_train.json similarity index 100% rename from submission_bundle/local_results/reports/graph_train.json rename to polyguard-rl/submission_bundle/local_results/reports/graph_train.json diff --git a/submission_bundle/local_results/reports/grpo_ablation_report.json b/polyguard-rl/submission_bundle/local_results/reports/grpo_ablation_report.json similarity index 100% rename from submission_bundle/local_results/reports/grpo_ablation_report.json rename to polyguard-rl/submission_bundle/local_results/reports/grpo_ablation_report.json diff --git a/submission_bundle/local_results/reports/grpo_training_cycle/grpo_trl_run.json b/polyguard-rl/submission_bundle/local_results/reports/grpo_training_cycle/grpo_trl_run.json similarity index 100% rename from 
submission_bundle/local_results/reports/grpo_training_cycle/grpo_trl_run.json rename to polyguard-rl/submission_bundle/local_results/reports/grpo_training_cycle/grpo_trl_run.json diff --git a/submission_bundle/local_results/reports/grpo_training_cycle/hf_training_status.json b/polyguard-rl/submission_bundle/local_results/reports/grpo_training_cycle/hf_training_status.json similarity index 100% rename from submission_bundle/local_results/reports/grpo_training_cycle/hf_training_status.json rename to polyguard-rl/submission_bundle/local_results/reports/grpo_training_cycle/hf_training_status.json diff --git a/submission_bundle/local_results/reports/grpo_trl_run.json b/polyguard-rl/submission_bundle/local_results/reports/grpo_trl_run.json similarity index 100% rename from submission_bundle/local_results/reports/grpo_trl_run.json rename to polyguard-rl/submission_bundle/local_results/reports/grpo_trl_run.json diff --git a/submission_bundle/local_results/reports/grpo_trl_run_auto.json b/polyguard-rl/submission_bundle/local_results/reports/grpo_trl_run_auto.json similarity index 100% rename from submission_bundle/local_results/reports/grpo_trl_run_auto.json rename to polyguard-rl/submission_bundle/local_results/reports/grpo_trl_run_auto.json diff --git a/submission_bundle/local_results/reports/grpo_trl_run_fallback_check.json b/polyguard-rl/submission_bundle/local_results/reports/grpo_trl_run_fallback_check.json similarity index 100% rename from submission_bundle/local_results/reports/grpo_trl_run_fallback_check.json rename to polyguard-rl/submission_bundle/local_results/reports/grpo_trl_run_fallback_check.json diff --git a/submission_bundle/local_results/reports/grpo_trl_run_smoke.json b/polyguard-rl/submission_bundle/local_results/reports/grpo_trl_run_smoke.json similarity index 100% rename from submission_bundle/local_results/reports/grpo_trl_run_smoke.json rename to polyguard-rl/submission_bundle/local_results/reports/grpo_trl_run_smoke.json diff --git 
a/submission_bundle/local_results/reports/grpo_trl_run_strict_check.json b/polyguard-rl/submission_bundle/local_results/reports/grpo_trl_run_strict_check.json similarity index 100% rename from submission_bundle/local_results/reports/grpo_trl_run_strict_check.json rename to polyguard-rl/submission_bundle/local_results/reports/grpo_trl_run_strict_check.json diff --git a/submission_bundle/local_results/reports/hf_sweep_summary.json b/polyguard-rl/submission_bundle/local_results/reports/hf_sweep_summary.json similarity index 100% rename from submission_bundle/local_results/reports/hf_sweep_summary.json rename to polyguard-rl/submission_bundle/local_results/reports/hf_sweep_summary.json diff --git a/submission_bundle/local_results/reports/hf_training_status.json b/polyguard-rl/submission_bundle/local_results/reports/hf_training_status.json similarity index 100% rename from submission_bundle/local_results/reports/hf_training_status.json rename to polyguard-rl/submission_bundle/local_results/reports/hf_training_status.json diff --git a/submission_bundle/local_results/reports/improvement_report.json b/polyguard-rl/submission_bundle/local_results/reports/improvement_report.json similarity index 100% rename from submission_bundle/local_results/reports/improvement_report.json rename to polyguard-rl/submission_bundle/local_results/reports/improvement_report.json diff --git a/submission_bundle/local_results/reports/improvement_report_benchmark.json b/polyguard-rl/submission_bundle/local_results/reports/improvement_report_benchmark.json similarity index 100% rename from submission_bundle/local_results/reports/improvement_report_benchmark.json rename to polyguard-rl/submission_bundle/local_results/reports/improvement_report_benchmark.json diff --git a/submission_bundle/local_results/reports/inference_benchmark.json b/polyguard-rl/submission_bundle/local_results/reports/inference_benchmark.json similarity index 100% rename from 
submission_bundle/local_results/reports/inference_benchmark.json rename to polyguard-rl/submission_bundle/local_results/reports/inference_benchmark.json diff --git a/submission_bundle/local_results/reports/planner_grpo.json b/polyguard-rl/submission_bundle/local_results/reports/planner_grpo.json similarity index 100% rename from submission_bundle/local_results/reports/planner_grpo.json rename to polyguard-rl/submission_bundle/local_results/reports/planner_grpo.json diff --git a/submission_bundle/local_results/reports/plot_index.json b/polyguard-rl/submission_bundle/local_results/reports/plot_index.json similarity index 100% rename from submission_bundle/local_results/reports/plot_index.json rename to polyguard-rl/submission_bundle/local_results/reports/plot_index.json diff --git a/submission_bundle/local_results/reports/postsave_inference.json b/polyguard-rl/submission_bundle/local_results/reports/postsave_inference.json similarity index 100% rename from submission_bundle/local_results/reports/postsave_inference.json rename to polyguard-rl/submission_bundle/local_results/reports/postsave_inference.json diff --git a/submission_bundle/local_results/reports/postsave_inference_smoke.json b/polyguard-rl/submission_bundle/local_results/reports/postsave_inference_smoke.json similarity index 100% rename from submission_bundle/local_results/reports/postsave_inference_smoke.json rename to polyguard-rl/submission_bundle/local_results/reports/postsave_inference_smoke.json diff --git a/submission_bundle/local_results/reports/risk_train.json b/polyguard-rl/submission_bundle/local_results/reports/risk_train.json similarity index 100% rename from submission_bundle/local_results/reports/risk_train.json rename to polyguard-rl/submission_bundle/local_results/reports/risk_train.json diff --git a/submission_bundle/local_results/reports/robustness.json b/polyguard-rl/submission_bundle/local_results/reports/robustness.json similarity index 100% rename from 
submission_bundle/local_results/reports/robustness.json rename to polyguard-rl/submission_bundle/local_results/reports/robustness.json diff --git a/submission_bundle/local_results/reports/sft_run.json b/polyguard-rl/submission_bundle/local_results/reports/sft_run.json similarity index 100% rename from submission_bundle/local_results/reports/sft_run.json rename to polyguard-rl/submission_bundle/local_results/reports/sft_run.json diff --git a/submission_bundle/local_results/reports/sft_trl_run.json b/polyguard-rl/submission_bundle/local_results/reports/sft_trl_run.json similarity index 100% rename from submission_bundle/local_results/reports/sft_trl_run.json rename to polyguard-rl/submission_bundle/local_results/reports/sft_trl_run.json diff --git a/submission_bundle/local_results/reports/supervisor_grpo.json b/polyguard-rl/submission_bundle/local_results/reports/supervisor_grpo.json similarity index 100% rename from submission_bundle/local_results/reports/supervisor_grpo.json rename to polyguard-rl/submission_bundle/local_results/reports/supervisor_grpo.json diff --git a/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json b/polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json similarity index 100% rename from submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json diff --git a/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json similarity index 100% rename from submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json rename to 
polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json diff --git a/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json b/polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json similarity index 100% rename from submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json rename to polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json diff --git a/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json similarity index 100% rename from submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json rename to polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json diff --git a/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json b/polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json similarity index 100% rename from submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json diff --git a/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json similarity index 100% rename from submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json rename to polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json diff --git 
a/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json b/polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json similarity index 100% rename from submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json rename to polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json diff --git a/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json b/polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json similarity index 100% rename from submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json rename to polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json diff --git a/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json b/polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json similarity index 100% rename from submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json rename to polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json diff --git a/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json b/polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json similarity index 100% rename from submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json rename to polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json diff --git a/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json 
b/polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json similarity index 100% rename from submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json rename to polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json diff --git a/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json b/polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json similarity index 100% rename from submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json rename to polyguard-rl/submission_bundle/local_results/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json diff --git a/submission_bundle/notebooks/01_data_building.ipynb b/polyguard-rl/submission_bundle/notebooks/01_data_building.ipynb similarity index 100% rename from submission_bundle/notebooks/01_data_building.ipynb rename to polyguard-rl/submission_bundle/notebooks/01_data_building.ipynb diff --git a/submission_bundle/notebooks/02_knowledge_graph.ipynb b/polyguard-rl/submission_bundle/notebooks/02_knowledge_graph.ipynb similarity index 100% rename from submission_bundle/notebooks/02_knowledge_graph.ipynb rename to polyguard-rl/submission_bundle/notebooks/02_knowledge_graph.ipynb diff --git a/submission_bundle/notebooks/03_risk_models.ipynb b/polyguard-rl/submission_bundle/notebooks/03_risk_models.ipynb similarity index 100% rename from submission_bundle/notebooks/03_risk_models.ipynb rename to polyguard-rl/submission_bundle/notebooks/03_risk_models.ipynb diff --git a/submission_bundle/notebooks/04_environment_validation.ipynb b/polyguard-rl/submission_bundle/notebooks/04_environment_validation.ipynb similarity index 100% rename from submission_bundle/notebooks/04_environment_validation.ipynb rename to polyguard-rl/submission_bundle/notebooks/04_environment_validation.ipynb diff --git 
a/submission_bundle/notebooks/05_sft_debug.ipynb b/polyguard-rl/submission_bundle/notebooks/05_sft_debug.ipynb similarity index 100% rename from submission_bundle/notebooks/05_sft_debug.ipynb rename to polyguard-rl/submission_bundle/notebooks/05_sft_debug.ipynb diff --git a/submission_bundle/notebooks/06_grpo_debug.ipynb b/polyguard-rl/submission_bundle/notebooks/06_grpo_debug.ipynb similarity index 100% rename from submission_bundle/notebooks/06_grpo_debug.ipynb rename to polyguard-rl/submission_bundle/notebooks/06_grpo_debug.ipynb diff --git a/submission_bundle/notebooks/07_policy_analysis.ipynb b/polyguard-rl/submission_bundle/notebooks/07_policy_analysis.ipynb similarity index 100% rename from submission_bundle/notebooks/07_policy_analysis.ipynb rename to polyguard-rl/submission_bundle/notebooks/07_policy_analysis.ipynb diff --git a/submission_bundle/notebooks/08_dosing_analysis.ipynb b/polyguard-rl/submission_bundle/notebooks/08_dosing_analysis.ipynb similarity index 100% rename from submission_bundle/notebooks/08_dosing_analysis.ipynb rename to polyguard-rl/submission_bundle/notebooks/08_dosing_analysis.ipynb diff --git a/submission_bundle/notebooks/09_training_loop.ipynb b/polyguard-rl/submission_bundle/notebooks/09_training_loop.ipynb similarity index 100% rename from submission_bundle/notebooks/09_training_loop.ipynb rename to polyguard-rl/submission_bundle/notebooks/09_training_loop.ipynb diff --git a/submission_bundle/qwen_completed_runs/README.md b/polyguard-rl/submission_bundle/qwen_completed_runs/README.md similarity index 100% rename from submission_bundle/qwen_completed_runs/README.md rename to polyguard-rl/submission_bundle/qwen_completed_runs/README.md diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_latency.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_latency.png new file mode 100644 index 
0000000000000000000000000000000000000000..be7e308acf74757bb7bb36bdd1dba04d57a70ea1 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_latency.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11cedb9e4d520d67f67969b2e7124883889950888e180858c2c84157554cc89a +size 58853 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_legality.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..1113f0ebb7243a2b03530964ec6315ec65de5c63 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e53b7fef0413492382712ca477f10c471bb26561c8db0566a283be20f2c4d9 +size 51915 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..2c8be057c8679f255a48baa579e2698510e36419 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37f7740740ecd7e509f0859ac962e045c2f0a07960d4e00ceccd1cb8281abe7 +size 58200 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png new file mode 100644 index 0000000000000000000000000000000000000000..d0b118adb6baad555b9004547dcaf13089ea35fd --- /dev/null +++ 
b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e2d0f462663346ad69b09a236674983829f7e7a23706e12eed74b1901c20f0 +size 40643 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/policy_ablation_avg_reward.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/policy_ablation_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..d5cde71fda1e2b3ca6ee855665b41f49f4c80a0f --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/policy_ablation_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:026c7c4f37fe864a156fd8c14fb550925aeef456966e16e41db3b3c23801baba +size 39262 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/policy_ablation_exploit_detection.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/policy_ablation_exploit_detection.png new file mode 100644 index 0000000000000000000000000000000000000000..7ae6a8fd169570681123487cdce84757f051bca9 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/policy_ablation_exploit_detection.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63c66b9dcee62af7b6a1c435fa3c12167bb266bc1d1b239c2002d8243cbcd15 +size 30647 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/policy_ablation_legality.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/policy_ablation_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..67b35e0e3ae5398409bc3d7b662ef2ceb868a490 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/policy_ablation_legality.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a0c505ec81276d4a9eb9818ee33698e916148457ed24ed80b8b1f88b93cbd5e8 +size 33228 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/primary_reward_channel_bars.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/primary_reward_channel_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..fb1f5f1772c1fdb6f5538dca8436bab767e1c9f9 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/primary_reward_channel_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350b188c89e9fac678f8874cdfb755d00e0f4c91e476b2b3038a08784dc8499b +size 51895 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..ab2682c3e2cbf2a3c4338f6c02e0b42768a9b583 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a38ac3a639dd23250d7b11412643f43971c9ca60842b569b79a79d70aceeac71 +size 35813 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_latency.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..b719773814c253c870ac8cba1d7aef285d0eb862 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_latency.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b670cfff8fbce3f23f0f67df84d75370745cb51298ae0a8da3458ba55bb8986 +size 30067 diff --git 
a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_reward.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..86822aa56253de8959c09699e31b29405dbdb5d1 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4027700596bc53c29e6c8fe5058dbc84f2452c6e4299869ba3a41614a56b974f +size 37401 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png new file mode 100644 index 0000000000000000000000000000000000000000..af5b44b88c7ecc050085c11062326ffe6c28f869 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4944f9b34e912cb5d7be323452b661c761b2e431c01e59cb23f858e2187f2df9 +size 20369 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_sft_runtime.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_sft_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..fc3a14cfc74ed4f75cc8a7fa336235d3207e55b2 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_sft_runtime.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f792ccfb0694c6df64a0f01950b09a85258c25237c97c2e6e3d124253c42a9c +size 30813 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_sft_learning_rate.png 
b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d9036a9840836250368a7993ad019e1175fa484a --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_sft_learning_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13336a7ac896659c56b9a07cd272616a4ead67402fad5292dff8a9560a84c981 +size 71817 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_sft_token_accuracy.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..a6337bcdf68cf0b10fa8184d6352cb7feb01beaf --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f5e5d7288978d9d536785b6f366570a00e2cb2cd7ee074411a78d01977ee78 +size 84524 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_sft_training_loss.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..1a01a1728fe38823c531bee9b49cf156b9ba9fef --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e77776497c52e5192e9658f44da24f05ee27bb137352dfb739d98b492291fdcb +size 70407 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png new file mode 100644 index 
0000000000000000000000000000000000000000..4373c02c5fee262fbb3ceab3d953f0be01b83691 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73c01ac6fd578a4c19dbe5116ae2be54bb527a787947d79e29222896323203ba +size 62864 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..54dbe92d3a51ba9cc8883e782d1a5654cb9bd21e --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:827b7584252d1e1fec71a719c107680ff762e5a5fdbe4eed6538a0794f8b5cf2 +size 72990 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_1_5b_sft_learning_rate.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_1_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..3facdc5ffea0ef2c6c064075f5077d8b57960cef --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_1_5b_sft_learning_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3699683c4759100be7d76530e9ad6fee136a2cc4aeb34972308788f87c2eb75e +size 70799 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_1_5b_sft_token_accuracy.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_1_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..6701100aaa3fd4db073a801c41988f71d4cab5d9 --- /dev/null +++ 
b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_1_5b_sft_token_accuracy.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e64b64dca38f903fb4a47a4f0071335cb6abe1089f3e31a365774b0722077b +size 61234 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_1_5b_sft_training_loss.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_1_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..f8da2140737f7aee30abee97a36e06008600d89d --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/qwen_1_5b_sft_training_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b748f7d53e33826d069832bbca2c18edb89804cb0cc68b89b2dca039cff0b8 +size 54900 diff --git a/submission_bundle/qwen_completed_runs/charts/generated/reward_component_bars.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/reward_component_bars.png similarity index 100% rename from submission_bundle/qwen_completed_runs/charts/generated/reward_component_bars.png rename to polyguard-rl/submission_bundle/qwen_completed_runs/charts/generated/reward_component_bars.png diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/anti_cheat_failure_rates.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/anti_cheat_failure_rates.png new file mode 100644 index 0000000000000000000000000000000000000000..7c504c44bf70b1e5366503778875e526db8df542 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/anti_cheat_failure_rates.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad0a632699048d87865977f75522575ea22ebd1940719952b8a1e9064cfdb3b4 +size 52299 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/avg_reward.png 
b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..abf2aa56b5280d757f4a3af28aeef928ff704530 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e45b324d208a71eeffa0dcfe146a5a7f2e17cf5a951a173dcd68818106d774c +size 11094 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/grpo_reward_curves.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/grpo_reward_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..f3850bd6c22f350e534a1874e9e6d60f6f350450 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/grpo_reward_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d134ce16a328958cdddfb2d1273d9e8c27f72bf28a38ebfbcbd0ccdc540fe4d5 +size 18724 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/inference_latency_validity.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..3fd99ec44ebcfc8ef0319042d68ba9a05f5b6d15 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/inference_latency_validity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d2f5e08331ad38d98102bdba6cb98ce50f2a1cfe71b9b04f295417550d9ae3 +size 55565 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/inference_validity_reward.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/inference_validity_reward.png new file mode 100644 index 
0000000000000000000000000000000000000000..f33803d8077f18c5635493a1baafca5ec7e48a1f --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/inference_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed58b43ac3029c3bd0ee87d5f70ebd8088fdcc04ffe870a04771cbd55a49b782 +size 51943 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/legality_rate.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..293afc9e01bcc4b9cfa048d0264b1471d8fd486b --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/legality_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd21eae41ea8c538ed698e94803df78bf0f2a13c0792b6492a2e15b4b449243c +size 10839 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/policy_stack_avg_reward.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1081c33e7498b4e47e9ceb4abbff0ecacb785d --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/policy_stack_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437dc54a16cd930fe191bfb337ebb598845f7c0ceea812d936d3cc7f60593e19 +size 13709 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/qwen_model_grpo_reward.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/qwen_model_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..6d2567111051ba3a841fc3cde5b3076d788bd209 --- /dev/null +++ 
b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/qwen_model_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19cfcb40ed7116e365ebc2d5b26b589a712274d697d2925cd6f2ab696f73e59 +size 50755 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_loss.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..718bbaafed77be7c93097702d6668174747da2c5 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b0dd58526f8f70f77c6aef089d258ee3dbcddd159353b2dbe746cabece78b7 +size 46793 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_reward.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..e81bfa202cd7ab2b5b20df64d10a099b2c94e7e5 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:987d3064d7d069b8165e4a68a03b3db1efa073f553861af1c745596d3f37eaf4 +size 49361 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/reward_component_bars.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..6751ab23d8c9be3b40c5c7307fa73845cf997aff --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/reward_component_bars.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:c72d41541c4ecc021764f494fa3c62f1ce32dd292dd7a7f9cc64a1b17c0a9373 +size 18820 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/sft_loss_curves.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/sft_loss_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..73657c679f411f4a68b974e34259790f151b7d29 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/sft_loss_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7d69fe49976c0a685b85ca8e88ea8c501bd737d1578beab1a006e40f599bf1f +size 76643 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/sft_validity_reward.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/sft_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..bcb4b3e7407c7a93a5bdb911d32fffd40e10dc48 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/sft_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c2dfea5ab9ec5eec712516ea73935973c205a3bd99eb2e77516adaa349e798 +size 50375 diff --git a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/sft_vs_grpo_reward.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/sft_vs_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..4a3b345245cec83cb42ed3e76a4e29ad58f5a492 --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/sft_vs_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6e8c073db4cac6b72d15b95538c29e3f6127e2043b19a8a4394ae46ff84690 +size 55073 diff --git 
a/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/train_holdout_gap.png b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/train_holdout_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..a8b0018ecdb6ed01eeffce1fb1a2da84ae3ddd4c --- /dev/null +++ b/polyguard-rl/submission_bundle/qwen_completed_runs/charts/local_available_combined/train_holdout_gap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11699ac5a5e2d9180cc5268fa1bed494ca08bd7a083f446599a659063710abcb +size 49197 diff --git a/submission_bundle/qwen_completed_runs/manifests/qwen_0_5b_1_5b_summary.json b/polyguard-rl/submission_bundle/qwen_completed_runs/manifests/qwen_0_5b_1_5b_summary.json similarity index 100% rename from submission_bundle/qwen_completed_runs/manifests/qwen_0_5b_1_5b_summary.json rename to polyguard-rl/submission_bundle/qwen_completed_runs/manifests/qwen_0_5b_1_5b_summary.json diff --git a/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/postsave_inference_sft.json b/polyguard-rl/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/postsave_inference_sft.json similarity index 100% rename from submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/postsave_inference_sft.json rename to polyguard-rl/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/postsave_inference_sft.json diff --git a/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/run_metadata.json b/polyguard-rl/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/run_metadata.json similarity index 100% rename from submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/run_metadata.json rename to polyguard-rl/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/run_metadata.json diff --git 
a/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_history.json b/polyguard-rl/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_history.json similarity index 100% rename from submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_history.json rename to polyguard-rl/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_history.json diff --git a/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_trl_run.json b/polyguard-rl/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_trl_run.json similarity index 100% rename from submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_trl_run.json rename to polyguard-rl/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_trl_run.json diff --git a/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/postsave_inference_sft.json b/polyguard-rl/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/postsave_inference_sft.json similarity index 100% rename from submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/postsave_inference_sft.json rename to polyguard-rl/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/postsave_inference_sft.json diff --git a/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/run_metadata.json b/polyguard-rl/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/run_metadata.json similarity index 100% rename from submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/run_metadata.json rename to polyguard-rl/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/run_metadata.json diff --git a/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_history.json 
b/polyguard-rl/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_history.json similarity index 100% rename from submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_history.json rename to polyguard-rl/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_history.json diff --git a/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_trl_run.json b/polyguard-rl/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_trl_run.json similarity index 100% rename from submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_trl_run.json rename to polyguard-rl/submission_bundle/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_trl_run.json diff --git a/submission_bundle/qwen_completed_runs/reports/remote_status/live_hf_status_snapshot.json b/polyguard-rl/submission_bundle/qwen_completed_runs/reports/remote_status/live_hf_status_snapshot.json similarity index 100% rename from submission_bundle/qwen_completed_runs/reports/remote_status/live_hf_status_snapshot.json rename to polyguard-rl/submission_bundle/qwen_completed_runs/reports/remote_status/live_hf_status_snapshot.json diff --git a/submission_bundle/qwen_completed_runs/reports/remote_status/qwen_0_5b_1_5b_remote_stage_durations.json b/polyguard-rl/submission_bundle/qwen_completed_runs/reports/remote_status/qwen_0_5b_1_5b_remote_stage_durations.json similarity index 100% rename from submission_bundle/qwen_completed_runs/reports/remote_status/qwen_0_5b_1_5b_remote_stage_durations.json rename to polyguard-rl/submission_bundle/qwen_completed_runs/reports/remote_status/qwen_0_5b_1_5b_remote_stage_durations.json diff --git a/submission_bundle/qwen_completed_runs/reports/remote_status/qwen_0_5b_completed_commands.json b/polyguard-rl/submission_bundle/qwen_completed_runs/reports/remote_status/qwen_0_5b_completed_commands.json similarity index 100% rename from 
submission_bundle/qwen_completed_runs/reports/remote_status/qwen_0_5b_completed_commands.json rename to polyguard-rl/submission_bundle/qwen_completed_runs/reports/remote_status/qwen_0_5b_completed_commands.json diff --git a/submission_bundle/qwen_completed_runs/reports/remote_status/qwen_1_5b_completed_commands.json b/polyguard-rl/submission_bundle/qwen_completed_runs/reports/remote_status/qwen_1_5b_completed_commands.json similarity index 100% rename from submission_bundle/qwen_completed_runs/reports/remote_status/qwen_1_5b_completed_commands.json rename to polyguard-rl/submission_bundle/qwen_completed_runs/reports/remote_status/qwen_1_5b_completed_commands.json diff --git a/submission_bundle/sft_baseline/docs_results/README.md b/polyguard-rl/submission_bundle/sft_baseline/docs_results/README.md similarity index 100% rename from submission_bundle/sft_baseline/docs_results/README.md rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/README.md diff --git a/submission_bundle/sft_baseline/docs_results/acceptance_gate.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/acceptance_gate.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/acceptance_gate.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/acceptance_gate.json diff --git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/anti_cheat_failure_rates.png b/polyguard-rl/submission_bundle/sft_baseline/docs_results/anti_cheat_failure_rates.png new file mode 100644 index 0000000000000000000000000000000000000000..5a667f30979c0e4ee981b89910fe48b9d5f64587 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/docs_results/anti_cheat_failure_rates.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1599181b9f2a499b59facf5629c0a413268a48d8a201662995e94a5f9411948a +size 50051 diff --git a/submission_bundle/sft_baseline/docs_results/anti_hacking_overfit_report.json 
b/polyguard-rl/submission_bundle/sft_baseline/docs_results/anti_hacking_overfit_report.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/anti_hacking_overfit_report.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/anti_hacking_overfit_report.json diff --git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/avg_process_fidelity.png b/polyguard-rl/submission_bundle/sft_baseline/docs_results/avg_process_fidelity.png new file mode 100644 index 0000000000000000000000000000000000000000..af8956c1da14001e096d7cabf96f511bd517c075 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/docs_results/avg_process_fidelity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02f7f1be2533be9d0862eb78c8948ce689128b3e0d2c72cf89719dbecbf8da04 +size 12532 diff --git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/avg_reward.png b/polyguard-rl/submission_bundle/sft_baseline/docs_results/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..abf2aa56b5280d757f4a3af28aeef928ff704530 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/docs_results/avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e45b324d208a71eeffa0dcfe146a5a7f2e17cf5a951a173dcd68818106d774c +size 11094 diff --git a/submission_bundle/sft_baseline/docs_results/baselines.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/baselines.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/baselines.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/baselines.json diff --git a/submission_bundle/sft_baseline/docs_results/benchmark_report.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/benchmark_report.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/benchmark_report.json rename to 
polyguard-rl/submission_bundle/sft_baseline/docs_results/benchmark_report.json diff --git a/submission_bundle/sft_baseline/docs_results/benchmark_report.txt b/polyguard-rl/submission_bundle/sft_baseline/docs_results/benchmark_report.txt similarity index 100% rename from submission_bundle/sft_baseline/docs_results/benchmark_report.txt rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/benchmark_report.txt diff --git a/submission_bundle/sft_baseline/docs_results/dose_train.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/dose_train.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/dose_train.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/dose_train.json diff --git a/submission_bundle/sft_baseline/docs_results/dosing_grpo.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/dosing_grpo.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/dosing_grpo.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/dosing_grpo.json diff --git a/submission_bundle/sft_baseline/docs_results/frontier_ready.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/frontier_ready.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/frontier_ready.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/frontier_ready.json diff --git a/submission_bundle/sft_baseline/docs_results/graph_train.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/graph_train.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/graph_train.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/graph_train.json diff --git a/submission_bundle/sft_baseline/docs_results/grpo_ablation_report.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/grpo_ablation_report.json similarity index 100% rename from 
submission_bundle/sft_baseline/docs_results/grpo_ablation_report.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/grpo_ablation_report.json diff --git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/grpo_reward_curves.png b/polyguard-rl/submission_bundle/sft_baseline/docs_results/grpo_reward_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..38c93fc5e74f14d64778bf3fc94463f312e3d6e6 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/docs_results/grpo_reward_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5233693b37cb7132d11d7d550072b99fc5202d2a0a7304aaffb07bbaf74c84a +size 19699 diff --git a/submission_bundle/sft_baseline/docs_results/grpo_trl_run.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/grpo_trl_run.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/grpo_trl_run.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/grpo_trl_run.json diff --git a/submission_bundle/sft_baseline/docs_results/grpo_trl_run_auto.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/grpo_trl_run_auto.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/grpo_trl_run_auto.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/grpo_trl_run_auto.json diff --git a/submission_bundle/sft_baseline/docs_results/grpo_trl_run_fallback_check.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/grpo_trl_run_fallback_check.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/grpo_trl_run_fallback_check.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/grpo_trl_run_fallback_check.json diff --git a/submission_bundle/sft_baseline/docs_results/grpo_trl_run_smoke.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/grpo_trl_run_smoke.json similarity index 100% rename from 
submission_bundle/sft_baseline/docs_results/grpo_trl_run_smoke.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/grpo_trl_run_smoke.json diff --git a/submission_bundle/sft_baseline/docs_results/grpo_trl_run_strict_check.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/grpo_trl_run_strict_check.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/grpo_trl_run_strict_check.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/grpo_trl_run_strict_check.json diff --git a/submission_bundle/sft_baseline/docs_results/hf_space_verification.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/hf_space_verification.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/hf_space_verification.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/hf_space_verification.json diff --git a/submission_bundle/sft_baseline/docs_results/hf_sweep_summary.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/hf_sweep_summary.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/hf_sweep_summary.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/hf_sweep_summary.json diff --git a/submission_bundle/sft_baseline/docs_results/hf_training_status.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/hf_training_status.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/hf_training_status.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/hf_training_status.json diff --git a/submission_bundle/sft_baseline/docs_results/improvement_report.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/improvement_report.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/improvement_report.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/improvement_report.json diff --git 
a/submission_bundle/sft_baseline/docs_results/improvement_report_benchmark.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/improvement_report_benchmark.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/improvement_report_benchmark.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/improvement_report_benchmark.json diff --git a/submission_bundle/sft_baseline/docs_results/inference_benchmark.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/inference_benchmark.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/inference_benchmark.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/inference_benchmark.json diff --git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/inference_latency_validity.png b/polyguard-rl/submission_bundle/sft_baseline/docs_results/inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..a86ea68fe05da60d6c6322a6eb1fa1fb60a788dc --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/docs_results/inference_latency_validity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:116b66e6d10c5f7e05dcf959f88265b502bb81e44c871d976b185d328cd2ccba +size 53622 diff --git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/inference_validity_reward.png b/polyguard-rl/submission_bundle/sft_baseline/docs_results/inference_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..026a26ad75ae34d513b7c1affbb83daf6675fd7c --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/docs_results/inference_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9ba7cb41ca42268f40219ef7556d8b3e61156af3bc8c97dfed4c7b6b4bc39e9 +size 49225 diff --git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/legality_rate.png 
b/polyguard-rl/submission_bundle/sft_baseline/docs_results/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..293afc9e01bcc4b9cfa048d0264b1471d8fd486b --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/docs_results/legality_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd21eae41ea8c538ed698e94803df78bf0f2a13c0792b6492a2e15b4b449243c +size 10839 diff --git a/submission_bundle/sft_baseline/docs_results/planner_grpo.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/planner_grpo.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/planner_grpo.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/planner_grpo.json diff --git a/submission_bundle/sft_baseline/docs_results/plot_index.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/plot_index.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/plot_index.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/plot_index.json diff --git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/policy_stack_avg_reward.png b/polyguard-rl/submission_bundle/sft_baseline/docs_results/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1081c33e7498b4e47e9ceb4abbff0ecacb785d --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/docs_results/policy_stack_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437dc54a16cd930fe191bfb337ebb598845f7c0ceea812d936d3cc7f60593e19 +size 13709 diff --git a/submission_bundle/sft_baseline/docs_results/postsave_inference.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/postsave_inference.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/postsave_inference.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/postsave_inference.json diff 
--git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/qwen_model_grpo_reward.png b/polyguard-rl/submission_bundle/sft_baseline/docs_results/qwen_model_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..8a0d04f71a614b34ebf457f2568ea9c455c4e2cc --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/docs_results/qwen_model_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2af6f1dce9ab1a81ac026e281438d9a40bee41aaea6eed0190df75033ec6e873 +size 45366 diff --git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/qwen_model_sft_loss.png b/polyguard-rl/submission_bundle/sft_baseline/docs_results/qwen_model_sft_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..02106a69703a6545da9753f86d425f25b9af3256 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/docs_results/qwen_model_sft_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04fbf956625fe63139c7341686dee2fe928e4a908380beebe06653f4b532b937 +size 40728 diff --git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/qwen_model_sft_reward.png b/polyguard-rl/submission_bundle/sft_baseline/docs_results/qwen_model_sft_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..35a0cca6ff890447edc7063f56b3c947ae418cbe --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/docs_results/qwen_model_sft_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd868c27b014c51f4b972045f5d1156e6debb21d21835b6e3da4dbab7f5f6d77 +size 44621 diff --git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/reward_component_bars.png b/polyguard-rl/submission_bundle/sft_baseline/docs_results/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..713b1354077916a5ded3a1fb4684b75cca0d4d70 --- /dev/null +++ 
b/polyguard-rl/submission_bundle/sft_baseline/docs_results/reward_component_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e45b33c89bbb501551ab6ebdd6ff958994d00259e8bfd04f19a6affdb28a66 +size 22230 diff --git a/submission_bundle/sft_baseline/docs_results/risk_train.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/risk_train.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/risk_train.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/risk_train.json diff --git a/submission_bundle/sft_baseline/docs_results/robustness.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/robustness.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/robustness.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/robustness.json diff --git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/sft_loss_curves.png b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sft_loss_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..b24b4375e766ffec8647952af25b6baed4bc0abf --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sft_loss_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa82325064843def8e501725340f29b5e0782825dcce9fba5349fd6e68b559e +size 76094 diff --git a/submission_bundle/sft_baseline/docs_results/sft_run.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sft_run.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/sft_run.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/sft_run.json diff --git a/submission_bundle/sft_baseline/docs_results/sft_trl_run.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sft_trl_run.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/sft_trl_run.json rename to 
polyguard-rl/submission_bundle/sft_baseline/docs_results/sft_trl_run.json diff --git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/sft_validity_reward.png b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sft_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..8e95b1dd424b9e8bc4d2842b0f26508b1096b1b5 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sft_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:986742eddcce5b63a43a0618a24f161034d22c4818156f32a055f30abecbb019 +size 46841 diff --git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/sft_vs_grpo_reward.png b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sft_vs_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..a6b7230c05cdacd4e39c1612af3f65c1c70f4d64 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sft_vs_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce92f65e018f8374be0e4db966d2ad33c9f86ee143e6386c14a6f055177d5a25 +size 50183 diff --git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/success_rate.png b/polyguard-rl/submission_bundle/sft_baseline/docs_results/success_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d48cf79006c0bc7241782a29d010e95bc524069f --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/docs_results/success_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb49f19b04948dbe3dcac9b2469e93e6e07167fd297587228d70232e7aa41ff5 +size 11233 diff --git a/submission_bundle/sft_baseline/docs_results/supervisor_grpo.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/supervisor_grpo.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/supervisor_grpo.json rename to 
polyguard-rl/submission_bundle/sft_baseline/docs_results/supervisor_grpo.json diff --git a/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json diff --git a/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json diff --git a/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json diff --git a/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json diff --git 
a/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json diff --git a/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json diff --git a/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json diff --git a/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json diff --git 
a/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json diff --git a/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json diff --git a/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json diff --git a/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json b/polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json similarity index 100% rename from submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json rename to polyguard-rl/submission_bundle/sft_baseline/docs_results/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json diff --git a/polyguard-rl/submission_bundle/sft_baseline/docs_results/train_holdout_gap.png 
b/polyguard-rl/submission_bundle/sft_baseline/docs_results/train_holdout_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..d2f671922080bd4787c7b9f16f4526cfc2e20b94 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/docs_results/train_holdout_gap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202c512e64e66edf886f78e400795acca9a01b0832ce00ee14e95ebd1effa77b +size 45030 diff --git a/polyguard-rl/submission_bundle/sft_baseline/plots/anti_cheat_failure_rates.png b/polyguard-rl/submission_bundle/sft_baseline/plots/anti_cheat_failure_rates.png new file mode 100644 index 0000000000000000000000000000000000000000..5a667f30979c0e4ee981b89910fe48b9d5f64587 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/plots/anti_cheat_failure_rates.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1599181b9f2a499b59facf5629c0a413268a48d8a201662995e94a5f9411948a +size 50051 diff --git a/polyguard-rl/submission_bundle/sft_baseline/plots/avg_process_fidelity.png b/polyguard-rl/submission_bundle/sft_baseline/plots/avg_process_fidelity.png new file mode 100644 index 0000000000000000000000000000000000000000..af8956c1da14001e096d7cabf96f511bd517c075 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/plots/avg_process_fidelity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02f7f1be2533be9d0862eb78c8948ce689128b3e0d2c72cf89719dbecbf8da04 +size 12532 diff --git a/polyguard-rl/submission_bundle/sft_baseline/plots/avg_reward.png b/polyguard-rl/submission_bundle/sft_baseline/plots/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..abf2aa56b5280d757f4a3af28aeef928ff704530 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/plots/avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e45b324d208a71eeffa0dcfe146a5a7f2e17cf5a951a173dcd68818106d774c +size 11094 diff --git 
a/polyguard-rl/submission_bundle/sft_baseline/plots/grpo_reward_curves.png b/polyguard-rl/submission_bundle/sft_baseline/plots/grpo_reward_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..38c93fc5e74f14d64778bf3fc94463f312e3d6e6 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/plots/grpo_reward_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5233693b37cb7132d11d7d550072b99fc5202d2a0a7304aaffb07bbaf74c84a +size 19699 diff --git a/polyguard-rl/submission_bundle/sft_baseline/plots/inference_latency_validity.png b/polyguard-rl/submission_bundle/sft_baseline/plots/inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..a86ea68fe05da60d6c6322a6eb1fa1fb60a788dc --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/plots/inference_latency_validity.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:116b66e6d10c5f7e05dcf959f88265b502bb81e44c871d976b185d328cd2ccba +size 53622 diff --git a/polyguard-rl/submission_bundle/sft_baseline/plots/inference_validity_reward.png b/polyguard-rl/submission_bundle/sft_baseline/plots/inference_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..026a26ad75ae34d513b7c1affbb83daf6675fd7c --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/plots/inference_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9ba7cb41ca42268f40219ef7556d8b3e61156af3bc8c97dfed4c7b6b4bc39e9 +size 49225 diff --git a/polyguard-rl/submission_bundle/sft_baseline/plots/legality_rate.png b/polyguard-rl/submission_bundle/sft_baseline/plots/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..293afc9e01bcc4b9cfa048d0264b1471d8fd486b --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/plots/legality_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:fd21eae41ea8c538ed698e94803df78bf0f2a13c0792b6492a2e15b4b449243c +size 10839 diff --git a/polyguard-rl/submission_bundle/sft_baseline/plots/policy_stack_avg_reward.png b/polyguard-rl/submission_bundle/sft_baseline/plots/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1081c33e7498b4e47e9ceb4abbff0ecacb785d --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/plots/policy_stack_avg_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437dc54a16cd930fe191bfb337ebb598845f7c0ceea812d936d3cc7f60593e19 +size 13709 diff --git a/polyguard-rl/submission_bundle/sft_baseline/plots/qwen_model_grpo_reward.png b/polyguard-rl/submission_bundle/sft_baseline/plots/qwen_model_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..8a0d04f71a614b34ebf457f2568ea9c455c4e2cc --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/plots/qwen_model_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2af6f1dce9ab1a81ac026e281438d9a40bee41aaea6eed0190df75033ec6e873 +size 45366 diff --git a/polyguard-rl/submission_bundle/sft_baseline/plots/qwen_model_sft_loss.png b/polyguard-rl/submission_bundle/sft_baseline/plots/qwen_model_sft_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..02106a69703a6545da9753f86d425f25b9af3256 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/plots/qwen_model_sft_loss.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04fbf956625fe63139c7341686dee2fe928e4a908380beebe06653f4b532b937 +size 40728 diff --git a/polyguard-rl/submission_bundle/sft_baseline/plots/qwen_model_sft_reward.png b/polyguard-rl/submission_bundle/sft_baseline/plots/qwen_model_sft_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..35a0cca6ff890447edc7063f56b3c947ae418cbe --- /dev/null +++ 
b/polyguard-rl/submission_bundle/sft_baseline/plots/qwen_model_sft_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd868c27b014c51f4b972045f5d1156e6debb21d21835b6e3da4dbab7f5f6d77 +size 44621 diff --git a/polyguard-rl/submission_bundle/sft_baseline/plots/reward_component_bars.png b/polyguard-rl/submission_bundle/sft_baseline/plots/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..713b1354077916a5ded3a1fb4684b75cca0d4d70 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/plots/reward_component_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e45b33c89bbb501551ab6ebdd6ff958994d00259e8bfd04f19a6affdb28a66 +size 22230 diff --git a/polyguard-rl/submission_bundle/sft_baseline/plots/sft_loss_curves.png b/polyguard-rl/submission_bundle/sft_baseline/plots/sft_loss_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..b24b4375e766ffec8647952af25b6baed4bc0abf --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/plots/sft_loss_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa82325064843def8e501725340f29b5e0782825dcce9fba5349fd6e68b559e +size 76094 diff --git a/polyguard-rl/submission_bundle/sft_baseline/plots/sft_validity_reward.png b/polyguard-rl/submission_bundle/sft_baseline/plots/sft_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..8e95b1dd424b9e8bc4d2842b0f26508b1096b1b5 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/plots/sft_validity_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:986742eddcce5b63a43a0618a24f161034d22c4818156f32a055f30abecbb019 +size 46841 diff --git a/polyguard-rl/submission_bundle/sft_baseline/plots/sft_vs_grpo_reward.png b/polyguard-rl/submission_bundle/sft_baseline/plots/sft_vs_grpo_reward.png new file mode 100644 index 
0000000000000000000000000000000000000000..a6b7230c05cdacd4e39c1612af3f65c1c70f4d64 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/plots/sft_vs_grpo_reward.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce92f65e018f8374be0e4db966d2ad33c9f86ee143e6386c14a6f055177d5a25 +size 50183 diff --git a/polyguard-rl/submission_bundle/sft_baseline/plots/success_rate.png b/polyguard-rl/submission_bundle/sft_baseline/plots/success_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..d48cf79006c0bc7241782a29d010e95bc524069f --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/plots/success_rate.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb49f19b04948dbe3dcac9b2469e93e6e07167fd297587228d70232e7aa41ff5 +size 11233 diff --git a/polyguard-rl/submission_bundle/sft_baseline/plots/train_holdout_gap.png b/polyguard-rl/submission_bundle/sft_baseline/plots/train_holdout_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..d2f671922080bd4787c7b9f16f4526cfc2e20b94 --- /dev/null +++ b/polyguard-rl/submission_bundle/sft_baseline/plots/train_holdout_gap.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202c512e64e66edf886f78e400795acca9a01b0832ce00ee14e95ebd1effa77b +size 45030 diff --git a/submission_bundle/sft_baseline/reports/anti_hacking_overfit_report.json b/polyguard-rl/submission_bundle/sft_baseline/reports/anti_hacking_overfit_report.json similarity index 100% rename from submission_bundle/sft_baseline/reports/anti_hacking_overfit_report.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/anti_hacking_overfit_report.json diff --git a/submission_bundle/sft_baseline/reports/baselines.json b/polyguard-rl/submission_bundle/sft_baseline/reports/baselines.json similarity index 100% rename from submission_bundle/sft_baseline/reports/baselines.json rename to 
polyguard-rl/submission_bundle/sft_baseline/reports/baselines.json diff --git a/submission_bundle/sft_baseline/reports/benchmark_report.json b/polyguard-rl/submission_bundle/sft_baseline/reports/benchmark_report.json similarity index 100% rename from submission_bundle/sft_baseline/reports/benchmark_report.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/benchmark_report.json diff --git a/submission_bundle/sft_baseline/reports/benchmark_report.txt b/polyguard-rl/submission_bundle/sft_baseline/reports/benchmark_report.txt similarity index 100% rename from submission_bundle/sft_baseline/reports/benchmark_report.txt rename to polyguard-rl/submission_bundle/sft_baseline/reports/benchmark_report.txt diff --git a/submission_bundle/sft_baseline/reports/hf_sweep_summary.json b/polyguard-rl/submission_bundle/sft_baseline/reports/hf_sweep_summary.json similarity index 100% rename from submission_bundle/sft_baseline/reports/hf_sweep_summary.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/hf_sweep_summary.json diff --git a/submission_bundle/sft_baseline/reports/hf_training_status.json b/polyguard-rl/submission_bundle/sft_baseline/reports/hf_training_status.json similarity index 100% rename from submission_bundle/sft_baseline/reports/hf_training_status.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/hf_training_status.json diff --git a/submission_bundle/sft_baseline/reports/improvement_report.json b/polyguard-rl/submission_bundle/sft_baseline/reports/improvement_report.json similarity index 100% rename from submission_bundle/sft_baseline/reports/improvement_report.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/improvement_report.json diff --git a/submission_bundle/sft_baseline/reports/inference_benchmark.json b/polyguard-rl/submission_bundle/sft_baseline/reports/inference_benchmark.json similarity index 100% rename from submission_bundle/sft_baseline/reports/inference_benchmark.json rename to 
polyguard-rl/submission_bundle/sft_baseline/reports/inference_benchmark.json diff --git a/submission_bundle/sft_baseline/reports/plot_index.json b/polyguard-rl/submission_bundle/sft_baseline/reports/plot_index.json similarity index 100% rename from submission_bundle/sft_baseline/reports/plot_index.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/plot_index.json diff --git a/submission_bundle/sft_baseline/reports/postsave_inference.json b/polyguard-rl/submission_bundle/sft_baseline/reports/postsave_inference.json similarity index 100% rename from submission_bundle/sft_baseline/reports/postsave_inference.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/postsave_inference.json diff --git a/submission_bundle/sft_baseline/reports/sft_trl_run.json b/polyguard-rl/submission_bundle/sft_baseline/reports/sft_trl_run.json similarity index 100% rename from submission_bundle/sft_baseline/reports/sft_trl_run.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/sft_trl_run.json diff --git a/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json b/polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json similarity index 100% rename from submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json diff --git a/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json similarity index 100% rename from submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json diff --git 
a/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json b/polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json similarity index 100% rename from submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json diff --git a/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json similarity index 100% rename from submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json diff --git a/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json b/polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json similarity index 100% rename from submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json diff --git a/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json similarity index 100% rename from submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json diff --git a/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json 
b/polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json similarity index 100% rename from submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json diff --git a/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json b/polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json similarity index 100% rename from submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json diff --git a/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json b/polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json similarity index 100% rename from submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json diff --git a/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json b/polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json similarity index 100% rename from submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json diff --git a/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json b/polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json similarity index 100% rename from 
submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json diff --git a/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json b/polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json similarity index 100% rename from submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json rename to polyguard-rl/submission_bundle/sft_baseline/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json diff --git a/tests/test_acceptance_gate.py b/polyguard-rl/tests/test_acceptance_gate.py similarity index 100% rename from tests/test_acceptance_gate.py rename to polyguard-rl/tests/test_acceptance_gate.py diff --git a/tests/test_agents.py b/polyguard-rl/tests/test_agents.py similarity index 100% rename from tests/test_agents.py rename to polyguard-rl/tests/test_agents.py diff --git a/tests/test_anti_cheat.py b/polyguard-rl/tests/test_anti_cheat.py similarity index 100% rename from tests/test_anti_cheat.py rename to polyguard-rl/tests/test_anti_cheat.py diff --git a/tests/test_api.py b/polyguard-rl/tests/test_api.py similarity index 100% rename from tests/test_api.py rename to polyguard-rl/tests/test_api.py diff --git a/tests/test_checkpoint_integrity.py b/polyguard-rl/tests/test_checkpoint_integrity.py similarity index 100% rename from tests/test_checkpoint_integrity.py rename to polyguard-rl/tests/test_checkpoint_integrity.py diff --git a/tests/test_constraints.py b/polyguard-rl/tests/test_constraints.py similarity index 100% rename from tests/test_constraints.py rename to polyguard-rl/tests/test_constraints.py diff --git a/tests/test_contextual_bandit.py b/polyguard-rl/tests/test_contextual_bandit.py similarity index 100% rename from tests/test_contextual_bandit.py rename to polyguard-rl/tests/test_contextual_bandit.py diff --git 
a/tests/test_dataops_parser.py b/polyguard-rl/tests/test_dataops_parser.py similarity index 100% rename from tests/test_dataops_parser.py rename to polyguard-rl/tests/test_dataops_parser.py diff --git a/tests/test_env_reset.py b/polyguard-rl/tests/test_env_reset.py similarity index 100% rename from tests/test_env_reset.py rename to polyguard-rl/tests/test_env_reset.py diff --git a/tests/test_env_step.py b/polyguard-rl/tests/test_env_step.py similarity index 100% rename from tests/test_env_step.py rename to polyguard-rl/tests/test_env_step.py diff --git a/tests/test_env_step_flow.py b/polyguard-rl/tests/test_env_step_flow.py similarity index 100% rename from tests/test_env_step_flow.py rename to polyguard-rl/tests/test_env_step_flow.py diff --git a/tests/test_future_subenvs.py b/polyguard-rl/tests/test_future_subenvs.py similarity index 100% rename from tests/test_future_subenvs.py rename to polyguard-rl/tests/test_future_subenvs.py diff --git a/tests/test_graph_infer.py b/polyguard-rl/tests/test_graph_infer.py similarity index 100% rename from tests/test_graph_infer.py rename to polyguard-rl/tests/test_graph_infer.py diff --git a/tests/test_hf_training_sweep.py b/polyguard-rl/tests/test_hf_training_sweep.py similarity index 100% rename from tests/test_hf_training_sweep.py rename to polyguard-rl/tests/test_hf_training_sweep.py diff --git a/tests/test_medication_alternatives.py b/polyguard-rl/tests/test_medication_alternatives.py similarity index 100% rename from tests/test_medication_alternatives.py rename to polyguard-rl/tests/test_medication_alternatives.py diff --git a/tests/test_openenv_contract.py b/polyguard-rl/tests/test_openenv_contract.py similarity index 100% rename from tests/test_openenv_contract.py rename to polyguard-rl/tests/test_openenv_contract.py diff --git a/tests/test_parser.py b/polyguard-rl/tests/test_parser.py similarity index 100% rename from tests/test_parser.py rename to polyguard-rl/tests/test_parser.py diff --git 
a/tests/test_policy_schema.py b/polyguard-rl/tests/test_policy_schema.py similarity index 100% rename from tests/test_policy_schema.py rename to polyguard-rl/tests/test_policy_schema.py diff --git a/tests/test_postsave_inference.py b/polyguard-rl/tests/test_postsave_inference.py similarity index 100% rename from tests/test_postsave_inference.py rename to polyguard-rl/tests/test_postsave_inference.py diff --git a/tests/test_provider_runtime.py b/polyguard-rl/tests/test_provider_runtime.py similarity index 100% rename from tests/test_provider_runtime.py rename to polyguard-rl/tests/test_provider_runtime.py diff --git a/tests/test_remote_env.py b/polyguard-rl/tests/test_remote_env.py similarity index 100% rename from tests/test_remote_env.py rename to polyguard-rl/tests/test_remote_env.py diff --git a/tests/test_reward_channels.py b/polyguard-rl/tests/test_reward_channels.py similarity index 100% rename from tests/test_reward_channels.py rename to polyguard-rl/tests/test_reward_channels.py diff --git a/tests/test_reward_functions.py b/polyguard-rl/tests/test_reward_functions.py similarity index 100% rename from tests/test_reward_functions.py rename to polyguard-rl/tests/test_reward_functions.py diff --git a/tests/test_reward_range.py b/polyguard-rl/tests/test_reward_range.py similarity index 100% rename from tests/test_reward_range.py rename to polyguard-rl/tests/test_reward_range.py diff --git a/tests/test_runner_notebook.py b/polyguard-rl/tests/test_runner_notebook.py similarity index 100% rename from tests/test_runner_notebook.py rename to polyguard-rl/tests/test_runner_notebook.py diff --git a/tests/test_submission_evidence.py b/polyguard-rl/tests/test_submission_evidence.py similarity index 100% rename from tests/test_submission_evidence.py rename to polyguard-rl/tests/test_submission_evidence.py diff --git a/tests/test_timeout_logic.py b/polyguard-rl/tests/test_timeout_logic.py similarity index 100% rename from tests/test_timeout_logic.py rename to 
polyguard-rl/tests/test_timeout_logic.py diff --git a/uv.lock b/polyguard-rl/uv.lock similarity index 100% rename from uv.lock rename to polyguard-rl/uv.lock diff --git a/run_all_terminals.sh b/run_all_terminals.sh new file mode 100644 index 0000000000000000000000000000000000000000..7046a993922b4250e25afa819c2d34fb0a2f8663 --- /dev/null +++ b/run_all_terminals.sh @@ -0,0 +1,137 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SELF_PATH="$(readlink -f "${BASH_SOURCE[0]}")" + +if [[ -d "$SCRIPT_DIR/polyguard-rl" ]]; then + APP_DIR="$SCRIPT_DIR/polyguard-rl" +else + APP_DIR="$SCRIPT_DIR" +fi + +run_service() { + local service="${1:-}" + local title script + + case "$service" in + env) + title="PolyGuard Backend Env" + script="scripts/run_env_local.sh" + ;; + api) + title="PolyGuard Backend API" + script="scripts/run_api_local.sh" + ;; + ui|frontend) + title="PolyGuard Frontend" + script="scripts/run_ui_local.sh" + ;; + *) + echo "Unknown service: ${service:-}" + echo "Expected one of: env, api, ui" + exit 1 + ;; + esac + + cd "$APP_DIR" + echo "[$title] Starting from $APP_DIR" + echo "[$title] Running: bash $script" + echo + + set +e + bash "$script" + local status=$? + set -e + + echo + echo "[$title] exited with status $status." + if [[ -t 0 ]]; then + read -r -p "Press Enter to close this terminal..." 
_ + fi + exit "$status" +} + +launch_terminal() { + local service="$1" + local title="$2" + + if command -v gnome-terminal >/dev/null 2>&1; then + gnome-terminal --title="$title" -- bash "$SELF_PATH" --run-service "$service" & + elif command -v kgx >/dev/null 2>&1; then + kgx --title "$title" -- bash "$SELF_PATH" --run-service "$service" & + elif command -v konsole >/dev/null 2>&1; then + konsole --workdir "$APP_DIR" -p tabtitle="$title" -e bash "$SELF_PATH" --run-service "$service" & + elif command -v xfce4-terminal >/dev/null 2>&1; then + xfce4-terminal --title="$title" --working-directory="$APP_DIR" --execute bash "$SELF_PATH" --run-service "$service" & + elif command -v mate-terminal >/dev/null 2>&1; then + mate-terminal --title="$title" --working-directory="$APP_DIR" -- bash "$SELF_PATH" --run-service "$service" & + elif command -v xterm >/dev/null 2>&1; then + xterm -T "$title" -e bash "$SELF_PATH" --run-service "$service" & + else + echo "No supported terminal emulator found." + echo + echo "Open three terminals manually and run:" + echo " cd \"$APP_DIR\" && bash scripts/run_env_local.sh" + echo " cd \"$APP_DIR\" && bash scripts/run_api_local.sh" + echo " cd \"$APP_DIR\" && bash scripts/run_ui_local.sh" + exit 1 + fi +} + +usage() { + cat <