Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- Dockerfile +81 -0
- README.md +154 -5
- __init__.py +10 -0
- client.py +47 -0
- eval_tasks.jsonl +10 -0
- models.py +78 -0
- openenv.yaml +7 -0
- openenv_stabilizer_forge.egg-info/PKG-INFO +11 -0
- openenv_stabilizer_forge.egg-info/SOURCES.txt +15 -0
- openenv_stabilizer_forge.egg-info/dependency_links.txt +1 -0
- openenv_stabilizer_forge.egg-info/entry_points.txt +2 -0
- openenv_stabilizer_forge.egg-info/requires.txt +7 -0
- openenv_stabilizer_forge.egg-info/top_level.txt +1 -0
- pyproject.toml +40 -0
- server/__init__.py +11 -0
- server/app.py +70 -0
- server/requirements.txt +5 -0
- server/stabilizer_forge_environment.py +294 -0
- server/verifier.py +64 -0
- tasks.jsonl +29 -0
- uv.lock +0 -0
Dockerfile
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# Multi-stage build using openenv-base
|
| 8 |
+
# This Dockerfile is flexible and works for both:
|
| 9 |
+
# - In-repo environments (with local OpenEnv sources)
|
| 10 |
+
# - Standalone environments (with openenv from PyPI/Git)
|
| 11 |
+
# The build script (openenv build) handles context detection and sets appropriate build args.
|
| 12 |
+
|
| 13 |
+
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
|
| 14 |
+
FROM ${BASE_IMAGE} AS builder
|
| 15 |
+
|
| 16 |
+
WORKDIR /app
|
| 17 |
+
|
| 18 |
+
# Ensure git is available (required for installing dependencies from VCS)
|
| 19 |
+
RUN apt-get update && \
|
| 20 |
+
apt-get install -y --no-install-recommends git && \
|
| 21 |
+
rm -rf /var/lib/apt/lists/*
|
| 22 |
+
|
| 23 |
+
# Build argument to control whether we're building standalone or in-repo
|
| 24 |
+
ARG BUILD_MODE=in-repo
|
| 25 |
+
ARG ENV_NAME=stabilizer_forge
|
| 26 |
+
|
| 27 |
+
# Copy environment code (always at root of build context)
|
| 28 |
+
COPY . /app/env
|
| 29 |
+
|
| 30 |
+
# For in-repo builds, openenv is already vendored in the build context
|
| 31 |
+
# For standalone builds, openenv will be installed via pyproject.toml
|
| 32 |
+
WORKDIR /app/env
|
| 33 |
+
|
| 34 |
+
# Ensure uv is available (for local builds where base image lacks it)
|
| 35 |
+
RUN if ! command -v uv >/dev/null 2>&1; then \
|
| 36 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh && \
|
| 37 |
+
mv /root/.local/bin/uv /usr/local/bin/uv && \
|
| 38 |
+
mv /root/.local/bin/uvx /usr/local/bin/uvx; \
|
| 39 |
+
fi
|
| 40 |
+
|
| 41 |
+
# Install dependencies using uv sync
|
| 42 |
+
# If uv.lock exists, use it; otherwise resolve on the fly
|
| 43 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 44 |
+
if [ -f uv.lock ]; then \
|
| 45 |
+
uv sync --frozen --no-install-project --no-editable; \
|
| 46 |
+
else \
|
| 47 |
+
uv sync --no-install-project --no-editable; \
|
| 48 |
+
fi
|
| 49 |
+
|
| 50 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 51 |
+
if [ -f uv.lock ]; then \
|
| 52 |
+
uv sync --frozen --no-editable; \
|
| 53 |
+
else \
|
| 54 |
+
uv sync --no-editable; \
|
| 55 |
+
fi
|
| 56 |
+
|
| 57 |
+
# Final runtime stage
|
| 58 |
+
FROM ${BASE_IMAGE}
|
| 59 |
+
|
| 60 |
+
WORKDIR /app
|
| 61 |
+
|
| 62 |
+
# Copy the virtual environment from builder
|
| 63 |
+
COPY --from=builder /app/env/.venv /app/.venv
|
| 64 |
+
|
| 65 |
+
# Copy the environment code
|
| 66 |
+
COPY --from=builder /app/env /app/env
|
| 67 |
+
|
| 68 |
+
# Set PATH to use the virtual environment
|
| 69 |
+
ENV PATH="/app/.venv/bin:$PATH"
|
| 70 |
+
|
| 71 |
+
# Set PYTHONPATH so imports work correctly
|
| 72 |
+
ENV PYTHONPATH="/app/env:$PYTHONPATH"
|
| 73 |
+
|
| 74 |
+
# Health check
|
| 75 |
+
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 76 |
+
CMD curl -f http://localhost:8000/health || exit 1
|
| 77 |
+
|
| 78 |
+
# Run the FastAPI server
|
| 79 |
+
# The module path is constructed to work with the /app/env structure
|
| 80 |
+
ENV ENABLE_WEB_INTERFACE=true
|
| 81 |
+
CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
|
README.md
CHANGED
|
@@ -1,10 +1,159 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: StabilizerForge — Quantum-Code Synthesis Environment
|
| 3 |
+
emoji: ⚛️
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: green
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
app_port: 8000
|
| 9 |
+
base_path: /web
|
| 10 |
+
tags:
|
| 11 |
+
- openenv
|
| 12 |
+
- quantum-error-correction
|
| 13 |
+
- stim
|
| 14 |
+
- stabilizer-codes
|
| 15 |
+
- rlvr
|
| 16 |
---
|
| 17 |
|
| 18 |
+
# StabilizerForge — OpenEnv environment for Clifford circuit synthesis
|
| 19 |
+
|
| 20 |
+
An RL environment that scores candidate Clifford encoding circuits against a target stabilizer code in **polynomial time** using [Stim](https://github.com/quantumlib/Stim)'s tableau simulator (Aaronson–Gottesman). Built for training small LLMs to do automated quantum-error-correction code synthesis with verifier-grounded rewards (RLVR).
|
| 21 |
+
|
| 22 |
+
The environment ships with **29 training tasks + 10 held-out eval tasks** across three difficulty tiers — from Bell states (2 qubits, 2 gates) up to distance-5 surface, color, and Golay codes (≤25 qubits, 100+ gates).
|
| 23 |
+
|
| 24 |
+
## Action / Observation / Reward
|
| 25 |
+
|
| 26 |
+
**Action** — one Clifford gate per step (or `FINALIZE`):
|
| 27 |
+
|
| 28 |
+
```python
|
| 29 |
+
from stabilizer_forge import StabilizerAction
|
| 30 |
+
StabilizerAction(op="H", qubits=[0]) # Hadamard on qubit 0
|
| 31 |
+
StabilizerAction(op="S", qubits=[3]) # phase gate on qubit 3
|
| 32 |
+
StabilizerAction(op="CX", qubits=[0, 1]) # CNOT 0 -> 1
|
| 33 |
+
StabilizerAction(op="FINALIZE") # end episode, deliver terminal reward
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
Actions are validated with Pydantic; malformed actions incur a format penalty and are treated as no-ops. After 5 consecutive format violations the episode terminates.
|
| 37 |
+
|
| 38 |
+
**Observation** — full state for the current episode:
|
| 39 |
+
|
| 40 |
+
| field | type | meaning |
|
| 41 |
+
|-------|------|---------|
|
| 42 |
+
| `task_id` | str | which task this episode is running |
|
| 43 |
+
| `target_stabilizers` | `list[str]` | Pauli strings, e.g. `["XZZXI", "IXZZX", ...]` |
|
| 44 |
+
| `n_qubits` | int | number of physical qubits |
|
| 45 |
+
| `gates_so_far` | `list[str]` | Stim instructions applied this episode |
|
| 46 |
+
| `current_circuit` | str | concatenated Stim text |
|
| 47 |
+
| `current_match` | `list[bool]` | per-stabilizer preservation under the current circuit (live from Stim) |
|
| 48 |
+
| `match_fraction` | float | fraction of target stabilizers preserved (0..1) |
|
| 49 |
+
| `gates_emitted` | int | valid gates applied so far |
|
| 50 |
+
| `cnot_count` | int | CX count |
|
| 51 |
+
| `nonadj_cnot_count` | int | CXs across non-adjacent qubits |
|
| 52 |
+
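| `gate_budget` / `gate_budget_remaining` | int | hard cap on total gates, set per task in the task file (e.g. 78 for Steane, i.e. 3 × its `benchmark_optimum` of 26), and the part of that cap still unused |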
|
| 53 |
+
| `benchmark_optimum` / `benchmark_optimum_2q` | int | reference encoder's gate counts |
|
| 54 |
+
| `connectivity_edges` | `list[list[int]] \| None` | None = all-to-all |
|
| 55 |
+
| `format_violations`, `consecutive_violations` | int | error tracking |
|
| 56 |
+
| `last_action_valid`, `last_action_error` | bool, str | parser feedback |
|
| 57 |
+
| `step_count`, `finalized` | int, bool | episode step counter and finalized flag |
|
| 58 |
+
|
| 59 |
+
**Reward** — delivered at `FINALIZE` plus dense per-step shaping:
|
| 60 |
+
|
| 61 |
+
| component | weight | what it measures |
|
| 62 |
+
|-----------|--------|------------------|
|
| 63 |
+
| stabilizer-match fraction | **0.40** | primary correctness signal |
|
| 64 |
+
| gate-count efficiency `max(0, 1 − gates / (1.5 × bench_opt))` | 0.20 | volume vs. reference |
|
| 65 |
+
| two-qubit-gate efficiency | 0.20 | CXs are expensive on real hardware |
|
| 66 |
+
| connectivity respect | 0.10 | −1 per CX across non-adjacent qubits |
|
| 67 |
+
| format compliance | 0.10 | −1 per malformed action |
|
| 68 |
+
| `0.05 × Δmatch_fraction` | per step | dense gradient before FINALIZE is learned |
|
| 69 |
+
|
| 70 |
+
## Quick start (sync HTTP)
|
| 71 |
+
|
| 72 |
+
```python
|
| 73 |
+
from stabilizer_forge import StabilizerAction, StabilizerForgeEnv
|
| 74 |
+
|
| 75 |
+
client = StabilizerForgeEnv(base_url="http://localhost:8000")
|
| 76 |
+
with client.sync() as env:
|
| 77 |
+
r = env.reset(task_id="steane") # pass any task_id from tasks.jsonl, or omit for random
|
| 78 |
+
print(r.observation.target_stabilizers) # 6 Pauli strings on 7 qubits
|
| 79 |
+
|
| 80 |
+
for op, qs in [("H",[0]), ("CX",[0,1]), ("CX",[0,2]), ("CX",[0,3])]:
|
| 81 |
+
r = env.step(StabilizerAction(op=op, qubits=qs))
|
| 82 |
+
print(f" match_fraction={r.observation.match_fraction:.2f}")
|
| 83 |
+
|
| 84 |
+
r = env.step(StabilizerAction(op="FINALIZE"))
|
| 85 |
+
print(f"terminal reward={r.reward:+.3f} done={r.done}")
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
To start the server locally:
|
| 89 |
+
|
| 90 |
+
```bash
|
| 91 |
+
python -m stabilizer_forge.server.app --port 8000
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
Or via Docker (see below).
|
| 95 |
+
|
| 96 |
+
## Tasks
|
| 97 |
+
|
| 98 |
+
The env loads tasks from `stabilizer_forge/tasks.jsonl` by default. Override with the `STABILIZER_FORGE_TASKS` environment variable. Each task carries:
|
| 99 |
+
|
| 100 |
+
```json
|
| 101 |
+
{
|
| 102 |
+
"task_id": "steane",
|
| 103 |
+
"source_code": "Steane [[7,1,3]]",
|
| 104 |
+
"n_qubits": 7,
|
| 105 |
+
"target_stabilizers": ["XXIIXXI", "XIXIXIX", "IIIXXXX", "ZZIIZZI", "ZIZIZIZ", "IIIZZZZ"],
|
| 106 |
+
"connectivity_edges": null,
|
| 107 |
+
"gate_budget": 78,
|
| 108 |
+
"benchmark_optimum": 26,
|
| 109 |
+
"benchmark_optimum_2q": 23,
|
| 110 |
+
"tier": 2
|
| 111 |
+
}
|
| 112 |
+
```
|
| 113 |
+
|
| 114 |
+
Tier 1 (12 tasks): Bell, GHZ-3..8, [[4,2,2]] iceberg/detector, hypercube `l=1`, iceberg `m=4`.
|
| 115 |
+
Tier 2 (12 tasks): Perfect [[5,1,3]], Steane, Shor, surface `d=3`, hex/square-octagon color `d=3`, GHZ-9..13, Carbon.
|
| 116 |
+
Tier 3 (5 tasks): Tetrahedral, Hamming, surface `d=5`, hex/square-octagon color `d=5`.
|
| 117 |
+
|
| 118 |
+
## Verifier
|
| 119 |
+
|
| 120 |
+
The match-fraction comes from Stim's `TableauSimulator.peek_observable_expectation`. For each target stabilizer `S_i`, we apply the candidate circuit to `|0⟩^n`, then check whether the resulting state has `+1` eigenvalue under `S_i`. This is exact and polynomial — there's no false-positive risk and no statistical noise. Vendored from [uw-math-ai/quantum-ai/tools/check_stabilizers.py](https://github.com/uw-math-ai/quantum-ai/blob/main/tools/check_stabilizers.py).
|
| 121 |
+
|
| 122 |
+
## Deploy
|
| 123 |
+
|
| 124 |
+
```bash
|
| 125 |
+
openenv push
|
| 126 |
+
# requires `huggingface-cli login` first
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
The deployed Space exposes `/health`, `/reset`, `/step`, `/state`, `/schema` over HTTP, and `/ws` for low-latency persistent sessions. Use `StabilizerForgeEnv(base_url="https://<your-space>.hf.space")` to connect.
|
| 130 |
+
|
| 131 |
+
## Building the Docker image manually
|
| 132 |
+
|
| 133 |
+
```bash
|
| 134 |
+
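# In this Space the Dockerfile sits at the repository root; use `-f server/Dockerfile` only if your checkout keeps it under server/.
docker build -t stabilizer-forge-env:latest -f Dockerfile .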
|
| 135 |
+
docker run -p 8000:8000 stabilizer-forge-env:latest
|
| 136 |
+
```
|
| 137 |
+
|
| 138 |
+
## Files
|
| 139 |
+
|
| 140 |
+
```
|
| 141 |
+
stabilizer_forge/
|
| 142 |
+
├── __init__.py
|
| 143 |
+
├── client.py # StabilizerForgeEnv (sync/async HTTP client)
|
| 144 |
+
├── models.py # StabilizerAction, StabilizerObservation
|
| 145 |
+
├── tasks.jsonl # 29 training tasks
|
| 146 |
+
├── eval_tasks.jsonl # 10 held-out eval tasks
|
| 147 |
+
├── pyproject.toml
|
| 148 |
+
├── openenv.yaml
|
| 149 |
+
└── server/
|
| 150 |
+
├── stabilizer_forge_environment.py # core env (reward, termination, verifier wrap)
|
| 151 |
+
├── verifier.py # Stim-based check_stabilizers, match_fraction
|
| 152 |
+
├── app.py # FastAPI; max_concurrent_envs=64
|
| 153 |
+
└── Dockerfile
|
| 154 |
+
```
|
| 155 |
+
|
| 156 |
+
## Citation / credits
|
| 157 |
+
|
| 158 |
+
- Verifier and benchmark catalog adapted from [uw-math-ai/quantum-ai](https://github.com/uw-math-ai/quantum-ai) (StabilizerBench, arXiv:2604.21287, April 2026).
|
| 159 |
+
- Built on [OpenEnv](https://github.com/meta-pytorch/OpenEnv) and [Stim](https://github.com/quantumlib/Stim).
|
__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""StabilizerForge environment."""
|
| 2 |
+
|
| 3 |
+
from .client import StabilizerForgeEnv
|
| 4 |
+
from .models import StabilizerAction, StabilizerObservation
|
| 5 |
+
|
| 6 |
+
__all__ = [
|
| 7 |
+
"StabilizerAction",
|
| 8 |
+
"StabilizerObservation",
|
| 9 |
+
"StabilizerForgeEnv",
|
| 10 |
+
]
|
client.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""StabilizerForge environment client (sync + async via openenv core)."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from typing import Any
|
| 5 |
+
|
| 6 |
+
from openenv.core import EnvClient
|
| 7 |
+
from openenv.core.client_types import StepResult
|
| 8 |
+
from openenv.core.env_server.types import State
|
| 9 |
+
|
| 10 |
+
from .models import StabilizerAction, StabilizerObservation
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class StabilizerForgeEnv(EnvClient[StabilizerAction, StabilizerObservation, State]):
    """Client for the StabilizerForge environment server.

    Serializes ``StabilizerAction`` objects into JSON request bodies and
    converts server responses back into ``StepResult`` / ``State`` objects.

    Example:
        client = StabilizerForgeEnv(base_url="http://localhost:8000")
        with client.sync() as env:
            result = env.reset(task_id="steane")
            result = env.step(StabilizerAction(op="H", qubits=[0]))
            ...
            result = env.step(StabilizerAction(op="FINALIZE"))
    """

    def _step_payload(self, action: StabilizerAction) -> dict[str, Any]:
        """Return the JSON-serializable body for a step request.

        Args:
            action: The gate (or FINALIZE) to send.

        Returns:
            A dict with the ``op`` name and a plain-list copy of ``qubits``.
        """
        return {"op": action.op, "qubits": list(action.qubits)}

    def _parse_result(
        self, payload: dict[str, Any]
    ) -> StepResult[StabilizerObservation]:
        """Build a ``StepResult`` from a reset/step response payload.

        Args:
            payload: Decoded response body. May contain ``observation``,
                ``reward`` and ``done`` keys; all are optional.

        Returns:
            A ``StepResult`` whose ``done`` is always a real ``bool`` and whose
            observation mirrors the top-level ``done`` / ``reward`` when the
            server sent them.
        """
        obs_data = payload.get("observation", {})
        # An empty or missing observation falls back to the model defaults.
        observation = (
            StabilizerObservation(**obs_data) if obs_data else StabilizerObservation()
        )

        reward = payload.get("reward")
        # Coerce once so the observation and the StepResult can never disagree
        # (e.g. a JSON null for "done" becomes False in both places).
        done = bool(payload.get("done", False))

        # Top-level done/reward override what's inside the observation, but
        # only when the server actually included them.
        if "done" in payload:
            observation.done = done
        if "reward" in payload:
            observation.reward = reward

        return StepResult(observation=observation, reward=reward, done=done)

    def _parse_state(self, payload: dict[str, Any]) -> State:
        """Build a ``State`` from a state response payload.

        Args:
            payload: Decoded response body.

        Returns:
            A ``State`` carrying ``episode_id`` (``None`` when absent) and
            ``step_count`` (``0`` when absent).
        """
        return State(
            episode_id=payload.get("episode_id"),
            step_count=payload.get("step_count", 0),
        )
|
eval_tasks.jsonl
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"task_id": "ghz15", "source_code": "15-qubit GHZ", "n_qubits": 15, "target_stabilizers": ["XXXXXXXXXXXXXXX", "ZZIIIIIIIIIIIII", "IZZIIIIIIIIIIII", "IIZZIIIIIIIIIII", "IIIZZIIIIIIIIII", "IIIIZZIIIIIIIII", "IIIIIZZIIIIIIII", "IIIIIIZZIIIIIII", "IIIIIIIZZIIIIII", "IIIIIIIIZZIIIII", "IIIIIIIIIZZIIII", "IIIIIIIIIIZZIII", "IIIIIIIIIIIZZII", "IIIIIIIIIIIIZZI", "IIIIIIIIIIIIIZZ"], "connectivity_edges": null, "gate_budget": 60, "benchmark_optimum": 15, "benchmark_optimum_2q": 14, "tier": 2}
|
| 2 |
+
{"task_id": "ghz17", "source_code": "17-qubit GHZ", "n_qubits": 17, "target_stabilizers": ["XXXXXXXXXXXXXXXXX", "ZZIIIIIIIIIIIIIII", "IZZIIIIIIIIIIIIII", "IIZZIIIIIIIIIIIII", "IIIZZIIIIIIIIIIII", "IIIIZZIIIIIIIIIII", "IIIIIZZIIIIIIIIII", "IIIIIIZZIIIIIIIII", "IIIIIIIZZIIIIIIII", "IIIIIIIIZZIIIIIII", "IIIIIIIIIZZIIIIII", "IIIIIIIIIIZZIIIII", "IIIIIIIIIIIZZIIII", "IIIIIIIIIIIIZZIII", "IIIIIIIIIIIIIZZII", "IIIIIIIIIIIIIIZZI", "IIIIIIIIIIIIIIIZZ"], "connectivity_edges": null, "gate_budget": 68, "benchmark_optimum": 17, "benchmark_optimum_2q": 16, "tier": 2}
|
| 3 |
+
{"task_id": "ghz20", "source_code": "20-qubit GHZ", "n_qubits": 20, "target_stabilizers": ["XXXXXXXXXXXXXXXXXXXX", "ZZIIIIIIIIIIIIIIIIII", "IZZIIIIIIIIIIIIIIIII", "IIZZIIIIIIIIIIIIIIII", "IIIZZIIIIIIIIIIIIIII", "IIIIZZIIIIIIIIIIIIII", "IIIIIZZIIIIIIIIIIIII", "IIIIIIZZIIIIIIIIIIII", "IIIIIIIZZIIIIIIIIIII", "IIIIIIIIZZIIIIIIIIII", "IIIIIIIIIZZIIIIIIIII", "IIIIIIIIIIZZIIIIIIII", "IIIIIIIIIIIZZIIIIIII", "IIIIIIIIIIIIZZIIIIII", "IIIIIIIIIIIIIZZIIIII", "IIIIIIIIIIIIIIZZIIII", "IIIIIIIIIIIIIIIZZIII", "IIIIIIIIIIIIIIIIZZII", "IIIIIIIIIIIIIIIIIZZI", "IIIIIIIIIIIIIIIIIIZZ"], "connectivity_edges": null, "gate_budget": 80, "benchmark_optimum": 20, "benchmark_optimum_2q": 19, "tier": 3}
|
| 4 |
+
{"task_id": "golay", "source_code": "Golay", "n_qubits": 23, "target_stabilizers": ["IXIIXIIXXXXXIIIIIIIIIIX", "XIIXIIXXXXXIIIIIIIIIIXI", "IXXIXXXIIIXXIIIIIIIIXII", "XXIXXXIIIXXIIIIIIIIXIII", "XXXXIIIXIIXXIIIIIIXIIII", "XIXIXIXXXIIXIIIIIXIIIII", "IIIXXXXIXXIXIIIIXIIIIII", "IIXXXXIXXIXIIIIXIIIIIII", "IXXXXIXXIXIIIIXIIIIIIII", "XXXXIXXIXIIIIXIIIIIIIII", "XIXIIXIIXXXXXIIIIIIIIII", "IZIIZIIZZZZZIIIIIIIIIIZ", "ZIIZIIZZZZZIIIIIIIIIIZI", "IZZIZZZIIIZZIIIIIIIIZII", "ZZIZZZIIIZZIIIIIIIIZIII", "ZZZZIIIZIIZZIIIIIIZIIII", "ZIZIZIZZZIIZIIIIIZIIIII", "IIIZZZZIZZIZIIIIZIIIIII", "IIZZZZIZZIZIIIIZIIIIIII", "IZZZZIZZIZIIIIZIIIIIIII", "ZZZZIZZIZIIIIZIIIIIIIII", "ZIZIIZIIZZZZZIIIIIIIIII"], "connectivity_edges": null, "gate_budget": 555, "benchmark_optimum": 185, "benchmark_optimum_2q": 174, "tier": 3}
|
| 5 |
+
{"task_id": "iceberg_code_m_2_perfect_5-qubit_code", "source_code": "(Iceberg Code m=2) * (Perfect 5-Qubit Code)", "n_qubits": 20, "target_stabilizers": ["XZZXIIIIIIIIIIIIIIII", "IIIIIXZZXIIIIIIIIIII", "IIIIIIIIIIXZZXIIIIII", "IIIIIIIIIIIIIIIXZZXI", "IXZZXIIIIIIIIIIIIIII", "IIIIIIXZZXIIIIIIIIII", "IIIIIIIIIIIXZZXIIIII", "IIIIIIIIIIIIIIIIXZZX", "XIXZZIIIIIIIIIIIIIII", "IIIIIXIXZZIIIIIIIIII", "IIIIIIIIIIXIXZZIIIII", "IIIIIIIIIIIIIIIXIXZZ", "ZXIXZIIIIIIIIIIIIIII", "IIIIIZXIXZIIIIIIIIII", "IIIIIIIIIIZXIXZIIIII", "IIIIIIIIIIIIIIIZXIXZ", "XXXXXXXXXXXXXXXXXXXX", "ZZZZZZZZZZZZZZZZZZZZ"], "connectivity_edges": null, "gate_budget": 738, "benchmark_optimum": 246, "benchmark_optimum_2q": 135, "tier": 3}
|
| 6 |
+
{"task_id": "4-qubit_detector_code_perfect_5-qubit_code", "source_code": "(4-Qubit Detector Code) * (Perfect 5-Qubit Code)", "n_qubits": 20, "target_stabilizers": ["XZZXIIIIIIIIIIIIIIII", "IIIIIXZZXIIIIIIIIIII", "IIIIIIIIIIXZZXIIIIII", "IIIIIIIIIIIIIIIXZZXI", "IXZZXIIIIIIIIIIIIIII", "IIIIIIXZZXIIIIIIIIII", "IIIIIIIIIIIXZZXIIIII", "IIIIIIIIIIIIIIIIXZZX", "XIXZZIIIIIIIIIIIIIII", "IIIIIXIXZZIIIIIIIIII", "IIIIIIIIIIXIXZZIIIII", "IIIIIIIIIIIIIIIXIXZZ", "ZXIXZIIIIIIIIIIIIIII", "IIIIIZXIXZIIIIIIIIII", "IIIIIIIIIIZXIXZIIIII", "IIIIIIIIIIIIIIIZXIXZ", "XXXXXXXXXXXXXXXXXXXX", "ZZZZZZZZZZZZZZZZZZZZ"], "connectivity_edges": null, "gate_budget": 738, "benchmark_optimum": 246, "benchmark_optimum_2q": 135, "tier": 3}
|
| 7 |
+
{"task_id": "perfect_5-qubit_code_perfect_5-qubit_code", "source_code": "(Perfect 5-Qubit Code) * (Perfect 5-Qubit Code)", "n_qubits": 25, "target_stabilizers": ["XZZXIIIIIIIIIIIIIIIIIIIII", "IIIIIXZZXIIIIIIIIIIIIIIII", "IIIIIIIIIIXZZXIIIIIIIIIII", "IIIIIIIIIIIIIIIXZZXIIIIII", "IIIIIIIIIIIIIIIIIIIIXZZXI", "IXZZXIIIIIIIIIIIIIIIIIIII", "IIIIIIXZZXIIIIIIIIIIIIIII", "IIIIIIIIIIIXZZXIIIIIIIIII", "IIIIIIIIIIIIIIIIXZZXIIIII", "IIIIIIIIIIIIIIIIIIIIIXZZX", "XIXZZIIIIIIIIIIIIIIIIIIII", "IIIIIXIXZZIIIIIIIIIIIIIII", "IIIIIIIIIIXIXZZIIIIIIIIII", "IIIIIIIIIIIIIIIXIXZZIIIII", "IIIIIIIIIIIIIIIIIIIIXIXZZ", "ZXIXZIIIIIIIIIIIIIIIIIIII", "IIIIIZXIXZIIIIIIIIIIIIIII", "IIIIIIIIIIZXIXZIIIIIIIIII", "IIIIIIIIIIIIIIIZXIXZIIIII", "IIIIIIIIIIIIIIIIIIIIZXIXZ", "XXXXXZZZZZZZZZZXXXXXIIIII", "IIIIIXXXXXZZZZZZZZZZXXXXX", "XXXXXIIIIIXXXXXZZZZZZZZZZ", "ZZZZZXXXXXIIIIIXXXXXZZZZZ"], "connectivity_edges": null, "gate_budget": 1098, "benchmark_optimum": 366, "benchmark_optimum_2q": 192, "tier": 3}
|
| 8 |
+
{"task_id": "rep5_logical_plus", "source_code": "5-qubit repetition (logical |+>)", "n_qubits": 5, "target_stabilizers": ["XXXXX", "ZZIII", "IZZII", "IIZZI", "IIIZZ"], "connectivity_edges": null, "gate_budget": 30, "benchmark_optimum": 5, "benchmark_optimum_2q": 4, "tier": 2}
|
| 9 |
+
{"task_id": "ghz_pair", "source_code": "Pair of independent Bell states", "n_qubits": 4, "target_stabilizers": ["XXII", "ZZII", "IIXX", "IIZZ"], "connectivity_edges": null, "gate_budget": 20, "benchmark_optimum": 4, "benchmark_optimum_2q": 2, "tier": 1}
|
| 10 |
+
{"task_id": "linear_cluster_4", "source_code": "4-qubit linear cluster state", "n_qubits": 4, "target_stabilizers": ["XZII", "ZXZI", "IZXZ", "IIZX"], "connectivity_edges": [[0, 1], [1, 2], [2, 3]], "gate_budget": 20, "benchmark_optimum": 7, "benchmark_optimum_2q": 3, "tier": 1}
|
models.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Data models for the StabilizerForge environment.
|
| 2 |
+
|
| 3 |
+
Action: a single Clifford gate (H, S, CX) or FINALIZE.
|
| 4 |
+
Observation: target stabilizers + circuit-so-far + current match fraction + bookkeeping.
|
| 5 |
+
"""
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
from typing import Literal
|
| 9 |
+
|
| 10 |
+
from openenv.core.env_server.types import Action, Observation
|
| 11 |
+
from pydantic import Field
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class StabilizerAction(Action):
    """One Clifford gate, or FINALIZE to end the episode."""

    # Required field: only the four operation names in the Literal are accepted.
    op: Literal["H", "S", "CX", "FINALIZE"] = Field(
        default=...,
        description="Gate to apply, or FINALIZE to end the episode.",
    )

    # Optional field: each instance gets its own empty list when omitted.
    qubits: list[int] = Field(
        description=(
            "Target qubit indices. 1 for H/S, 2 for CX (control, target). "
            "Empty for FINALIZE."
        ),
        default_factory=list,
    )
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class StabilizerObservation(Observation):
    """Full per-step view of the episode."""

    # --- Task definition -------------------------------------------------
    task_id: str = Field("", description="Active task identifier.")
    target_stabilizers: list[str] = Field(
        description="Target stabilizer generators as Pauli strings (e.g., 'XZZXI').",
        default_factory=list,
    )
    n_qubits: int = Field(0, description="Number of physical qubits.")
    connectivity_edges: list[list[int]] | None = Field(
        None, description="Adjacency edge list. None means all-to-all."
    )

    # --- Circuit built so far --------------------------------------------
    gates_so_far: list[str] = Field(
        description="Gates applied this episode, as Stim instruction strings.",
        default_factory=list,
    )
    current_circuit: str = Field(
        "", description="Concatenated Stim text of all gates emitted so far."
    )

    # --- Verifier output --------------------------------------------------
    current_match: list[bool] = Field(
        description="Per-stabilizer preservation under current circuit.",
        default_factory=list,
    )
    match_fraction: float = Field(
        0.0,
        description="Fraction of target stabilizers preserved (0..1).",
    )

    # --- Gate accounting and reference numbers ----------------------------
    gates_emitted: int = Field(0, description="Number of valid gates applied.")
    cnot_count: int = Field(0, description="Number of CX gates applied.")
    nonadj_cnot_count: int = Field(
        0, description="Number of CX gates applied across non-adjacent qubits."
    )
    gate_budget: int = Field(0, description="Hard cap on total gates.")
    gate_budget_remaining: int = Field(0)
    benchmark_optimum: int = Field(
        0,
        description="Reference encoding's gate count (volume-style).",
    )
    benchmark_optimum_2q: int = Field(
        0,
        description="Reference encoding's two-qubit gate count.",
    )

    # --- Action-format feedback -------------------------------------------
    format_violations: int = Field(0)
    consecutive_violations: int = Field(0)
    last_action_valid: bool = Field(True)
    last_action_error: str = Field("")

    # --- Episode bookkeeping ----------------------------------------------
    step_count: int = Field(0)
    finalized: bool = Field(False)
|
openenv.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
spec_version: 1
|
| 2 |
+
name: stabilizer_forge
|
| 3 |
+
type: space
|
| 4 |
+
runtime: fastapi
|
| 5 |
+
app: server.app:app
|
| 6 |
+
port: 8000
|
| 7 |
+
|
openenv_stabilizer_forge.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: openenv-stabilizer_forge
|
| 3 |
+
Version: 0.1.0
|
| 4 |
+
Summary: Stabilizer Forge environment for OpenEnv
|
| 5 |
+
Requires-Python: >=3.10
|
| 6 |
+
Requires-Dist: openenv-core[core]>=0.2.2
|
| 7 |
+
Requires-Dist: stim>=1.13
|
| 8 |
+
Requires-Dist: numpy>=1.24
|
| 9 |
+
Provides-Extra: dev
|
| 10 |
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
| 11 |
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
openenv_stabilizer_forge.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
README.md
|
| 2 |
+
pyproject.toml
|
| 3 |
+
./__init__.py
|
| 4 |
+
./client.py
|
| 5 |
+
./models.py
|
| 6 |
+
openenv_stabilizer_forge.egg-info/PKG-INFO
|
| 7 |
+
openenv_stabilizer_forge.egg-info/SOURCES.txt
|
| 8 |
+
openenv_stabilizer_forge.egg-info/dependency_links.txt
|
| 9 |
+
openenv_stabilizer_forge.egg-info/entry_points.txt
|
| 10 |
+
openenv_stabilizer_forge.egg-info/requires.txt
|
| 11 |
+
openenv_stabilizer_forge.egg-info/top_level.txt
|
| 12 |
+
server/__init__.py
|
| 13 |
+
server/app.py
|
| 14 |
+
server/stabilizer_forge_environment.py
|
| 15 |
+
server/verifier.py
|
openenv_stabilizer_forge.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
openenv_stabilizer_forge.egg-info/entry_points.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[console_scripts]
|
| 2 |
+
server = stabilizer_forge.server.app:main
|
openenv_stabilizer_forge.egg-info/requires.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv-core[core]>=0.2.2
|
| 2 |
+
stim>=1.13
|
| 3 |
+
numpy>=1.24
|
| 4 |
+
|
| 5 |
+
[dev]
|
| 6 |
+
pytest>=8.0.0
|
| 7 |
+
pytest-cov>=4.0.0
|
openenv_stabilizer_forge.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
stabilizer_forge
|
pyproject.toml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
[build-system]
|
| 8 |
+
requires = ["setuptools>=45", "wheel"]
|
| 9 |
+
build-backend = "setuptools.build_meta"
|
| 10 |
+
|
| 11 |
+
[project]
|
| 12 |
+
name = "openenv-stabilizer_forge"
|
| 13 |
+
version = "0.1.0"
|
| 14 |
+
description = "Stabilizer Forge environment for OpenEnv"
|
| 15 |
+
requires-python = ">=3.10"
|
| 16 |
+
dependencies = [
|
| 17 |
+
# Core OpenEnv runtime (provides FastAPI server + HTTP client types)
|
| 18 |
+
# install from github
|
| 19 |
+
# "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
|
| 20 |
+
"openenv-core[core]>=0.2.2",
|
| 21 |
+
# Environment-specific dependencies
|
| 22 |
+
"stim>=1.13",
|
| 23 |
+
"numpy>=1.24",
|
| 24 |
+
]
|
| 25 |
+
|
| 26 |
+
[project.optional-dependencies]
|
| 27 |
+
dev = [
|
| 28 |
+
"pytest>=8.0.0",
|
| 29 |
+
"pytest-cov>=4.0.0",
|
| 30 |
+
]
|
| 31 |
+
|
| 32 |
+
[project.scripts]
|
| 33 |
+
# Server entry point - enables running via: uv run --project . server
|
| 34 |
+
# or: python -m stabilizer_forge.server.app
|
| 35 |
+
server = "stabilizer_forge.server.app:main"
|
| 36 |
+
|
| 37 |
+
[tool.setuptools]
|
| 38 |
+
include-package-data = true
|
| 39 |
+
packages = ["stabilizer_forge", "stabilizer_forge.server"]
|
| 40 |
+
package-dir = { "stabilizer_forge" = ".", "stabilizer_forge.server" = "server" }
|
server/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Stabilizer Forge environment server components."""
|
| 8 |
+
|
| 9 |
+
from .stabilizer_forge_environment import StabilizerForgeEnvironment
|
| 10 |
+
|
| 11 |
+
__all__ = ["StabilizerForgeEnvironment"]
|
server/app.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
FastAPI application for the Stabilizer Forge Environment.
|
| 9 |
+
|
| 10 |
+
This module creates an HTTP server that exposes the StabilizerForgeEnvironment
|
| 11 |
+
over HTTP and WebSocket endpoints, compatible with EnvClient.
|
| 12 |
+
|
| 13 |
+
Endpoints:
|
| 14 |
+
- GET /health: Health check (polled by the Docker HEALTHCHECK)
- POST /reset: Reset the environment
|
| 15 |
+
- POST /step: Execute an action
|
| 16 |
+
- GET /state: Get current environment state
|
| 17 |
+
- GET /schema: Get action/observation schemas
|
| 18 |
+
- WS /ws: WebSocket endpoint for persistent sessions
|
| 19 |
+
|
| 20 |
+
Usage:
|
| 21 |
+
# Development (with auto-reload):
|
| 22 |
+
uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
|
| 23 |
+
|
| 24 |
+
# Production:
|
| 25 |
+
uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
|
| 26 |
+
|
| 27 |
+
# Or run directly:
|
| 28 |
+
python -m server.app
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
try:
|
| 32 |
+
from openenv.core.env_server.http_server import create_app
|
| 33 |
+
except Exception as e: # pragma: no cover
|
| 34 |
+
raise ImportError(
|
| 35 |
+
"openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
|
| 36 |
+
) from e
|
| 37 |
+
|
| 38 |
+
try:
|
| 39 |
+
from ..models import StabilizerAction, StabilizerObservation
|
| 40 |
+
from .stabilizer_forge_environment import StabilizerForgeEnvironment
|
| 41 |
+
except ImportError:
|
| 42 |
+
from models import StabilizerAction, StabilizerObservation
|
| 43 |
+
from server.stabilizer_forge_environment import StabilizerForgeEnvironment
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# Create the app. max_concurrent_envs=64 is the TRL-OpenEnv-RFC gotcha:
|
| 47 |
+
# the default of 1 silently kills GRPO parallelism (see RFC 004).
|
| 48 |
+
app = create_app(
|
| 49 |
+
StabilizerForgeEnvironment,
|
| 50 |
+
StabilizerAction,
|
| 51 |
+
StabilizerObservation,
|
| 52 |
+
env_name="stabilizer_forge",
|
| 53 |
+
max_concurrent_envs=64,
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def main():
    """Serve ``app`` with uvicorn using ``--host`` / ``--port`` from the command line.

    This is the entry point for direct execution via `uv run server` or
    `python -m stabilizer_forge.server.app`. ``--host`` defaults to
    ``0.0.0.0`` and ``--port`` to ``8000``.
    """
    import argparse
    import uvicorn

    cli = argparse.ArgumentParser()
    # Declare the two supported flags from a small table instead of one call each.
    for flag, options in (
        ("--host", {"default": "0.0.0.0"}),
        ("--port", {"type": int, "default": 8000}),
    ):
        cli.add_argument(flag, **options)
    opts = cli.parse_args()
    uvicorn.run(app, host=opts.host, port=opts.port)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
if __name__ == "__main__":
|
| 70 |
+
main()
|
server/requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv-core[core]>=0.2.2
|
| 2 |
+
fastapi>=0.115.0
|
| 3 |
+
uvicorn>=0.24.0
|
| 4 |
+
stim>=1.13
|
| 5 |
+
numpy>=1.24
|
server/stabilizer_forge_environment.py
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""StabilizerForge environment.
|
| 2 |
+
|
| 3 |
+
Episode loop:
|
| 4 |
+
reset(task_id?) -> sample/load a task; emit initial observation
|
| 5 |
+
step(action) -> apply one Clifford gate, or FINALIZE
|
| 6 |
+
returns dense shaping reward per step,
|
| 7 |
+
full terminal reward at FINALIZE.
|
| 8 |
+
"""
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import json
|
| 12 |
+
import os
|
| 13 |
+
import random
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
from typing import Any
|
| 16 |
+
from uuid import uuid4
|
| 17 |
+
|
| 18 |
+
from openenv.core.env_server.interfaces import Environment
|
| 19 |
+
from openenv.core.env_server.types import State
|
| 20 |
+
|
| 21 |
+
try:
|
| 22 |
+
from ..models import StabilizerAction, StabilizerObservation
|
| 23 |
+
from .verifier import match_fraction
|
| 24 |
+
except ImportError: # pragma: no cover (in-container imports without package context)
|
| 25 |
+
from models import StabilizerAction, StabilizerObservation
|
| 26 |
+
from server.verifier import match_fraction
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# Reward weights (keep aligned with README)
|
| 30 |
+
W_MATCH = 0.4
|
| 31 |
+
W_GATE_EFF = 0.2
|
| 32 |
+
W_TWOQ_EFF = 0.2
|
| 33 |
+
W_CONN = 0.1
|
| 34 |
+
W_FORMAT = 0.1
|
| 35 |
+
SHAPING_COEF = 0.05 # per-step Δmatch_fraction shaping
|
| 36 |
+
MAX_CONSECUTIVE_VIOLATIONS = 5
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _default_tasks_path() -> str:
    """Return the path of the tasks file to use when none is given explicitly.

    Resolution order:
      1. the ``STABILIZER_FORGE_TASKS`` environment variable, if set and non-empty;
      2. ``tasks.jsonl`` in the directory two levels above this file, if it exists;
      3. ``tasks.jsonl`` one directory further up (returned without an existence check).
    """
    override = os.environ.get("STABILIZER_FORGE_TASKS")
    if override:
        return override

    package_dir = Path(__file__).resolve().parent.parent  # stabilizer_forge/
    bundled = package_dir / "tasks.jsonl"
    # Fall back to the project root when no file sits next to the package.
    chosen = bundled if bundled.exists() else package_dir.parent / "tasks.jsonl"
    return str(chosen)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _load_tasks(path: str) -> list[dict]:
    """Load task records from a JSON Lines file.

    Each non-blank line is parsed as one JSON value; blank or whitespace-only
    lines are skipped.

    Args:
        path: Location of the ``.jsonl`` task file.

    Returns:
        The parsed records in file order, or an empty list when ``path`` does
        not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message is prefixed with ``<path>:<line number>`` so the
            offending record can be located.
    """
    p = Path(path)
    if not p.exists():
        return []
    tasks: list[dict] = []
    # Decode explicitly as UTF-8 so parsing does not depend on the host locale.
    with p.open(encoding="utf-8") as f:
        for lineno, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue
            try:
                tasks.append(json.loads(line))
            except json.JSONDecodeError as exc:
                # Same exception type as before, but say where the bad line is.
                raise json.JSONDecodeError(
                    f"{path}:{lineno}: {exc.msg}", exc.doc, exc.pos
                ) from exc
    return tasks
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _gate_to_stim(action: StabilizerAction) -> str:
    """Format one gate action as a single line of Stim circuit text.

    ``H`` and ``S`` render as ``"<op> <q>"`` using the first qubit; ``CX``
    renders as ``"CX <control> <target>"`` using the first two qubits.

    Raises:
        ValueError: If ``action.op`` is anything other than H, S or CX.
    """
    name = action.op
    if name == "CX":
        control, target = action.qubits[0], action.qubits[1]
        return f"CX {control} {target}"
    if name in ("H", "S"):
        return f"{name} {action.qubits[0]}"
    raise ValueError(f"Cannot render gate: {name}")
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def _validate_action(
    action: StabilizerAction, n_qubits: int
) -> tuple[bool, str]:
    """Check an action's op name, operand count and qubit indices.

    Rules enforced:
      * ``FINALIZE`` must carry no qubits.
      * ``H`` / ``S`` take exactly one qubit.
      * ``CX`` takes exactly two distinct qubits.
      * every qubit index must satisfy ``0 <= q < n_qubits``.

    Returns:
        ``(True, "")`` for a well-formed action, otherwise ``(False, reason)``
        where ``reason`` describes the first problem found.
    """
    op = action.op

    if op == "FINALIZE":
        if action.qubits:
            return False, "FINALIZE takes no qubits."
        return True, ""

    if op not in ("H", "S", "CX"):
        return False, f"unknown op {op}"

    operands = action.qubits
    count = len(operands)
    if op == "CX":
        if count != 2:
            return False, f"CX requires exactly 2 qubits, got {count}."
        control, target = operands
        # Distinctness is reported before any range problem.
        if control == target:
            return False, "CX control and target must differ."
    elif count != 1:
        return False, f"{op} requires exactly 1 qubit, got {count}."

    # Report the first operand (in order) that falls outside the register.
    offender = next((q for q in operands if not 0 <= q < n_qubits), None)
    if offender is not None:
        return False, f"qubit {offender} out of range [0, {n_qubits})."
    return True, ""
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
class StabilizerForgeEnvironment(Environment):
    """Single-agent env: emit Clifford gates to encode a target stabilizer code.

    An episode starts with ``reset()``, which selects a task and clears the
    circuit. Each ``step()`` either appends one gate (H, S or CX) to the
    circuit or, on ``FINALIZE``, ends the episode with the terminal reward.
    Gate steps return a small shaping reward based on the change in the
    fraction of target stabilizers satisfied, plus a penalty for CX gates on
    qubit pairs not listed in the task's ``connectivity_edges``.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    def __init__(self, tasks_path: str | None = None):
        """Load the task list and initialise empty episode bookkeeping.

        Args:
            tasks_path: Optional path to a tasks ``.jsonl`` file; when omitted
                the path comes from ``_default_tasks_path()``.
        """
        super().__init__()
        self._tasks_path = tasks_path or _default_tasks_path()
        self._tasks = _load_tasks(self._tasks_path)
        self._task: dict[str, Any] | None = None
        self._gates: list[str] = []
        self._cnot_count = 0
        self._nonadj_cnot_count = 0
        self._format_violations = 0
        self._consecutive_violations = 0
        self._last_match_fraction = 0.0
        self._last_match_results: list[bool] = []
        self._finalized = False
        self._rng = random.Random()
        self._state = State(episode_id=str(uuid4()), step_count=0)

    # ---------- Helpers ----------

    def _circuit_text(self) -> str:
        """Return the gates emitted so far as newline-separated Stim text."""
        return "\n".join(self._gates)

    def _benchmark_optimum(self) -> int:
        """Return the task's ``benchmark_optimum`` as an int (0 if missing or null)."""
        assert self._task is not None
        return int(self._task.get("benchmark_optimum", 0) or 0)

    def _gate_budget(self) -> int:
        """Return the task's gate budget.

        Uses the task's ``gate_budget`` when present and non-null, otherwise
        twice the benchmark optimum (with the optimum floored at 1). Shared by
        ``step()`` and ``_make_obs()`` so both always agree on the budget.
        """
        assert self._task is not None
        budget = self._task.get("gate_budget")
        if budget is None:
            budget = 2 * max(1, self._benchmark_optimum())
        return int(budget)

    def _is_adjacent(self, a: int, b: int) -> bool:
        """Return True if a CX between qubits ``a`` and ``b`` respects connectivity.

        A task without ``connectivity_edges`` (missing or null) is treated as
        all-to-all. Edges are compared as unordered pairs.
        """
        edges = self._task.get("connectivity_edges") if self._task else None
        if edges is None:
            return True  # all-to-all
        edge_set = {tuple(sorted(e)) for e in edges}
        return tuple(sorted((a, b))) in edge_set

    def _compute_match(self) -> tuple[float, list[bool]]:
        """Evaluate the current circuit against the task's target stabilizers.

        Returns:
            ``(fraction, per_target)`` where ``per_target`` follows the order
            of ``target_stabilizers``. Returns ``(0.0, [])`` when no task is
            loaded.
        """
        if not self._task:
            return 0.0, []
        text = self._circuit_text()
        n = self._task["n_qubits"]
        targets = self._task["target_stabilizers"]
        frac, results_dict = match_fraction(text, targets, n)
        ordered = [results_dict[s] for s in targets]
        return frac, ordered

    def _make_obs(
        self,
        *,
        reward: float,
        done: bool,
        last_action_valid: bool = True,
        last_action_error: str = "",
    ) -> StabilizerObservation:
        """Build an observation snapshot of the current episode state.

        Args:
            reward: Reward to attach to this observation.
            done: Whether the episode has ended.
            last_action_valid: False when the triggering action was rejected.
            last_action_error: Reason for the rejection, empty if valid.
        """
        assert self._task is not None
        bench = self._benchmark_optimum()
        budget = self._gate_budget()
        return StabilizerObservation(
            task_id=self._task["task_id"],
            target_stabilizers=list(self._task["target_stabilizers"]),
            n_qubits=int(self._task["n_qubits"]),
            connectivity_edges=self._task.get("connectivity_edges"),
            gates_so_far=list(self._gates),
            current_circuit=self._circuit_text(),
            current_match=list(self._last_match_results),
            match_fraction=float(self._last_match_fraction),
            gates_emitted=len(self._gates),
            cnot_count=self._cnot_count,
            nonadj_cnot_count=self._nonadj_cnot_count,
            gate_budget=budget,
            # Clamp at zero: invalid actions can keep arriving once the budget is spent.
            gate_budget_remaining=max(0, budget - len(self._gates)),
            benchmark_optimum=bench,
            benchmark_optimum_2q=int(self._task.get("benchmark_optimum_2q", 0) or 0),
            format_violations=self._format_violations,
            consecutive_violations=self._consecutive_violations,
            last_action_valid=last_action_valid,
            last_action_error=last_action_error,
            step_count=self._state.step_count,
            finalized=self._finalized,
            done=done,
            reward=reward,
        )

    def _pick_task(self, task_id: str | None, seed: int | None) -> dict[str, Any]:
        """Select the task for a new episode.

        Args:
            task_id: When given, the task with this id is returned.
            seed: When given (and ``task_id`` is not), the choice is drawn
                from a fresh ``random.Random(seed)``; otherwise from the
                environment's own RNG.

        Raises:
            ValueError: If ``task_id`` is given but not found.
            RuntimeError: If no tasks were loaded.
        """
        if task_id is not None:
            for t in self._tasks:
                if t.get("task_id") == task_id:
                    return t
            raise ValueError(f"task_id '{task_id}' not found in {self._tasks_path}")
        if not self._tasks:
            raise RuntimeError(
                f"No tasks loaded from {self._tasks_path}. "
                "Set STABILIZER_FORGE_TASKS or place tasks.jsonl next to the env."
            )
        rng = random.Random(seed) if seed is not None else self._rng
        return rng.choice(self._tasks)

    # ---------- Gym API ----------

    def reset(
        self,
        seed: int | None = None,
        episode_id: str | None = None,
        task_id: str | None = None,
        **kwargs: Any,
    ) -> StabilizerObservation:
        """Start a new episode and return its initial observation.

        Args:
            seed: Optional seed; reseeds the environment RNG and makes the
                task choice reproducible.
            episode_id: Optional id for the new episode; a UUID is generated
                when omitted.
            task_id: Optional id of a specific task to load.
            **kwargs: Accepted for interface compatibility; unused here.
        """
        if seed is not None:
            self._rng = random.Random(seed)
        self._task = self._pick_task(task_id=task_id, seed=seed)

        self._gates = []
        self._cnot_count = 0
        self._nonadj_cnot_count = 0
        self._format_violations = 0
        self._consecutive_violations = 0
        self._finalized = False
        self._state = State(
            episode_id=episode_id or str(uuid4()), step_count=0
        )

        # Initial match (empty circuit on |0...0>)
        self._last_match_fraction, self._last_match_results = self._compute_match()
        return self._make_obs(reward=0.0, done=False)

    def step(self, action: StabilizerAction, **kwargs: Any) -> StabilizerObservation:  # type: ignore[override]
        """Apply one action and return the resulting observation.

        Invalid actions are counted, earn ``-W_FORMAT`` and end the episode
        after ``MAX_CONSECUTIVE_VIOLATIONS`` in a row. ``FINALIZE`` ends the
        episode with the terminal reward. Any other valid action appends a
        gate and returns the shaping reward; the episode also ends once the
        number of gates reaches the gate budget.

        Raises:
            RuntimeError: If called before ``reset()``.
        """
        if self._task is None:
            raise RuntimeError("step() called before reset().")

        if self._finalized:
            # The episode was closed by FINALIZE. Do not mutate the finalized
            # circuit or pay the terminal reward a second time.
            return self._make_obs(
                reward=0.0,
                done=True,
                last_action_valid=False,
                last_action_error="Episode already finalized; call reset() to start a new one.",
            )

        self._state.step_count += 1

        n_qubits = int(self._task["n_qubits"])
        gate_budget = self._gate_budget()

        # --- Validate ---
        ok, err = _validate_action(action, n_qubits)
        if not ok:
            self._format_violations += 1
            self._consecutive_violations += 1
            done = (
                self._consecutive_violations >= MAX_CONSECUTIVE_VIOLATIONS
                or len(self._gates) >= gate_budget
            )
            return self._make_obs(
                reward=W_FORMAT * -1.0,
                done=done,
                last_action_valid=False,
                last_action_error=err,
            )

        self._consecutive_violations = 0

        # --- FINALIZE: terminal reward ---
        if action.op == "FINALIZE":
            self._finalized = True
            self._last_match_fraction, self._last_match_results = self._compute_match()
            bench = max(1, self._benchmark_optimum())
            raw_2q = self._task.get("benchmark_optimum_2q")
            # A missing or null two-qubit benchmark falls back to the overall benchmark.
            bench_2q = max(1, int(bench if raw_2q is None else raw_2q))
            # NOTE(review): both efficiency terms equal 1.0 for an empty
            # circuit, so an immediate FINALIZE collects them in full
            # regardless of match fraction. Confirm this is intended.
            gate_eff = max(0.0, 1.0 - len(self._gates) / (1.5 * bench))
            twoq_eff = max(0.0, 1.0 - self._cnot_count / (1.5 * bench_2q))
            terminal = (
                W_MATCH * self._last_match_fraction
                + W_GATE_EFF * gate_eff
                + W_TWOQ_EFF * twoq_eff
            )
            return self._make_obs(reward=terminal, done=True)

        # --- Apply gate ---
        gate_str = _gate_to_stim(action)
        self._gates.append(gate_str)

        conn_penalty = 0.0
        if action.op == "CX":
            self._cnot_count += 1
            if not self._is_adjacent(action.qubits[0], action.qubits[1]):
                self._nonadj_cnot_count += 1
                conn_penalty = -1.0

        prev_match = self._last_match_fraction
        self._last_match_fraction, self._last_match_results = self._compute_match()
        delta = self._last_match_fraction - prev_match

        # Termination if we exceed budget without finalizing
        done = len(self._gates) >= gate_budget

        step_reward = SHAPING_COEF * delta + W_CONN * conn_penalty
        return self._make_obs(reward=step_reward, done=done)

    @property
    def state(self) -> State:
        """Return the current episode ``State`` (episode id and step count)."""
        return self._state
|
server/verifier.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Stim-based stabilizer verifier.
|
| 2 |
+
|
| 3 |
+
Vendored / adapted from uw-math-ai/quantum-ai's tools/check_stabilizers.py.
|
| 4 |
+
The core check uses Stim's TableauSimulator + peek_observable_expectation,
|
| 5 |
+
which is exact and polynomial in the number of qubits for Clifford circuits.
|
| 6 |
+
"""
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import re
|
| 10 |
+
|
| 11 |
+
import stim
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def preprocess_stim_text(raw: str) -> str:
    """Normalise raw circuit text into clean, newline-terminated Stim lines.

    Literal ``\\n`` escape sequences are turned into real newlines, each line
    is trimmed, blank lines and lines starting with ``#`` are dropped, and
    runs of whitespace inside a line collapse to a single space.

    Returns:
        The cleaned text with a trailing newline after every kept line, or
        ``""`` when ``raw`` is ``None`` or nothing survives the cleaning.
    """
    if raw is None:
        return ""
    unescaped = raw.strip().replace("\\n", "\n")
    trimmed = (row.strip() for row in unescaped.splitlines())
    kept = [
        re.sub(r"\s+", " ", row)
        for row in trimmed
        if row and not row.startswith("#")
    ]
    return "".join(f"{row}\n" for row in kept)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def check_stabilizers(
    circuit_text: str, stabilizers: list[str], n_qubits: int | None = None
) -> dict[str, bool]:
    """Report, per stabilizer, whether the circuit's output state on |0...0> has it.

    The circuit text is cleaned with ``preprocess_stim_text`` and run through
    a ``stim.TableauSimulator``. A stabilizer counts as preserved when its
    observable expectation is strictly positive.

    When ``n_qubits`` is given (and positive), an ``I`` gate on qubit
    ``n_qubits - 1`` is prepended so the simulator covers at least that many
    qubits; Stim's simulator only allocates qubits that the circuit touches.
    Stabilizer strings shorter than the circuit width are right-padded with
    ``I``.

    Returns:
        A dict mapping each stabilizer string to ``True`` / ``False``.
    """
    program = preprocess_stim_text(circuit_text)
    if n_qubits is not None and n_qubits > 0:
        program = f"I {n_qubits - 1}\n" + program
    if not program:
        # Nothing to run at all: keep a minimal one-qubit identity circuit.
        program = "I 0\n"

    circuit = stim.Circuit(program)
    simulator = stim.TableauSimulator()
    simulator.do(circuit)

    n_wires = max(circuit.num_qubits, n_qubits or 0)
    return {
        stab: simulator.peek_observable_expectation(
            stim.PauliString(stab.ljust(n_wires, "I"))
        )
        > 0
        for stab in stabilizers
    }
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def match_fraction(
    circuit_text: str, stabilizers: list[str], n_qubits: int
) -> tuple[float, dict[str, bool]]:
    """Return the share of stabilizers the circuit satisfies, plus the per-stabilizer map.

    Delegates the actual check to ``check_stabilizers``. The fraction is the
    number of ``True`` entries divided by the number of entries in the
    result map, or ``0.0`` when that map is empty.
    """
    outcome = check_stabilizers(circuit_text, stabilizers, n_qubits=n_qubits)
    if not outcome:
        return 0.0, outcome
    satisfied = sum(map(bool, outcome.values()))
    return satisfied / len(outcome), outcome
|
tasks.jsonl
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"task_id": "ghz2", "source_code": "Bell state", "n_qubits": 2, "target_stabilizers": ["XX", "ZZ"], "connectivity_edges": null, "gate_budget": 20, "benchmark_optimum": 2, "benchmark_optimum_2q": 1, "tier": 1}
|
| 2 |
+
{"task_id": "ghz3", "source_code": "3-qubit GHZ", "n_qubits": 3, "target_stabilizers": ["XXX", "ZZI", "IZZ"], "connectivity_edges": null, "gate_budget": 20, "benchmark_optimum": 3, "benchmark_optimum_2q": 2, "tier": 1}
|
| 3 |
+
{"task_id": "ghz4", "source_code": "4-qubit GHZ", "n_qubits": 4, "target_stabilizers": ["XXXX", "ZZII", "IZZI", "IIZZ"], "connectivity_edges": null, "gate_budget": 20, "benchmark_optimum": 4, "benchmark_optimum_2q": 3, "tier": 1}
|
| 4 |
+
{"task_id": "ghz5", "source_code": "5-qubit GHZ", "n_qubits": 5, "target_stabilizers": ["XXXXX", "ZZIII", "IZZII", "IIZZI", "IIIZZ"], "connectivity_edges": null, "gate_budget": 20, "benchmark_optimum": 5, "benchmark_optimum_2q": 4, "tier": 1}
|
| 5 |
+
{"task_id": "ghz6", "source_code": "6-qubit GHZ", "n_qubits": 6, "target_stabilizers": ["XXXXXX", "ZZIIII", "IZZIII", "IIZZII", "IIIZZI", "IIIIZZ"], "connectivity_edges": null, "gate_budget": 24, "benchmark_optimum": 6, "benchmark_optimum_2q": 5, "tier": 1}
|
| 6 |
+
{"task_id": "ghz7", "source_code": "7-qubit GHZ", "n_qubits": 7, "target_stabilizers": ["XXXXXXX", "ZZIIIII", "IZZIIII", "IIZZIII", "IIIZZII", "IIIIZZI", "IIIIIZZ"], "connectivity_edges": null, "gate_budget": 28, "benchmark_optimum": 7, "benchmark_optimum_2q": 6, "tier": 1}
|
| 7 |
+
{"task_id": "ghz8", "source_code": "8-qubit GHZ", "n_qubits": 8, "target_stabilizers": ["XXXXXXXX", "ZZIIIIII", "IZZIIIII", "IIZZIIII", "IIIZZIII", "IIIIZZII", "IIIIIZZI", "IIIIIIZZ"], "connectivity_edges": null, "gate_budget": 32, "benchmark_optimum": 8, "benchmark_optimum_2q": 7, "tier": 1}
|
| 8 |
+
{"task_id": "ghz9", "source_code": "9-qubit GHZ", "n_qubits": 9, "target_stabilizers": ["XXXXXXXXX", "ZZIIIIIII", "IZZIIIIII", "IIZZIIIII", "IIIZZIIII", "IIIIZZIII", "IIIIIZZII", "IIIIIIZZI", "IIIIIIIZZ"], "connectivity_edges": null, "gate_budget": 36, "benchmark_optimum": 9, "benchmark_optimum_2q": 8, "tier": 2}
|
| 9 |
+
{"task_id": "ghz10", "source_code": "10-qubit GHZ", "n_qubits": 10, "target_stabilizers": ["XXXXXXXXXX", "ZZIIIIIIII", "IZZIIIIIII", "IIZZIIIIII", "IIIZZIIIII", "IIIIZZIIII", "IIIIIZZIII", "IIIIIIZZII", "IIIIIIIZZI", "IIIIIIIIZZ"], "connectivity_edges": null, "gate_budget": 40, "benchmark_optimum": 10, "benchmark_optimum_2q": 9, "tier": 2}
|
| 10 |
+
{"task_id": "ghz11", "source_code": "11-qubit GHZ", "n_qubits": 11, "target_stabilizers": ["XXXXXXXXXXX", "ZZIIIIIIIII", "IZZIIIIIIII", "IIZZIIIIIII", "IIIZZIIIIII", "IIIIZZIIIII", "IIIIIZZIIII", "IIIIIIZZIII", "IIIIIIIZZII", "IIIIIIIIZZI", "IIIIIIIIIZZ"], "connectivity_edges": null, "gate_budget": 44, "benchmark_optimum": 11, "benchmark_optimum_2q": 10, "tier": 2}
|
| 11 |
+
{"task_id": "ghz12", "source_code": "12-qubit GHZ", "n_qubits": 12, "target_stabilizers": ["XXXXXXXXXXXX", "ZZIIIIIIIIII", "IZZIIIIIIIII", "IIZZIIIIIIII", "IIIZZIIIIIII", "IIIIZZIIIIII", "IIIIIZZIIIII", "IIIIIIZZIIII", "IIIIIIIZZIII", "IIIIIIIIZZII", "IIIIIIIIIZZI", "IIIIIIIIIIZZ"], "connectivity_edges": null, "gate_budget": 48, "benchmark_optimum": 12, "benchmark_optimum_2q": 11, "tier": 2}
|
| 12 |
+
{"task_id": "ghz13", "source_code": "13-qubit GHZ", "n_qubits": 13, "target_stabilizers": ["XXXXXXXXXXXXX", "ZZIIIIIIIIIII", "IZZIIIIIIIIII", "IIZZIIIIIIIII", "IIIZZIIIIIIII", "IIIIZZIIIIIII", "IIIIIZZIIIIII", "IIIIIIZZIIIII", "IIIIIIIZZIIII", "IIIIIIIIZZIII", "IIIIIIIIIZZII", "IIIIIIIIIIZZI", "IIIIIIIIIIIZZ"], "connectivity_edges": null, "gate_budget": 52, "benchmark_optimum": 13, "benchmark_optimum_2q": 12, "tier": 2}
|
| 13 |
+
{"task_id": "iceberg_code_m_2", "source_code": "Iceberg Code m=2", "n_qubits": 4, "target_stabilizers": ["XXXX", "ZZZZ"], "connectivity_edges": null, "gate_budget": 20, "benchmark_optimum": 6, "benchmark_optimum_2q": 5, "tier": 1}
|
| 14 |
+
{"task_id": "4-qubit_detector_code", "source_code": "4-Qubit Detector Code", "n_qubits": 4, "target_stabilizers": ["XXXX", "ZZZZ"], "connectivity_edges": null, "gate_budget": 20, "benchmark_optimum": 6, "benchmark_optimum_2q": 5, "tier": 1}
|
| 15 |
+
{"task_id": "iceberg_code_m_3", "source_code": "Iceberg Code m=3", "n_qubits": 6, "target_stabilizers": ["XXXXXX", "ZZZZZZ"], "connectivity_edges": null, "gate_budget": 30, "benchmark_optimum": 10, "benchmark_optimum_2q": 9, "tier": 1}
|
| 16 |
+
{"task_id": "hypercube_code_l_1", "source_code": "Hypercube Code l=1", "n_qubits": 6, "target_stabilizers": ["XXXXXX", "ZZZZZZ"], "connectivity_edges": null, "gate_budget": 30, "benchmark_optimum": 10, "benchmark_optimum_2q": 9, "tier": 1}
|
| 17 |
+
{"task_id": "iceberg_code_m_4", "source_code": "Iceberg Code m=4", "n_qubits": 8, "target_stabilizers": ["XXXXXXXX", "ZZZZZZZZ"], "connectivity_edges": null, "gate_budget": 42, "benchmark_optimum": 14, "benchmark_optimum_2q": 13, "tier": 1}
|
| 18 |
+
{"task_id": "perfect_5-qubit_code", "source_code": "Perfect 5-Qubit Code", "n_qubits": 5, "target_stabilizers": ["XZZXI", "IXZZX", "XIXZZ", "ZXIXZ"], "connectivity_edges": null, "gate_budget": 114, "benchmark_optimum": 38, "benchmark_optimum_2q": 19, "tier": 2}
|
| 19 |
+
{"task_id": "steane", "source_code": "Steane", "n_qubits": 7, "target_stabilizers": ["XXIIXXI", "XIXIXIX", "IIIXXXX", "ZZIIZZI", "ZIZIZIZ", "IIIZZZZ"], "connectivity_edges": null, "gate_budget": 78, "benchmark_optimum": 26, "benchmark_optimum_2q": 23, "tier": 2}
|
| 20 |
+
{"task_id": "shor", "source_code": "Shor", "n_qubits": 9, "target_stabilizers": ["XXXXXXIII", "XXXIIIXXX", "ZZIIIIIII", "ZIZIIIIII", "IIIZZIIII", "IIIZIZIII", "IIIIIIZZI", "IIIIIIZIZ"], "connectivity_edges": null, "gate_budget": 75, "benchmark_optimum": 25, "benchmark_optimum_2q": 23, "tier": 2}
|
| 21 |
+
{"task_id": "rotated_surface_code_d_3", "source_code": "Rotated Surface Code d=3", "n_qubits": 9, "target_stabilizers": ["XXIXXIIII", "IIIIXXIXX", "IIXIIXIII", "IIIXIIXII", "IIIZZIZZI", "IZZIZZIII", "ZZIIIIIII", "IIIIIIIZZ"], "connectivity_edges": null, "gate_budget": 69, "benchmark_optimum": 23, "benchmark_optimum_2q": 19, "tier": 2}
|
| 22 |
+
{"task_id": "hex_color_code_d_3", "source_code": "Hex Color Code d=3", "n_qubits": 7, "target_stabilizers": ["XXXXIII", "XIXIXIX", "IIXXXXI", "ZZZZIII", "ZIZIZIZ", "IIZZZZI"], "connectivity_edges": null, "gate_budget": 60, "benchmark_optimum": 20, "benchmark_optimum_2q": 17, "tier": 2}
|
| 23 |
+
{"task_id": "square_octagon_color_code_d_3", "source_code": "Square Octagon Color Code d=3", "n_qubits": 7, "target_stabilizers": ["IIXIXXX", "IXIXIXX", "XXXIIXI", "IIZIZZZ", "IZIZIZZ", "ZZZIIZI"], "connectivity_edges": null, "gate_budget": 66, "benchmark_optimum": 22, "benchmark_optimum_2q": 19, "tier": 2}
|
| 24 |
+
{"task_id": "carbon", "source_code": "Carbon", "n_qubits": 12, "target_stabilizers": ["XXXIIIXXXIII", "IIXXXIIIXXXI", "XIIIXXXIIIXX", "XXXXXXIIIIII", "IIIIIIXXXXXX", "IIZZZZIZIZII", "ZIIIZIZZZIIZ", "ZZZIIZZIIIZI", "ZIIZZZIIZIZI", "IZZIIIZZIZIZ"], "connectivity_edges": null, "gate_budget": 201, "benchmark_optimum": 67, "benchmark_optimum_2q": 62, "tier": 2}
|
| 25 |
+
{"task_id": "tetrahedral", "source_code": "Tetrahedral", "n_qubits": 15, "target_stabilizers": ["XXXXXXXXIIIIIII", "IXXIXXIIXXIXXII", "IIXXIXXIIXXXIXI", "IIIIXXXXIIIXXXX", "ZZZZIIIIIIIIIII", "IZZIZZIIIIIIIII", "IIZZIZZIIIIIIII", "IIIIZZZZIIIIIII", "IZIIZIIIZIIIZII", "IIZIIZIIIZIZIII", "IIZZIIIIIZZIIII", "IIIIZZIIIIIZZII", "IIIIIZZIIIIZIZI", "IIIIIIZZIIIIIZZ"], "connectivity_edges": null, "gate_budget": 168, "benchmark_optimum": 56, "benchmark_optimum_2q": 52, "tier": 3}
|
| 26 |
+
{"task_id": "hamming", "source_code": "Hamming", "n_qubits": 15, "target_stabilizers": ["IIIIIIIXXXXXXXX", "IIIXXXXIIIIXXXX", "IXXIIXXIIXXIIXX", "XIXIXIXIXIXIXIX", "IIIIIIIZZZZZZZZ", "IIIZZZZIIIIZZZZ", "IZZIIZZIIZZIIZZ", "ZIZIZIZIZIZIZIZ"], "connectivity_edges": null, "gate_budget": 207, "benchmark_optimum": 69, "benchmark_optimum_2q": 65, "tier": 3}
|
| 27 |
+
{"task_id": "square_octagon_color_code_d_5", "source_code": "Square Octagon Color Code d=5", "n_qubits": 17, "target_stabilizers": ["IIIIIXIIIXIXXIIII", "IIIIIIIIXIXIIXIXI", "IIIXIIIXIIIIIIXIX", "IIXIIIXIIIIIIIXIX", "IIIIXXXXXIXXIIIIX", "IXIIXIIIIIXIIXIII", "IIIIIIIIXXIXIIIXI", "XIXXIIIIIIIIIIXII", "IIIIIZIIIZIZZIIII", "IIIIIIIIZIZIIZIZI", "IIIZIIIZIIIIIIZIZ", "IIZIIIZIIIIIIIZIZ", "IIIIZZZZZIZZIIIIZ", "IZIIZIIIIIZIIZIII", "IIIIIIIIZZIZIIIZI", "ZIZZIIIIIIIIIIZII"], "connectivity_edges": null, "gate_budget": 288, "benchmark_optimum": 96, "benchmark_optimum_2q": 88, "tier": 3}
|
| 28 |
+
{"task_id": "hex_color_code_d_5", "source_code": "Hex Color Code d=5", "n_qubits": 19, "target_stabilizers": ["IIXIIXIIXXIIIIIIIII", "IIIIIIIIIIXIXIIIIXX", "IIIIIIIIIIXXIXIIIIX", "XXIIIIIIIIIIIIXXIII", "XXIXXIXIIIIIIIIIXII", "IIXIXIIIXIIIIIIIXII", "IXIIIIXXIIIIIIIXIII", "IIIXIIXXIIXXXIIIIII", "IIIXXIIIXXIXIXIIIII", "IIZIIZIIZZIIIIIIIII", "IIIIIIIIIIZIZIIIIZZ", "IIIIIIIIIIZZIZIIIIZ", "ZZIIIIIIIIIIIIZZIII", "ZZIZZIZIIIIIIIIIZII", "IIZIZIIIZIIIIIIIZII", "IZIIIIZZIIIIIIIZIII", "IIIZIIZZIIZZZIIIIII", "IIIZZIIIZZIZIZIIIII"], "connectivity_edges": null, "gate_budget": 333, "benchmark_optimum": 111, "benchmark_optimum_2q": 102, "tier": 3}
|
| 29 |
+
{"task_id": "rotated_surface_code_d_5", "source_code": "Rotated Surface Code d=5", "n_qubits": 25, "target_stabilizers": ["XXIIIXXIIIIIIIIIIIIIIIIII", "IIIIIIIIIIXXIIIXXIIIIIIII", "IIIIIIXXIIIXXIIIIIIIIIIII", "IIIIIIIIIIIIIIIIXXIIIXXII", "IIXXIIIXXIIIIIIIIIIIIIIII", "IIIIIIIIIIIIXXIIIXXIIIIII", "IIIIIIIIXXIIIXXIIIIIIIIII", "IIIIIIIIIIIIIIIIIIXXIIIXX", "IIIIXIIIIXIIIIIIIIIIIIIII", "IIIIIXIIIIXIIIIIIIIIIIIII", "IIIIIIIIIIIIIIXIIIIXIIIII", "IIIIIIIIIIIIIIIXIIIIXIIII", "IIIIIZZIIIZZIIIIIIIIIIIII", "IIIIIIIIIIIIIIIZZIIIZZIII", "IZZIIIZZIIIIIIIIIIIIIIIII", "IIIIIIIIIIIZZIIIZZIIIIIII", "IIIIIIIZZIIIZZIIIIIIIIIII", "IIIIIIIIIIIIIIIIIZZIIIZZI", "IIIZZIIIZZIIIIIIIIIIIIIII", "IIIIIIIIIIIIIZZIIIZZIIIII", "ZZIIIIIIIIIIIIIIIIIIIIIII", "IIIIIIIIIIIIIIIIIIIIIZZII", "IIZZIIIIIIIIIIIIIIIIIIIII", "IIIIIIIIIIIIIIIIIIIIIIIZZ"], "connectivity_edges": null, "gate_budget": 348, "benchmark_optimum": 116, "benchmark_optimum_2q": 104, "tier": 3}
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|