# OpenEnv manifest (Section 9.1 of the plan).
# Validate with: openenv validate
# Manifest identity: environment name and release version.
name: qubit-medic
# Quoted so the release identifier always loads as a string; an unquoted
# two-component value such as 1.0 would be resolved as a float.
version: "1.0.0"
# Free-form summary. The literal block scalar (|) keeps the line breaks and
# the blank line between paragraphs exactly as written.
description: |
  RL training environment for LLM-based quantum error-correction decoders.

  Built on Stim + PyMatching with five independent verifiable rewards
  (logical correction, syndrome consistency, Hamming overlap, format
  compliance, PyMatching beat-rate). Designed to reproduce the AlphaQubit
  (Nature 2024) two-stage decoder pipeline at distance-3 with off-the-shelf
  3B-parameter LLMs trained on a single Colab T4.

# Attribution and distribution metadata.
authors:
  - name: Qubit-Medic team
license: MIT
# Project landing page (a Hugging Face Spaces URL).
homepage: https://huggingface.co/spaces/qubit-medic/qubit-medic

# How the environment server is located and exposed.
server:
  # Dotted module path plus the attribute name inside it -- presumably the
  # web application object; confirm against qubit_medic/server/app.py.
  module: qubit_medic.server.app
  app: app
  # Transport and listening port for the endpoints declared in this manifest.
  protocol: http
  port: 7860

# HTTP routes exposed by the server, keyed by endpoint name. Each entry gives
# the HTTP method and path; reset/step additionally name request and response
# models, and state names a response model only.
endpoints:
  # All endpoints below are registered automatically by
  # openenv.core.create_fastapi_app via qubit_medic.server.app.
  reset:
    method: POST
    path: /reset
    request_model: openenv.core.types.ResetRequest
    response_model: openenv.core.types.ResetResponse
  step:
    method: POST
    path: /step
    request_model: openenv.core.types.StepRequest
    response_model: openenv.core.types.StepResponse
  state:
    method: GET
    path: /state
    # Same dotted path as models.state further down in this manifest.
    response_model: qubit_medic.server.openenv_adapter.QubitMedicState
  # The remaining routes declare no request/response models here.
  schema:
    method: GET
    path: /schema
  metadata:
    method: GET
    path: /metadata
  health:
    method: GET
    path: /health
  healthz:        # Day-0 deployment-substrate probe (extra)
    method: GET
    path: /healthz
  decode:         # PyMatching baseline demo (extra)
    method: POST
    path: /decode

# Dotted paths to the environment class and its action / observation / state
# types; all four live in qubit_medic.server.openenv_adapter.
models:
  environment: qubit_medic.server.openenv_adapter.QubitMedicEnvironment
  action: qubit_medic.server.openenv_adapter.QubitMedicAction
  observation: qubit_medic.server.openenv_adapter.QubitMedicObservation
  state: qubit_medic.server.openenv_adapter.QubitMedicState

# Reward components with their weights. The five weights listed here sum to
# 1.00 (0.40 + 0.20 + 0.20 + 0.10 + 0.10); keep that in mind when editing --
# presumably the total is expected to stay normalised, confirm with the
# reward-combination code.
rewards:
  - name: logical_correction
    weight: 0.40
    description: 1 if the predicted Pauli frame preserves the logical Z observable.
  - name: syndrome_consistency
    weight: 0.20
    description: Hamming similarity over final-round detector parities.
  # NOTE(review): the name says "hamming" but the description says Jaccard
  # similarity -- confirm which metric the implementation actually uses.
  - name: hamming_overlap
    weight: 0.20
    description: Mean Jaccard similarity vs. PyMatching reference Pauli frame.
  - name: format_compliance
    weight: 0.10
    description: 1 / 0.5 / 0 for full / partial / unparseable output.
  - name: pymatching_beat
    weight: 0.10
    description: 1 iff PyMatching wrong AND model right on this syndrome.

# Curriculum levels, listed from warm-up through target to stretch.
# Field meanings are not defined in this file; presumably:
#   distance            - code distance
#   rounds              - number of syndrome-measurement rounds
#   p                   - physical error rate
#   promotion_threshold - score needed to advance to the next level
# TODO confirm these (and which metric the threshold applies to) against the
# curriculum implementation.
curriculum:
  - name: L1_warmup
    distance: 3
    rounds: 1
    p: 0.0001
    promotion_threshold: 0.80
  - name: L2_target
    distance: 3
    rounds: 3
    p: 0.001
    promotion_threshold: 0.70
  - name: L3_stretch
    distance: 5
    rounds: 5
    p: 0.001
    promotion_threshold: 0.30

# Bibliography. Each list item is a single-key mapping whose key is the
# citation id and whose value holds the reference fields.
citations:
  - gidney_stim_2021:
      title: "Stim: a fast stabilizer circuit simulator"
      authors: Gidney, Craig
      venue: Quantum 5:497
      doi: 10.22331/q-2021-07-06-497
      # Quoted: a bare 2103.02202 is resolved as a float, not an arXiv id.
      arxiv: "2103.02202"
      note: |
        Field-standard Clifford simulator for quantum error correction.
        Same simulation substrate used by AlphaQubit (Bausch 2024) and
        Willow (Acharya 2024); not a homemade simulator.
  - bausch_alphaqubit_2024:
      title: Learning high-accuracy error decoding for quantum processors
      authors: Bausch et al.
      venue: Nature 635:834
      doi: 10.1038/s41586-024-08148-8
  - acharya_willow_2024:
      title: Quantum error correction below the surface code threshold
      authors: Acharya et al. (Google QAI)
      venue: arXiv:2408.13687
  - gidney_si1000_2021:
      title: A fault-tolerant honeycomb memory
      # Full author list of arXiv:2108.10457 (four authors).
      authors: Gidney, Newman, Fowler & Broughton
      venue: arXiv:2108.10457
  - higgott_pymatching_2023:
      title: Sparse Blossom - PyMatching v2
      authors: Higgott & Gidney
      venue: arXiv:2303.15933
  - shao_grpo_2024:
      title: DeepSeekMath - introduces GRPO
      authors: Shao et al.
      venue: arXiv:2402.03300