Merge pull request #3 from athurlow/claude/qcal-copilot-mvp-OZ9wj
Browse files
Fix CSV-as-plot rendering + surface VLM parse errors + force JSON output
- src/qcal/analyzer.py +37 -2
- src/qcal/data.py +57 -1
src/qcal/analyzer.py
CHANGED
|
@@ -57,8 +57,21 @@ Only output the JSON. Do not wrap it in markdown fences.
|
|
| 57 |
"""
|
| 58 |
|
| 59 |
USER_PROMPT_TEMPLATE = (
|
| 60 |
-
"Analyze this quantum calibration artifact ({source})
|
| 61 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
)
|
| 63 |
|
| 64 |
|
|
@@ -79,6 +92,24 @@ class AnalysisResult:
|
|
| 79 |
def markdown(self) -> str:
|
| 80 |
if self.error:
|
| 81 |
return f"**Analyzer error ({self.backend}):** {self.error}\n\n```\n{self.raw_text}\n```"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
p = self.parsed
|
| 83 |
lines = [
|
| 84 |
f"**Experiment:** {p.get('experiment', 'n/a')}",
|
|
@@ -157,6 +188,10 @@ def _analyze_via_nim(image: Image.Image, extra: str, source: str) -> AnalysisRes
|
|
| 157 |
],
|
| 158 |
"temperature": 0.2,
|
| 159 |
"max_tokens": 1024,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
}
|
| 161 |
try:
|
| 162 |
resp = requests.post(
|
|
|
|
| 57 |
"""
|
| 58 |
|
| 59 |
USER_PROMPT_TEMPLATE = (
|
| 60 |
+
"Analyze this quantum calibration artifact ({source}). "
|
| 61 |
+
"Respond with a SINGLE JSON object and nothing else — no prose before or "
|
| 62 |
+
"after, no markdown fences, no bullet points. The JSON must match this "
|
| 63 |
+
"schema exactly:\n\n"
|
| 64 |
+
"{{\n"
|
| 65 |
+
' "experiment": "<string>",\n'
|
| 66 |
+
' "qubit_id": "<string or null>",\n'
|
| 67 |
+
' "issues": ["<string>", ...],\n'
|
| 68 |
+
' "metrics": {{"<name>": "<value with units>", ...}},\n'
|
| 69 |
+
' "recommended_parameters": {{"<name>": <number or string>, ...}},\n'
|
| 70 |
+
' "drift_prediction": "<string>",\n'
|
| 71 |
+
' "confidence": <float 0..1>,\n'
|
| 72 |
+
' "notes": "<1-3 sentences>"\n'
|
| 73 |
+
"}}\n\n"
|
| 74 |
+
"Begin your reply with `{{` and end with `}}`.{extra}"
|
| 75 |
)
|
| 76 |
|
| 77 |
|
|
|
|
| 92 |
def markdown(self) -> str:
|
| 93 |
if self.error:
|
| 94 |
return f"**Analyzer error ({self.backend}):** {self.error}\n\n```\n{self.raw_text}\n```"
|
| 95 |
+
# The VLM returned 200 but the response wasn't parseable JSON (usually
|
| 96 |
+
# because the model wrote prose like "this doesn't look like a
|
| 97 |
+
# calibration plot"). Previously we'd render every field as 'n/a' with
|
| 98 |
+
# no indication of why — now surface the raw response so the user can
|
| 99 |
+
# see what the model actually said.
|
| 100 |
+
if not self.parsed:
|
| 101 |
+
snippet = (self.raw_text or "").strip() or "(empty response)"
|
| 102 |
+
if len(snippet) > 1200:
|
| 103 |
+
snippet = snippet[:1200] + "\n...[truncated]"
|
| 104 |
+
return (
|
| 105 |
+
f"**Analysis could not be parsed (backend: {self.backend}).** "
|
| 106 |
+
"The VLM returned a response but it wasn't valid JSON matching "
|
| 107 |
+
"the expected schema. Raw model output:\n\n"
|
| 108 |
+
f"```\n{snippet}\n```\n\n"
|
| 109 |
+
"_Tip: try a different image (Rabi/Ramsey/T1/T2/readout plots "
|
| 110 |
+
"work best), or re-run — the model occasionally flakes on "
|
| 111 |
+
"the first call after a cold start._"
|
| 112 |
+
)
|
| 113 |
p = self.parsed
|
| 114 |
lines = [
|
| 115 |
f"**Experiment:** {p.get('experiment', 'n/a')}",
|
|
|
|
| 188 |
],
|
| 189 |
"temperature": 0.2,
|
| 190 |
"max_tokens": 1024,
|
| 191 |
+
# Force JSON-only output on backends that support OpenAI's response
|
| 192 |
+
# format parameter (vLLM, most NIM deployments). The VLM otherwise
|
| 193 |
+
# sometimes replies with markdown prose despite the system prompt.
|
| 194 |
+
"response_format": {"type": "json_object"},
|
| 195 |
}
|
| 196 |
try:
|
| 197 |
resp = requests.post(
|
src/qcal/data.py
CHANGED
|
@@ -127,6 +127,62 @@ def _render_table_as_image(df: pd.DataFrame) -> Image.Image:
|
|
| 127 |
return Image.open(buf).convert("RGB")
|
| 128 |
|
| 129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
def _fig_to_pil(fig) -> Image.Image:
|
| 131 |
import matplotlib.pyplot as plt
|
| 132 |
|
|
@@ -449,7 +505,7 @@ def load_payload(
|
|
| 449 |
if ext in SUPPORTED_TABLE_EXTS:
|
| 450 |
sep = "," if ext == ".csv" else "\t"
|
| 451 |
df = pd.read_csv(path, sep=sep)
|
| 452 |
-
img =
|
| 453 |
return CalibrationPayload(image=img, table=df, source_name=name, kind="csv")
|
| 454 |
|
| 455 |
if ext == ".npy":
|
|
|
|
| 127 |
return Image.open(buf).convert("RGB")
|
| 128 |
|
| 129 |
|
| 130 |
+
def _render_csv_for_vlm(
    df: pd.DataFrame,
    *,
    experiment_type: str = "unknown",
    title: Optional[str] = None,
) -> Image.Image:
    """Render a user-uploaded CSV as whatever image the VLM can actually analyze.

    The Ising Calibration VLM is trained on *plots* (Rabi traces, T1 decays,
    IQ scatter, etc.), not on screenshots of numeric tables — feeding it a
    table grid drops recognition confidence to ~0.2 and produces the "no clear
    oscillations" failure mode. So for common CSV shapes we render a proper
    line or scatter plot; only truly arbitrary tables fall back to the grid.

    Args:
        df: parsed CSV contents.
        experiment_type: experiment hint forwarded to the line renderer.
        title: optional plot title (typically the uploaded filename).

    Returns:
        A PIL image suitable for the VLM backend.
    """
    import matplotlib.pyplot as plt  # noqa: F401 — keeps mpl import local

    numeric_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]

    # Readout IQ: two columns named like I/Q (any case, any order). This check
    # must come BEFORE the generic two-numeric-column branch: an I/Q CSV has
    # exactly two numeric columns, so checking the sweep case first rendered
    # IQ data as a line plot and made this scatter branch unreachable.
    # str(c) guards against non-string column labels (e.g. ints).
    lower = {str(c).lower(): c for c in df.columns}
    if "i" in lower and "q" in lower:
        iq = df[[lower["i"], lower["q"]]].to_numpy()
        return _render_scatter(iq, title=title)

    # Two numeric columns: classic sweep (x, y). Covers Rabi / Ramsey / T1 / T2
    # / resonator sweeps out of the box.
    if len(numeric_cols) == 2:
        x_col, y_col = numeric_cols
        return _render_line(
            df[y_col].to_numpy(),
            df[x_col].to_numpy(),
            experiment=experiment_type,
            x_label=str(x_col),
            y_label=str(y_col),
            title=title,
            fit=None,
        )

    # Single numeric column: plot vs row index.
    if len(numeric_cols) == 1:
        y_col = numeric_cols[0]
        return _render_line(
            df[y_col].to_numpy(),
            None,
            experiment=experiment_type,
            x_label="sample index",
            y_label=str(y_col),
            title=title,
            fit=None,
        )

    # Fall back to the table screenshot for wide/categorical tables the VLM
    # probably can't interpret anyway.
    return _render_table_as_image(df)
|
| 184 |
+
|
| 185 |
+
|
| 186 |
def _fig_to_pil(fig) -> Image.Image:
|
| 187 |
import matplotlib.pyplot as plt
|
| 188 |
|
|
|
|
| 505 |
if ext in SUPPORTED_TABLE_EXTS:
|
| 506 |
sep = "," if ext == ".csv" else "\t"
|
| 507 |
df = pd.read_csv(path, sep=sep)
|
| 508 |
+
img = _render_csv_for_vlm(df, experiment_type=experiment_type, title=name)
|
| 509 |
return CalibrationPayload(image=img, table=df, source_name=name, kind="csv")
|
| 510 |
|
| 511 |
if ext == ".npy":
|