Hasarindu Perera committed on
Commit
df233ec
·
unverified ·
1 Parent(s): d0c761a

chore: update to v0.8.0

Browse files
Files changed (2) hide show
  1. app.py +817 -467
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,12 +1,11 @@
1
- """QuPrep — HuggingFace Spaces demo.
2
-
3
- Upload a CSV or use a built-in sample dataset, pick an encoding and export
4
- framework, and get a quantum circuit back — all in the browser.
5
- """
6
 
7
  from __future__ import annotations
8
 
9
  import io
 
 
 
10
  import traceback
11
 
12
  import gradio as gr
@@ -14,56 +13,87 @@ import numpy as np
14
  import pandas as pd
15
 
16
  # ---------------------------------------------------------------------------
17
- # Sample datasets (bundled — no network needed)
 
18
  # ---------------------------------------------------------------------------
19
 
20
- def _make_iris_csv() -> str:
21
  from sklearn.datasets import load_iris
22
  ds = load_iris(as_frame=True)
23
- df = ds.data.copy()
24
- return df.to_csv(index=False)
25
-
26
 
27
- def _make_heart_csv() -> str:
28
- """Small synthetic heart-disease-style dataset."""
29
  rng = np.random.default_rng(42)
30
  n = 50
31
- df = pd.DataFrame({
32
- "age": rng.integers(30, 75, n).astype(float),
33
- "trestbps": rng.integers(90, 180, n).astype(float),
34
- "chol": rng.integers(150, 350, n).astype(float),
35
- "thalach": rng.integers(90, 200, n).astype(float),
36
- "oldpeak": rng.uniform(0, 5, n).round(1),
37
- })
38
- return df.to_csv(index=False)
39
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  SAMPLES = {
42
- "Iris (150 samples, 4 features)": _make_iris_csv,
43
- "Synthetic Heart (50 samples, 5 features)": _make_heart_csv,
 
 
 
 
44
  }
45
 
46
  # ---------------------------------------------------------------------------
47
- # Encoding / framework metadata
48
  # ---------------------------------------------------------------------------
49
 
50
  ENCODINGS = [
51
- "angle",
52
- "entangled_angle",
53
- "amplitude",
54
- "basis",
55
- "iqp",
56
- "reupload",
57
- "hamiltonian",
58
- "zz_feature_map",
59
- "pauli_feature_map",
60
- "random_fourier",
61
- "tensor_product",
62
- "qaoa_problem",
63
  ]
64
 
65
- FRAMEWORKS = ["qasm", "qiskit", "pennylane", "cirq", "tket", "braket", "qsharp", "iqm"]
66
-
67
  ENCODING_DESC = {
68
  "angle": "Ry/Rx/Rz rotation per feature. NISQ-safe, depth O(1).",
69
  "entangled_angle": "Rotation + CNOT entangling layers. NISQ-safe.",
@@ -73,336 +103,616 @@ ENCODING_DESC = {
73
  "reupload": "Data re-uploading (Pérez-Salinas). High expressivity.",
74
  "hamiltonian": "Trotterized Hamiltonian evolution.",
75
  "zz_feature_map": "Qiskit-compatible ZZ feature map.",
76
- "pauli_feature_map":"Generalised Pauli feature map (configurable strings).",
77
  "random_fourier": "RBF kernel approximation via random Fourier features.",
78
  "tensor_product": "Ry+Rz per qubit — full Bloch sphere, qubit-efficient.",
79
  "qaoa_problem": "QAOA-inspired feature map. Features as cost Hamiltonian parameters.",
80
  }
81
 
82
- # ---------------------------------------------------------------------------
83
- # Core run function
84
- # ---------------------------------------------------------------------------
85
 
86
  _EMPTY_DF = pd.DataFrame()
87
 
 
 
 
88
 
89
- def run_quprep(
90
- csv_file,
91
- sample_name: str,
92
- encoding: str,
93
- framework: str,
94
- n_samples: int,
95
- n_qubits: int,
96
- ) -> tuple[pd.DataFrame, pd.DataFrame, str, str, str]:
97
  """
98
- Returns (input_preview, encoded_preview, circuit_output, cost_info, status_message).
 
 
99
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  try:
101
- import quprep as qd
102
- except ImportError:
103
- return _EMPTY_DF, _EMPTY_DF, "", "", "❌ quprep is not installed in this Space."
104
 
105
- # --- load data ---
106
- try:
107
- if csv_file is not None:
108
- # Gradio 6.x returns a filepath string; older versions return a file object
109
- csv_path = csv_file if isinstance(csv_file, str) else csv_file.name
110
- df = pd.read_csv(csv_path)
111
- elif sample_name and sample_name in SAMPLES:
112
- df = pd.read_csv(io.StringIO(SAMPLES[sample_name]()))
113
- else:
114
- return "", "", "⚠️ Please upload a CSV or select a sample dataset."
115
 
116
- # keep numeric columns only, drop NaN rows for simplicity
117
- df = df.select_dtypes(include="number").dropna()
118
- if df.empty:
119
- return "", "", "⚠️ No numeric columns found after cleaning."
120
 
121
- # limit samples
122
- df = df.head(max(1, n_samples))
 
123
 
124
- # write to a temp file so quprep can ingest it
125
- import tempfile, os
126
- with tempfile.NamedTemporaryFile(
127
- mode="w", suffix=".csv", delete=False, encoding="utf-8"
128
- ) as tmp:
129
- df.to_csv(tmp, index=False)
130
- tmp_path = tmp.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
- except Exception as exc:
133
- return _EMPTY_DF, _EMPTY_DF, "", "", f"❌ Data loading error: {exc}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
- # --- run pipeline ---
136
  try:
137
- kwargs: dict = {}
138
- if framework == "qasm":
139
- kwargs["framework"] = "qasm"
140
- else:
141
- kwargs["framework"] = framework
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
- # hardware-aware reduction if qubit budget requested
144
- if n_qubits > 0 and df.shape[1] > n_qubits:
145
- from quprep.reduce.hardware_aware import HardwareAwareReducer
146
- reducer = HardwareAwareReducer(n_qubits=n_qubits)
147
- kwargs["reducer"] = reducer
148
 
149
- result = qd.prepare(tmp_path, encoding=encoding, **kwargs)
 
 
 
 
 
 
 
150
 
151
  except ImportError as exc:
152
- missing = str(exc)
153
- return _EMPTY_DF, _EMPTY_DF, "", "", (
154
- f"⚠️ Optional dependency not installed in this Space: {missing}\n"
155
- f"Try selecting **qasm** as the framework, or install the extra."
156
- )
157
- except Exception as exc:
158
- tb = traceback.format_exc()
159
- return _EMPTY_DF, _EMPTY_DF, "", "", f"❌ Pipeline error:\n{tb}"
160
- finally:
161
- try:
162
- os.unlink(tmp_path)
163
- except Exception:
164
- pass
165
 
166
- # --- format output ---
167
  circuits = result.circuits or []
168
  if not circuits:
169
  return _EMPTY_DF, _EMPTY_DF, "", "", "⚠️ No circuits produced."
170
 
171
- # input preview first 5 rows of cleaned df
172
- input_preview = df.head(5).round(4)
173
 
174
- # encoded parameters preview — parameters array from each encoded result
175
- encoded_list = result.encoded or []
176
- if encoded_list:
177
  try:
178
- rows = []
179
- for i, enc in enumerate(encoded_list[:5]):
180
- params = enc.parameters
181
- row = {f"q{j}": round(float(p), 4) for j, p in enumerate(params)}
182
- row = {"sample": i, **row}
183
- rows.append(row)
184
- encoded_preview = pd.DataFrame(rows).set_index("sample")
185
  except Exception:
186
- encoded_preview = _EMPTY_DF
187
- else:
188
- encoded_preview = _EMPTY_DF
189
 
190
- # show first circuit as text
191
  first = circuits[0]
192
  if isinstance(first, str):
193
  circuit_text = first
194
  else:
195
- try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  circuit_text = str(first)
197
- except Exception:
198
- circuit_text = repr(first)
199
 
200
- # cost info — rendered as an HTML card
201
  cost = result.cost
 
202
  if cost:
203
- nisq_badge = (
204
- '<span style="color:#4ade80;font-weight:600">✓ NISQ-safe</span>'
205
- if cost.nisq_safe else
206
- '<span style="color:#f87171;font-weight:600">✗ Not NISQ-safe</span>'
207
- )
208
- warning_html = (
209
- f'<p style="margin:8px 0 0;color:#fbbf24">⚠️ {cost.warning}</p>'
210
- if cost.warning else ""
211
- )
212
  cost_html = f"""
213
  <div style="font-family:monospace;font-size:0.9rem;line-height:1.8">
214
  <div style="display:grid;grid-template-columns:1fr 1fr;gap:4px 24px">
215
- <span style="color:#94a3b8">Encoding</span> <span>{cost.encoding}</span>
216
- <span style="color:#94a3b8">Qubits</span> <span>{cost.n_qubits}</span>
217
- <span style="color:#94a3b8">Gate count</span> <span>{cost.gate_count}</span>
218
- <span style="color:#94a3b8">Depth</span> <span>{cost.circuit_depth}</span>
219
- <span style="color:#94a3b8">2-qubit gates</span> <span>{cost.two_qubit_gates}</span>
220
- <span style="color:#94a3b8">NISQ</span> <span>{nisq_badge}</span>
221
- </div>
222
- {warning_html}
223
  </div>"""
224
- else:
225
- cost_html = "<p style='color:#94a3b8'>Cost estimate not available for this combination.</p>"
226
-
227
- n_total = len(circuits)
228
- status = (
229
- f"✓ {df.shape[0]} sample(s) × {df.shape[1]} feature(s) "
230
- f"→ {n_total} circuit(s) | showing sample 0"
231
- )
232
 
233
- return input_preview, encoded_preview, circuit_text, cost_html, status
 
 
 
 
 
234
 
235
 
236
  # ---------------------------------------------------------------------------
237
- # Recommendation tab
238
  # ---------------------------------------------------------------------------
239
 
240
- def run_recommend(csv_file, sample_name: str, task: str, n_qubits: int) -> str:
241
- try:
242
- import quprep as qd
243
- except ImportError:
244
- return "<p>❌ quprep is not installed in this Space.</p>"
245
-
246
  try:
247
- if csv_file is not None:
248
- csv_path = csv_file if isinstance(csv_file, str) else csv_file.name
249
- df = pd.read_csv(csv_path)
250
- elif sample_name and sample_name in SAMPLES:
251
- df = pd.read_csv(io.StringIO(SAMPLES[sample_name]()))
252
- else:
253
- return "<p>⚠️ Please upload a CSV or select a sample dataset.</p>"
254
-
255
- df = df.select_dtypes(include="number").dropna()
256
- if df.empty:
257
- return "<p>⚠️ No numeric columns found.</p>"
258
-
259
- import tempfile, os
260
- with tempfile.NamedTemporaryFile(
261
- mode="w", suffix=".csv", delete=False, encoding="utf-8"
262
- ) as tmp:
263
- df.to_csv(tmp, index=False)
264
- tmp_path = tmp.name
265
-
266
- qubits_arg = n_qubits if n_qubits > 0 else None
267
- rec = qd.recommend(tmp_path, task=task, qubits=qubits_arg)
268
-
269
- nisq_badge = (
270
- '<span style="color:#4ade80;font-weight:600">✓ Yes</span>'
271
- if rec.nisq_safe else
272
- '<span style="color:#f87171;font-weight:600">✗ No</span>'
273
- )
274
-
275
  alt_rows = "".join(
276
- f"""<tr>
277
- <td style="padding:6px 12px">{a.method}</td>
278
- <td style="padding:6px 12px;text-align:center">{a.score:.0f}</td>
279
- <td style="padding:6px 12px;color:#94a3b8">{a.depth}</td>
280
- </tr>"""
281
  for a in rec.alternatives
282
  )
283
- alt_html = f"""
284
- <div style="margin-top:20px">
285
- <p style="margin:0 0 8px;font-size:0.8rem;font-weight:600;color:#94a3b8;text-transform:uppercase;letter-spacing:.05em">Alternatives</p>
286
- <table style="width:100%;border-collapse:collapse;font-size:0.85rem">
287
- <thead>
288
- <tr style="border-bottom:1px solid #334155">
289
- <th style="padding:6px 12px;text-align:left;color:#64748b;font-weight:500">Encoding</th>
290
- <th style="padding:6px 12px;text-align:center;color:#64748b;font-weight:500">Score</th>
291
- <th style="padding:6px 12px;text-align:left;color:#64748b;font-weight:500">Depth</th>
292
- </tr>
293
- </thead>
294
- <tbody>{alt_rows}</tbody>
295
- </table>
296
- </div>""" if rec.alternatives else ""
297
-
298
  return f"""
299
  <div style="font-family:sans-serif;font-size:0.9rem;line-height:1.6">
300
  <div style="display:flex;align-items:baseline;gap:12px;margin-bottom:16px">
301
  <span style="font-size:1.6rem;font-weight:700;color:#e2e8f0">{rec.method}</span>
302
- <span style="font-size:0.8rem;color:#a78bfa;font-weight:600">recommended</span>
303
  </div>
304
  <div style="display:grid;grid-template-columns:auto 1fr;gap:4px 24px;margin-bottom:16px">
305
- <span style="color:#64748b">Qubits needed</span> <span>{rec.qubits}</span>
306
- <span style="color:#64748b">Circuit depth</span> <span style="font-family:monospace">{rec.depth}</span>
307
- <span style="color:#64748b">NISQ safe</span> <span>{nisq_badge}</span>
308
- <span style="color:#64748b">Score</span> <span>{rec.score:.0f}</span>
309
- </div>
310
- <div style="padding:12px 16px;background:#1e293b;border-radius:8px;color:#cbd5e1;font-size:0.85rem;line-height:1.6">
311
- {rec.reason}
312
  </div>
313
- {alt_html}
 
314
  </div>"""
315
-
316
  except Exception as exc:
317
  return f"<p>❌ {exc}</p>"
318
  finally:
319
- try:
320
- os.unlink(tmp_path)
321
- except Exception:
322
- pass
323
 
324
 
325
  # ---------------------------------------------------------------------------
326
- # Compare tab
327
  # ---------------------------------------------------------------------------
328
 
329
- def run_compare(csv_file, sample_name: str, task: str, n_qubits: int) -> str:
330
- try:
331
- import quprep as qd
332
- except ImportError:
333
- return "<p>❌ quprep is not installed in this Space.</p>"
334
-
335
  try:
336
- if csv_file is not None:
337
- csv_path = csv_file if isinstance(csv_file, str) else csv_file.name
338
- df = pd.read_csv(csv_path)
339
- elif sample_name and sample_name in SAMPLES:
340
- df = pd.read_csv(io.StringIO(SAMPLES[sample_name]()))
341
- else:
342
- return "<p>⚠️ Please upload a CSV or select a sample dataset.</p>"
343
-
344
- df = df.select_dtypes(include="number").dropna()
345
- if df.empty:
346
- return "<p>⚠️ No numeric columns found.</p>"
347
-
348
- import tempfile, os
349
- with tempfile.NamedTemporaryFile(
350
- mode="w", suffix=".csv", delete=False, encoding="utf-8"
351
- ) as tmp:
352
- df.to_csv(tmp, index=False)
353
- tmp_path = tmp.name
354
-
355
- qubits_arg = n_qubits if n_qubits > 0 else None
356
- result = qd.compare_encodings(tmp_path, task=task, qubits=qubits_arg)
357
-
358
  rows_html = ""
359
  for r in result.rows:
360
  nisq = '<span style="color:#4ade80">Yes</span>' if r.nisq_safe else '<span style="color:#f87171">No</span>'
361
  name = f"{r.encoding} ★" if r.encoding == result.recommended else r.encoding
362
- style = "background:#1e293b" if r.encoding == result.recommended else ""
363
- rows_html += f"""<tr style="{style}">
364
- <td style="padding:8px 14px;font-weight:{'600' if r.encoding == result.recommended else '400'}">{name}</td>
365
- <td style="padding:8px 14px;text-align:center">{r.n_qubits}</td>
366
- <td style="padding:8px 14px;text-align:center">{r.gate_count}</td>
367
- <td style="padding:8px 14px;text-align:center">{r.circuit_depth}</td>
368
- <td style="padding:8px 14px;text-align:center">{r.two_qubit_gates}</td>
369
- <td style="padding:8px 14px;text-align:center">{nisq}</td>
370
- </tr>"""
371
-
372
- footnote = "<p style='margin:12px 0 0;font-size:0.78rem;color:#475569'>★ recommended for the specified task / budget</p>" if result.recommended else ""
373
-
374
- warnings = [r for r in result.rows if r.warning]
375
- warn_html = "".join(
376
- f"<p style='margin:4px 0;font-size:0.78rem;color:#fbbf24'>⚠️ [{r.encoding}] {r.warning}</p>"
377
- for r in warnings
378
- )
379
-
380
  return f"""
381
  <div style="font-family:sans-serif;font-size:0.88rem">
382
  <table style="width:100%;border-collapse:collapse">
383
- <thead>
384
- <tr style="border-bottom:1px solid #334155">
385
- <th style="padding:8px 14px;text-align:left;color:#64748b;font-weight:500">Encoding</th>
386
- <th style="padding:8px 14px;text-align:center;color:#64748b;font-weight:500">Qubits</th>
387
- <th style="padding:8px 14px;text-align:center;color:#64748b;font-weight:500">Gate Count</th>
388
- <th style="padding:8px 14px;text-align:center;color:#64748b;font-weight:500">Depth</th>
389
- <th style="padding:8px 14px;text-align:center;color:#64748b;font-weight:500">2Q Gates</th>
390
- <th style="padding:8px 14px;text-align:center;color:#64748b;font-weight:500">NISQ Safe</th>
391
- </tr>
392
- </thead>
393
- <tbody>{rows_html}</tbody>
394
  </table>
395
- {footnote}
396
  {warn_html}
397
  </div>"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  except Exception as exc:
400
  return f"<p>❌ {exc}</p>"
401
  finally:
402
- try:
403
- os.unlink(tmp_path)
404
- except Exception:
405
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
 
407
 
408
  # ---------------------------------------------------------------------------
@@ -410,245 +720,285 @@ def run_compare(csv_file, sample_name: str, task: str, n_qubits: int) -> str:
410
  # ---------------------------------------------------------------------------
411
 
412
  THEME = gr.themes.Soft(primary_hue="violet", secondary_hue="blue")
413
-
414
  CSS = """
415
- #info-panel, #data-panel {
416
- border: 1px solid #334155 !important;
417
- border-radius: 12px !important;
418
- padding: 20px 24px !important;
419
- box-sizing: border-box;
420
- min-height: 320px !important;
421
  }
422
- #data-panel {
423
- gap: 12px !important;
 
 
424
  }
425
  """
426
 
427
- with gr.Blocks(title="QuPrep Quantum Data Preparation", css=CSS) as demo:
 
 
428
 
 
429
  with gr.Row(equal_height=True):
430
- # ── Left: package info ─────────────────────────────────────────────
431
- with gr.Column(scale=1, elem_id="info-panel"):
432
- gr.HTML("""
433
- <div style="display:flex;flex-direction:column;justify-content:center">
434
- <p style="margin:0 0 2px;font-size:1.5rem;font-weight:700;color:#e2e8f0">⚛️ QuPrep</p>
435
- <p style="margin:0 0 14px;font-size:0.9rem;font-weight:500;color:#a78bfa">Quantum Data Preparation</p>
436
- <p style="margin:0 0 14px;font-size:0.85rem;color:#94a3b8;line-height:1.6">
437
- The missing preprocessing layer between classical datasets and quantum computing.
438
- Framework-agnostic: Qiskit · PennyLane · Cirq · TKET · Braket · Q# · IQM · OpenQASM 3.0.
439
- </p>
440
- <div style="display:flex;flex-direction:column;gap:8px;font-size:0.85rem">
441
- <div>📦 <code style="background:#1e293b;padding:2px 8px;border-radius:4px">pip install quprep</code></div>
442
- <div>📖 <a href="https://docs.quprep.org" target="_blank" style="color:#818cf8">docs.quprep.org</a></div>
443
- <div>💻 <a href="https://github.com/quprep/quprep" target="_blank" style="color:#818cf8">github.com/quprep/quprep</a></div>
444
- <div>🌐 <a href="https://quprep.org" target="_blank" style="color:#818cf8">quprep.org</a></div>
445
- </div>
446
- <p style="margin:14px 0 0;font-size:0.75rem;color:#475569">
447
- 12 encodings · 8 export frameworks · Apache 2.0 · Python ≥ 3.10
448
- </p>
449
- </div>
450
- """)
451
 
452
- # ── Right: data inputs ─────────────────────────────────────────────
453
- with gr.Column(scale=1, elem_id="data-panel"):
454
  gr.HTML("""
455
- <p style="margin:0 0 2px;font-size:1.5rem;font-weight:700;color:#e2e8f0">📂 Load Dataset</p>
456
- <p style="margin:0 0 4px;font-size:0.9rem;font-weight:500;color:#a78bfa">Upload a CSV or pick a sample</p>
457
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  csv_upload = gr.File(
459
- label="Upload CSV (.csv / .tsv)",
460
  file_types=[".csv", ".tsv"],
461
- height=150,
 
462
  )
463
- sample_dd = gr.Dropdown(
464
- choices=[""] + list(SAMPLES.keys()),
465
- value="Iris (150 samples, 4 features)",
466
- label="Or use a built-in sample dataset",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
467
  )
468
- gr.HTML("""
469
- <p style="margin:0;font-size:0.75rem;color:#475569">Uploaded file takes priority over the sample selector.</p>
470
- """)
471
 
 
 
 
 
 
 
 
 
 
472
  with gr.Tabs():
473
 
474
- # ── Tab 1: Convert ──────────────────────────────────────────────────
475
  with gr.TabItem("Convert"):
476
  with gr.Row():
477
-
478
- # ── Left: controls ──────────────────────────────────────────
479
- with gr.Column(scale=1, min_width=200):
480
- enc_dd = gr.Dropdown(
481
- choices=ENCODINGS, value="angle", label="Encoding",
482
- )
483
- enc_info = gr.Markdown(
484
- f"<small><i>{ENCODING_DESC['angle']}</i></small>"
485
- )
486
- fw_dd = gr.Dropdown(
487
- choices=FRAMEWORKS, value="qasm", label="Export framework",
488
- )
489
- n_samples_sl = gr.Slider(
490
- minimum=1, maximum=20, value=1, step=1,
491
- label="Samples to encode",
492
- )
493
- n_qubits_sl = gr.Slider(
494
- minimum=0, maximum=20, value=0, step=1,
495
- label="Qubit budget (0 = no reduction)",
496
- )
497
- convert_btn = gr.Button("Convert →", variant="primary", size="lg")
498
-
499
- # ── Right: results ──────────────────────────────────────────
500
  with gr.Column(scale=4):
501
- status_out = gr.Textbox(
502
- label="", lines=1, max_lines=1,
503
- interactive=False, show_label=False,
504
- placeholder="Press Convert → to run",
505
- )
506
-
507
- # top row — circuit + cost side by side
508
  with gr.Row(equal_height=True):
509
  with gr.Column(scale=3):
510
- circuit_out = gr.Code(
511
- label="Circuit output (sample 0)",
512
- language="python", lines=18,
513
- )
514
  with gr.Column(scale=1):
515
- cost_out = gr.HTML(label="Cost estimate")
 
 
 
516
 
517
- # bottom row — input data + encoded parameters side by side
518
- with gr.Row(equal_height=True):
519
- with gr.Column(scale=1):
520
- input_table = gr.Dataframe(
521
- label="Input data (first 5 rows)", interactive=False,
522
- )
523
- with gr.Column(scale=1):
524
- encoded_table = gr.Dataframe(
525
- label="Encoded parameters (first 5 rows)", interactive=False,
526
- )
527
-
528
- enc_dd.change(
529
- fn=lambda e: f"<small><i>{ENCODING_DESC.get(e, '')}</i></small>",
530
- inputs=enc_dd,
531
- outputs=enc_info,
532
- )
533
 
534
- convert_btn.click(
535
- fn=run_quprep,
536
- inputs=[csv_upload, sample_dd, enc_dd, fw_dd, n_samples_sl, n_qubits_sl],
537
- outputs=[input_table, encoded_table, circuit_out, cost_out, status_out],
538
- )
 
539
 
540
- # ── Tab 2: Recommend ────────────────────────────────────────────────
541
  with gr.TabItem("Recommend"):
542
  with gr.Row():
543
  with gr.Column(scale=1):
544
- task_dd = gr.Dropdown(
545
- choices=["classification", "regression", "kernel", "qaoa", "simulation"],
546
- value="classification",
547
- label="Task",
548
- )
549
- rec_qubits_sl = gr.Slider(
550
- minimum=0, maximum=20, value=0, step=1,
551
- label="Qubit budget (0 = no limit)",
552
- )
553
- rec_btn = gr.Button("Recommend →", variant="primary")
554
  with gr.Column(scale=2):
555
- rec_out = gr.HTML(
556
- label="Recommendation",
557
- value="""
558
- <div style="font-family:sans-serif;color:#475569;font-size:0.9rem;padding:24px 0">
559
- <p style="margin:0 0 8px;font-size:1rem;font-weight:600;color:#64748b">No recommendation yet</p>
560
- <p style="margin:0;line-height:1.6">Select a task, set an optional qubit budget, and click <strong>Recommend →</strong> to get a dataset-aware encoding suggestion with ranked alternatives.</p>
561
- </div>""",
562
- )
563
-
564
- rec_btn.click(
565
- fn=run_recommend,
566
- inputs=[csv_upload, sample_dd, task_dd, rec_qubits_sl],
567
- outputs=rec_out,
568
- )
569
 
570
- # ── Tab 3: Compare ──────────────────────────────────────────────────
 
 
 
 
 
571
  with gr.TabItem("Compare encoders"):
572
  with gr.Row():
573
  with gr.Column(scale=1):
574
- cmp_task_dd = gr.Dropdown(
575
- choices=["classification", "regression", "kernel", "qaoa", "simulation"],
576
- value="classification",
577
- label="Task",
578
- )
579
- cmp_qubits_sl = gr.Slider(
580
- minimum=0, maximum=20, value=8, step=1,
581
- label="Qubit budget (0 = no limit)",
582
- )
583
- cmp_btn = gr.Button("Compare →", variant="primary")
584
  with gr.Column(scale=2):
585
- cmp_out = gr.HTML(
586
- label="Comparison table",
587
- value="""
588
- <div style="font-family:sans-serif;color:#475569;font-size:0.9rem;padding:24px 0">
589
- <p style="margin:0 0 8px;font-size:1rem;font-weight:600;color:#64748b">No comparison yet</p>
590
- <p style="margin:0;line-height:1.6">Select a task, set an optional qubit budget, and click <strong>Compare →</strong> to see a side-by-side cost breakdown for all encoders.</p>
591
- </div>""",
592
- )
593
-
594
- cmp_btn.click(
595
- fn=run_compare,
596
- inputs=[csv_upload, sample_dd, cmp_task_dd, cmp_qubits_sl],
597
- outputs=cmp_out,
598
- )
599
 
600
- # ── Tab 4: About ────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
601
  with gr.TabItem("About"):
602
- gr.Markdown(
603
- """
604
  ## About QuPrep
605
 
606
- QuPrep is an open-source Python library that converts classical tabular datasets
607
- into quantum-circuit-ready formats. It is **not** a quantum computing framework,
608
- simulator, or training tool — it is the preprocessing step that feeds into your
609
- chosen quantum framework.
610
 
611
  ### Pipeline
612
-
613
  ```
614
- Ingest → Clean → Reduce → Normalise → Encode → Export
615
  ```
 
 
616
 
617
- Each stage is optional and independently composable. The entire pipeline runs
618
- with a single call:
619
-
620
- ```python
621
- import quprep as qd
622
- result = qd.prepare("data.csv", encoding="angle", framework="qiskit")
623
- ```
624
-
625
- ### Supported encodings (12)
626
-
627
  | Encoding | Qubits | NISQ-safe |
628
  |---|---|---|
629
- | Angle (Ry/Rx/Rz) | d | ✓ |
630
- | Amplitude | ⌈log₂ d⌉ | ✗ |
631
  | Basis | d | ✓ |
632
  | IQP | d | conditional |
633
  | Entangled Angle | d | ✓ |
634
- | Data Re-uploading | d | ✓ |
635
  | Hamiltonian | d | ✗ |
636
  | ZZ Feature Map | d | conditional |
637
  | Pauli Feature Map | d | conditional |
638
  | Random Fourier | n_components | ✓ |
639
  | Tensor Product | ⌈d/2⌉ | ✓ |
640
- | QAOA Problem | d | ✓ (linear) |
 
641
 
642
  ### Links
 
 
 
 
643
 
644
- - 📦 PyPI: [pypi.org/project/quprep](https://pypi.org/project/quprep/)
645
- - 📖 Docs: [docs.quprep.org](https://docs.quprep.org)
646
- - 🌐 Website: [quprep.org](https://quprep.org)
647
- - 💻 Source: [github.com/quprep/quprep](https://github.com/quprep/quprep)
648
-
649
- Apache 2.0 license · Python ≥ 3.10
650
- """
651
- )
652
 
653
  if __name__ == "__main__":
654
- demo.launch(theme=THEME)
 
1
+ """QuPrep — HuggingFace Spaces demo (v0.8.0)"""
 
 
 
 
2
 
3
  from __future__ import annotations
4
 
5
  import io
6
+ import json
7
+ import os
8
+ import tempfile
9
  import traceback
10
 
11
  import gradio as gr
 
13
  import pandas as pd
14
 
15
  # ---------------------------------------------------------------------------
16
+ # Sample registry {label: (type, loader_fn)}
17
+ # type: tabular | image | text | graph | timeseries
18
  # ---------------------------------------------------------------------------
19
 
20
+ def _iris():
21
  from sklearn.datasets import load_iris
22
  ds = load_iris(as_frame=True)
23
+ return "tabular", ds.data.copy().values.astype(float), list(ds.data.columns)
 
 
24
 
25
+ def _heart():
 
26
  rng = np.random.default_rng(42)
27
  n = 50
28
+ cols = ["age", "trestbps", "chol", "thalach", "oldpeak"]
29
+ X = np.column_stack([
30
+ rng.integers(30, 75, n).astype(float),
31
+ rng.integers(90, 180, n).astype(float),
32
+ rng.integers(150, 350, n).astype(float),
33
+ rng.integers(90, 200, n).astype(float),
34
+ rng.uniform(0, 5, n).round(1),
35
+ ])
36
+ return "tabular", X, cols
37
+
38
+ def _digits():
39
+ from sklearn.datasets import load_digits
40
+ d = load_digits()
41
+ X = d.images[:8].reshape(8, -1).astype(float) / 16.0
42
+ cols = [f"px{i}" for i in range(X.shape[1])]
43
+ return "image", X, cols
44
+
45
+ def _timeseries():
46
+ rng = np.random.default_rng(42)
47
+ t = np.linspace(0, 4 * np.pi, 120)
48
+ X = np.column_stack([
49
+ np.sin(t) + rng.normal(0, 0.05, 120),
50
+ np.cos(t) + rng.normal(0, 0.05, 120),
51
+ t / (4 * np.pi) + rng.normal(0, 0.02, 120),
52
+ ])
53
+ return "timeseries", X, ["sine", "cosine", "trend"]
54
+
55
+ def _graph():
56
+ # Petersen-like 6-node molecule graph adjacency
57
+ adj = np.array([
58
+ [0,1,1,0,0,1],
59
+ [1,0,1,1,0,0],
60
+ [1,1,0,0,1,0],
61
+ [0,1,0,0,1,1],
62
+ [0,0,1,1,0,1],
63
+ [1,0,0,1,1,0],
64
+ ], dtype=float)
65
+ return "graph", adj, [f"node{i}" for i in range(6)]
66
+
67
+ def _text():
68
+ sentences = [
69
+ "Quantum computing processes information using quantum bits.",
70
+ "Machine learning models learn patterns from data.",
71
+ "Quantum machine learning combines both fields.",
72
+ "Data preprocessing is essential before encoding.",
73
+ "Entanglement allows quantum correlations between qubits.",
74
+ "Classical data must be normalized before amplitude encoding.",
75
+ ]
76
+ return "text", sentences, []
77
 
78
  SAMPLES = {
79
+ "Iris (tabular · 150×4)": _iris,
80
+ "Synthetic Heart (tabular · 50×5)": _heart,
81
+ "Digits (image · 8 samples, 64 px)": _digits,
82
+ "Sine / cosine (time series · 120t)": _timeseries,
83
+ "Molecule (graph · 6 nodes)": _graph,
84
+ "Quantum sentences (text · 6)": _text,
85
  }
86
 
87
  # ---------------------------------------------------------------------------
88
+ # Encodings / frameworks
89
  # ---------------------------------------------------------------------------
90
 
91
  ENCODINGS = [
92
+ "angle", "entangled_angle", "amplitude", "basis", "iqp",
93
+ "reupload", "hamiltonian", "zz_feature_map", "pauli_feature_map",
94
+ "random_fourier", "tensor_product", "qaoa_problem",
 
 
 
 
 
 
 
 
 
95
  ]
96
 
 
 
97
  ENCODING_DESC = {
98
  "angle": "Ry/Rx/Rz rotation per feature. NISQ-safe, depth O(1).",
99
  "entangled_angle": "Rotation + CNOT entangling layers. NISQ-safe.",
 
103
  "reupload": "Data re-uploading (Pérez-Salinas). High expressivity.",
104
  "hamiltonian": "Trotterized Hamiltonian evolution.",
105
  "zz_feature_map": "Qiskit-compatible ZZ feature map.",
106
+ "pauli_feature_map": "Generalised Pauli feature map (configurable strings).",
107
  "random_fourier": "RBF kernel approximation via random Fourier features.",
108
  "tensor_product": "Ry+Rz per qubit — full Bloch sphere, qubit-efficient.",
109
  "qaoa_problem": "QAOA-inspired feature map. Features as cost Hamiltonian parameters.",
110
  }
111
 
112
+ FRAMEWORKS = ["qasm", "qiskit", "pennylane", "cirq", "tket", "braket", "qsharp", "iqm"]
113
+
114
+ TASKS = ["classification", "regression", "kernel", "qaoa", "simulation"]
115
 
116
  _EMPTY_DF = pd.DataFrame()
117
 
118
+ # ---------------------------------------------------------------------------
119
+ # Data loading — single function, explicit source
120
+ # ---------------------------------------------------------------------------
121
 
122
def load_data(source: str, csv_file, sample_name: str, hf_name: str, hf_split: str):
    """Load a dataset from one explicitly selected source.

    Args:
        source: ``"upload"``, ``"sample"`` or ``"huggingface"``.
        csv_file: Path string, or an object exposing the path as ``.name``
            (only read when ``source == "upload"``).
        sample_name: Key into ``SAMPLES`` (only read for ``"sample"``).
        hf_name: HuggingFace dataset identifier (only read for
            ``"huggingface"``).
        hf_split: Dataset split; an empty value falls back to ``"train"``.

    Returns:
        ``(dtype, X, columns)``.
        dtype: tabular | image | timeseries | graph | text
        X: np.ndarray for numeric types, list[str] for text, np.ndarray for
        graph adj. For ``"sample"`` the tuple is whatever the selected
        ``SAMPLES`` factory returns; callers unpack it the same way.

    Raises:
        ValueError: Missing upload, no numeric data, unknown sample name,
            blank HuggingFace name, or unknown ``source``.
    """
    if source == "upload":
        if csv_file is None:
            raise ValueError("No file uploaded.")
        path = csv_file if isinstance(csv_file, str) else csv_file.name
        # The upload widget also accepts .tsv; with the default comma
        # separator such a file collapses into one non-numeric column.
        sep = "\t" if str(path).lower().endswith(".tsv") else ","
        df = pd.read_csv(path, sep=sep).select_dtypes(include="number").dropna()
        if df.empty:
            raise ValueError("No numeric columns found in uploaded file.")
        return "tabular", df.values.astype(float), list(df.columns)

    if source == "sample":
        if not sample_name or sample_name not in SAMPLES:
            raise ValueError("Select a sample dataset.")
        return SAMPLES[sample_name]()

    if source == "huggingface":
        if not hf_name or not hf_name.strip():
            raise ValueError("Enter a HuggingFace dataset name.")
        from quprep.ingest.huggingface_ingester import HuggingFaceIngester

        ingester = HuggingFaceIngester(modality="auto", split=hf_split or "train")
        dataset = ingester.load(hf_name.strip())
        X = dataset.data
        if hasattr(X, "values"):
            X = X.values
        # np.asarray also covers plain sequences, which have no .astype.
        X = np.asarray(X, dtype=float)
        # map HF modality metadata → internal dtype
        _modality_map = {"image": "image", "text": "text",
                         "time_series": "timeseries", "tabular": "tabular"}
        hf_modality = dataset.metadata.get("modality", "tabular")
        dtype = _modality_map.get(hf_modality, "tabular")
        return dtype, X, [f"f{i}" for i in range(X.shape[1])]

    raise ValueError(f"Unknown source: {source}")
160
+
161
+
162
def _write_tmp(X: np.ndarray) -> str:
    """Write *X* to a temporary ``.csv`` file and return the file's path.

    The file is created with ``delete=False``; the caller is responsible for
    removing it once it is no longer needed.
    """
    # newline="" leaves line-ending handling to pandas, as its to_csv docs
    # require for text-mode handles.
    handle = tempfile.NamedTemporaryFile(
        mode="w", suffix=".csv", delete=False, encoding="utf-8", newline=""
    )
    try:
        with handle:
            pd.DataFrame(X).to_csv(handle, index=False)
    except BaseException:
        # Serialisation failed: do not leave an orphaned temp file behind.
        try:
            os.unlink(handle.name)
        except OSError:
            pass
        raise
    return handle.name
167
+
168
+
169
def _rm(path):
    """Delete *path* on a best-effort basis.

    A missing file, a permission problem or an unusable path value (for
    example ``None``) is ignored so cleanup can never mask the real result.
    """
    try:
        os.unlink(path)
    except (OSError, TypeError, ValueError):
        # OSError: missing/locked file; TypeError/ValueError: not a valid path.
        pass
174
 
 
 
 
 
 
 
 
 
 
 
175
 
176
def _nisq(ok: bool) -> str:
    """Return the HTML badge for a NISQ-safety flag (green tick or red cross)."""
    if ok:
        return '<span style="color:#4ade80;font-weight:600">✓ NISQ-safe</span>'
    return '<span style="color:#f87171;font-weight:600">✗ Not NISQ-safe</span>'
 
179
 
180
+ # ---------------------------------------------------------------------------
181
+ # Convert handles all data types
182
+ # ---------------------------------------------------------------------------
183
 
184
def _get_exporter(framework: str):
    """Return an exporter instance for the given framework name.

    ``"qasm"`` and any unrecognised name yield ``quprep.QASMExporter()``.
    The framework-specific exporter modules are imported only when they are
    requested, so an ImportError raised by one of them surfaces here.
    """
    import importlib

    import quprep as qd

    # framework name -> (module path, exporter class name)
    lazy_exporters = {
        "qiskit": ("quprep.export.qiskit_export", "QiskitExporter"),
        "pennylane": ("quprep.export.pennylane_export", "PennyLaneExporter"),
        "cirq": ("quprep.export.cirq_export", "CirqExporter"),
        "tket": ("quprep.export.tket_export", "TKETExporter"),
        "braket": ("quprep.export.braket_export", "BraketExporter"),
        "qsharp": ("quprep.export.qsharp_export", "QSharpExporter"),
        "iqm": ("quprep.export.iqm_export", "IQMExporter"),
    }
    target = lazy_exporters.get(framework)
    if target is None:
        return qd.QASMExporter()
    module_name, class_name = target
    return getattr(importlib.import_module(module_name), class_name)()
198
+
199
def _prepare_rff(X_full, X_slice, framework):
    """Manually fit RandomFourierEncoder on full data and encode/export the slice.

    Returns:
        An object with ``circuits`` (one exported circuit per encoded sample),
        ``encoded`` (the encoder output) and ``cost`` attributes, which are
        the attributes ``run_convert`` reads from a result.

    The cost figures are filled in by hand rather than measured: qubits and
    gate count are both ``enc.n_components``, depth is 1, there are no
    two-qubit gates and the circuit is flagged NISQ-safe.
    """
    from types import SimpleNamespace

    from quprep.encode.random_fourier import RandomFourierEncoder
    from quprep.core.dataset import Dataset

    enc = RandomFourierEncoder()
    enc.fit(X_full)
    n_features = X_slice.shape[1]
    ds = Dataset(
        data=X_slice,
        feature_names=[f"f{i}" for i in range(n_features)],
        feature_types=["continuous"] * n_features,
        metadata={},
    )
    encoded_list = enc.encode_batch(ds)
    exporter = _get_exporter(framework)
    circuits = [exporter.export(e) for e in encoded_list]

    from quprep.validation.cost import CostEstimate

    n_qubits = enc.n_components
    cost = CostEstimate(
        encoding="random_fourier",
        n_features=n_features,
        n_qubits=n_qubits,
        gate_count=n_qubits,
        circuit_depth=1,
        two_qubit_gates=0,
        nisq_safe=True,
        warning=None,
    )
    return SimpleNamespace(circuits=circuits, encoded=encoded_list, cost=cost)
233
+
234
def _reducer_kwargs(X_slice, X_full, n_qubits):
    """Return ``(kwargs_dict, clamp_note)`` for ``quprep.prepare``.

    When the qubit budget is unset (``<= 0``) or already covers every feature
    of *X_full*, no reducer is needed and ``({}, None)`` is returned.
    Otherwise the kwargs carry a ``HardwareAwareReducer`` under the
    ``"preprocessor"`` key, with the budget clamped to
    ``min(n_qubits, n_samples_in_slice, n_features)``. ``clamp_note`` is a
    user-facing warning when that clamp lowered the budget, else ``None``.
    """
    n_qubits = int(n_qubits)  # slider values may arrive as floats
    n_features = X_full.shape[1]
    if n_qubits <= 0 or n_features <= n_qubits:
        return {}, None

    n_rows = X_slice.shape[0]
    effective = min(n_qubits, n_rows, n_features)
    note = None
    if effective < n_qubits:
        note = (f"⚠️ Qubit budget clamped {n_qubits}→{effective} "
                f"(PCA limit: min(samples={n_rows}, features={n_features})). "
                f"Increase Samples slider for a higher budget.")

    from quprep.reduce.hardware_aware import HardwareAwareReducer

    return {"preprocessor": HardwareAwareReducer(backend=effective)}, note
245
+
246
def _encode_tabular(X, encoding, framework, n_samples, n_qubits):
    """Encode the first *n_samples* rows of *X* (at least one row).

    Returns ``(result, clamp_note)``. ``random_fourier`` is fitted on all of
    *X* through ``_prepare_rff`` and never uses the qubit budget; every other
    encoding goes through ``quprep.prepare`` with the reducer kwargs built by
    ``_reducer_kwargs``.
    """
    import quprep as qd

    # int() keeps the slice valid if the slider hands over a float.
    X_slice = X[:max(1, int(n_samples))]
    if encoding == "random_fourier":
        return _prepare_rff(X, X_slice, framework), None
    kw, note = _reducer_kwargs(X_slice, X, n_qubits)
    return qd.prepare(X_slice, encoding=encoding, framework=framework, **kw), note
253
+
254
def _encode_image(X, encoding, framework, n_samples, n_qubits=0):
    """Encode image rows; returns ``(result, clamp_note)``.

    The array is handled exactly like tabular data, so this delegates to
    ``_encode_tabular`` instead of repeating its body.
    """
    return _encode_tabular(X, encoding, framework, n_samples, n_qubits)
261
+
262
def _encode_timeseries(X, encoding, framework, n_samples, n_qubits=0, window=4):
    """Window a time series, then encode the first *n_samples* windows.

    *X* is wrapped in a ``Dataset`` and passed through
    ``WindowTransformer(window_size=window)``; the transformed ``.data`` is
    what gets sliced and encoded. Returns ``(result, clamp_note)``.
    ``random_fourier`` is fitted on all windows via ``_prepare_rff`` and
    does not use the qubit budget.
    """
    import quprep as qd
    from quprep.preprocess.window import WindowTransformer
    from quprep.core.dataset import Dataset

    n_cols = X.shape[1]
    ds = Dataset(
        data=X,
        feature_names=[f"t{i}" for i in range(n_cols)],
        feature_types=["continuous"] * n_cols,
        metadata={},
    )
    X_win = WindowTransformer(window_size=window).transform(ds).data
    # int() keeps the slice valid if the slider hands over a float.
    X_slice = X_win[:max(1, int(n_samples))]
    if encoding == "random_fourier":
        return _prepare_rff(X_win, X_slice, framework), None
    kw, note = _reducer_kwargs(X_slice, X_win, n_qubits)
    return qd.prepare(X_slice, encoding=encoding, framework=framework, **kw), note
275
+
276
def _encode_text(sentences, encoding, framework, n_qubits=0):
    """Vectorise sentences and encode every resulting row.

    The sentences are loaded through ``TextIngester(method="tfidf",
    max_features=8)``; unlike the other encode helpers no sample slicing is
    applied. Returns ``(result, clamp_note)``.
    """
    import quprep as qd
    from quprep.ingest.text_ingester import TextIngester

    X = TextIngester(method="tfidf", max_features=8).load(sentences).data
    if encoding == "random_fourier":
        return _prepare_rff(X, X, framework), None
    kw, note = _reducer_kwargs(X, X, n_qubits)
    return qd.prepare(X, encoding=encoding, framework=framework, **kw), note
285
+
286
def _encode_graph(adj):
    """Encode an adjacency matrix with ``GraphStateEncoder`` and export it.

    The export always uses ``QASMExporter``, whatever framework the user
    selected; the exporter's return value is passed back unchanged.
    """
    import quprep as qd
    from quprep.encode.graph_state import GraphStateEncoder

    # NOTE(review): _from_adj is a private method of GraphStateEncoder —
    # confirm there is no public entry point for adjacency input.
    encoded = GraphStateEncoder()._from_adj(adj)
    return qd.QASMExporter().export(encoded)
292
+
293
+
294
def run_convert(source, csv_file, sample_name, hf_name, hf_split,
                encoding, framework, n_samples, n_qubits):
    """Load the selected dataset, encode it and render the first circuit.

    Returns:
        ``(input_preview, encoded_preview, circuit_text, cost_html, status)``.
        Loading and encoding failures are reported through ``status`` with
        empty values in the other four slots. Graph data returns early with
        the adjacency matrix as the preview and no cost block.
    """
    try:
        import quprep as qd  # noqa: F401 - availability probe only
    except ImportError:
        return _EMPTY_DF, _EMPTY_DF, "", "", "❌ quprep not installed."

    try:
        dtype, X, cols = load_data(source, csv_file, sample_name, hf_name, hf_split)
    except Exception as exc:
        return _EMPTY_DF, _EMPTY_DF, "", "", f"❌ {exc}"

    # Early check: PennyLane's circuit drawer is recursive and crashes on large circuits.
    # Estimate qubit count from data shape + encoding before spending time encoding.
    if (framework == "pennylane" and dtype not in ("graph", "text")
            and getattr(X, "ndim", 0) == 2):
        n_features = X.shape[1]
        if 0 < n_qubits < n_features:
            # With a budget below the feature count the encode helpers attach
            # a reducer, so the raw width would overestimate (presumably the
            # reducer yields at most `n_qubits` features — confirm).
            n_features = int(n_qubits)
        _enc_qubits = {
            "amplitude": int(np.ceil(np.log2(max(n_features, 2)))),
            "random_fourier": 8,  # default n_components
        }
        est_qubits = _enc_qubits.get(encoding, n_features)  # most encoders use n_features qubits
        if est_qubits > 100:
            msg = (f"⚠️ PennyLane's circuit drawer uses recursion and will crash at this scale "
                   f"({est_qubits} qubits estimated). Switch to framework=qasm to see the circuit.")
            return _EMPTY_DF, _EMPTY_DF, "", "", msg

    clamp_note = None
    try:
        if dtype == "graph":
            circuit_text = _encode_graph(X)
            n_nodes = X.shape[0]
            edges = int((X != 0).sum() // 2)
            status = f"✓ Graph: {n_nodes} nodes, {edges} edges → GraphState | 1 circuit (qubit budget ignored — graph state requires exactly 1 qubit per node)"
            preview = pd.DataFrame(X, columns=cols, index=cols).round(0).astype(int)
            return preview, _EMPTY_DF, circuit_text, "", status

        elif dtype == "text":
            result, clamp_note = _encode_text(X, encoding, framework, n_qubits)

        elif dtype == "image":
            result, clamp_note = _encode_image(X, encoding, framework, n_samples, n_qubits)

        elif dtype == "timeseries":
            result, clamp_note = _encode_timeseries(X, encoding, framework, n_samples, n_qubits)

        else:  # tabular
            result, clamp_note = _encode_tabular(X, encoding, framework, n_samples, n_qubits)

    except ImportError as exc:
        return _EMPTY_DF, _EMPTY_DF, "", "", f"⚠️ Optional dep missing: {exc}\nTry framework=qasm."
    except ValueError as exc:
        return _EMPTY_DF, _EMPTY_DF, "", "", f"⚠️ {exc}"
    except Exception:
        return _EMPTY_DF, _EMPTY_DF, "", "", f"❌ {traceback.format_exc()}"

    circuits = result.circuits or []
    if not circuits:
        return _EMPTY_DF, _EMPTY_DF, "", "", "⚠️ No circuits produced."

    # Non-array input (text) has no numeric preview.
    X_num = X if isinstance(X, np.ndarray) else np.array([])
    if X_num.ndim == 2:
        preview = pd.DataFrame(
            X_num[:5], columns=cols[:X_num.shape[1]] if cols else None
        ).round(4)
    else:
        preview = _EMPTY_DF

    enc_preview = _EMPTY_DF
    if result.encoded:
        try:
            rows = [{"sample": i, **{f"q{j}": round(float(p), 4) for j, p in enumerate(e.parameters)}}
                    for i, e in enumerate(result.encoded[:5])]
            enc_preview = pd.DataFrame(rows).set_index("sample")
        except Exception:
            # Best effort: the parameter table is optional, keep it empty.
            pass

    first = circuits[0]
    if isinstance(first, str):
        circuit_text = first
    else:
        type_name = type(first).__name__
        if type_name == "QNode":
            import pennylane as qml
            try:
                circuit_text = qml.draw(first)()
            except RecursionError:
                n_w = len(first.device.wires)
                circuit_text = (
                    f"# Circuit has {n_w} wires — too large for PennyLane's drawer.\n"
                    "# PennyLane draws circuits recursively and hits Python's recursion\n"
                    "# limit at this scale. Try a smaller dataset or fewer samples,\n"
                    "# or switch to the qasm framework to see the full circuit."
                )
        elif type_name == "Circuit" and hasattr(first, "num_qubits"):
            # pytket Circuit
            try:
                from pytket.qasm import circuit_to_qasm_str
                circuit_text = circuit_to_qasm_str(first)
            except Exception:
                circuit_text = str(first)
        else:
            circuit_text = str(first)

    cost = result.cost
    cost_html = ""
    if cost:
        warn = f'<p style="color:#fbbf24;margin:8px 0 0">⚠️ {cost.warning}</p>' if cost.warning else ""
        cost_html = f"""
    <div style="font-family:monospace;font-size:0.9rem;line-height:1.8">
      <div style="display:grid;grid-template-columns:1fr 1fr;gap:4px 24px">
        <span style="color:#94a3b8">Encoding</span> <span>{cost.encoding}</span>
        <span style="color:#94a3b8">Qubits</span> <span>{cost.n_qubits}</span>
        <span style="color:#94a3b8">Gates</span> <span>{cost.gate_count}</span>
        <span style="color:#94a3b8">Depth</span> <span>{cost.circuit_depth}</span>
        <span style="color:#94a3b8">2Q gates</span> <span>{cost.two_qubit_gates}</span>
        <span style="color:#94a3b8">NISQ</span> <span>{_nisq(cost.nisq_safe)}</span>
      </div>{warn}
    </div>"""

    n_circ = len(circuits)
    shape = f"{X_num.shape[0]}×{X_num.shape[1]}" if X_num.ndim == 2 else str(len(X))
    status = f"✓ {dtype} · {shape} → {n_circ} circuit(s) | showing sample 0"
    if clamp_note:
        # The clamp warning replaces the success line.
        status = clamp_note
    return preview, enc_preview, circuit_text, cost_html, status
413
 
414
 
415
  # ---------------------------------------------------------------------------
416
+ # Recommend
417
  # ---------------------------------------------------------------------------
418
 
419
def run_recommend(source, csv_file, sample_name, hf_name, hf_split, task, n_qubits):
    """Return an HTML card with the recommended encoding for the dataset.

    The data is written to a temporary CSV, passed to ``quprep.recommend``
    and the temp file is removed afterwards. Only ``tabular`` and ``image``
    data are accepted; other types get a warning paragraph. Any exception,
    including a missing ``quprep`` install, is rendered as an error
    paragraph instead of propagating.
    """
    tmp = None
    try:
        # Imported inside the try so a missing package becomes an error
        # message rather than crashing the handler.
        import quprep as qd

        dtype, X, _ = load_data(source, csv_file, sample_name, hf_name, hf_split)
        if dtype not in ("tabular", "image"):
            return "<p style='color:#fbbf24'>⚠️ Recommendation works on tabular/image data.</p>"
        tmp = _write_tmp(X)
        rec = qd.recommend(tmp, task=task, qubits=n_qubits if n_qubits > 0 else None)

        alt_rows = "".join(
            f"<tr><td style='padding:6px 12px'>{a.method}</td>"
            f"<td style='padding:6px 12px;text-align:center'>{a.score:.0f}</td>"
            f"<td style='padding:6px 12px;color:#94a3b8'>{a.depth}</td></tr>"
            for a in rec.alternatives
        )
        # The alternatives table is shown only when there are alternatives.
        alt_table = ""
        if alt_rows:
            alt_table = (
                "<div style='margin-top:20px'><table style='width:100%;border-collapse:collapse;font-size:0.85rem'>"
                "<thead><tr style='border-bottom:1px solid #334155'>"
                "<th style='padding:6px 12px;text-align:left;color:#64748b;font-weight:500'>Encoding</th>"
                "<th style='padding:6px 12px;text-align:center;color:#64748b;font-weight:500'>Score</th>"
                "<th style='padding:6px 12px;color:#64748b;font-weight:500'>Depth</th>"
                "</tr></thead><tbody>" + alt_rows + "</tbody></table></div>"
            )
        return f"""
    <div style="font-family:sans-serif;font-size:0.9rem;line-height:1.6">
      <div style="display:flex;align-items:baseline;gap:12px;margin-bottom:16px">
        <span style="font-size:1.6rem;font-weight:700;color:#e2e8f0">{rec.method}</span>
        <span style="color:#a78bfa;font-weight:600;font-size:0.8rem">recommended</span>
      </div>
      <div style="display:grid;grid-template-columns:auto 1fr;gap:4px 24px;margin-bottom:16px">
        <span style="color:#64748b">Qubits</span> <span>{rec.qubits}</span>
        <span style="color:#64748b">Depth</span> <span style="font-family:monospace">{rec.depth}</span>
        <span style="color:#64748b">NISQ</span> <span>{_nisq(rec.nisq_safe)}</span>
        <span style="color:#64748b">Score</span> <span>{rec.score:.0f}</span>
      </div>
      <div style="padding:12px 16px;background:#1e293b;border-radius:8px;color:#cbd5e1;font-size:0.85rem;line-height:1.6">{rec.reason}</div>
      {alt_table}
    </div>"""
    except Exception as exc:
        return f"<p>❌ {exc}</p>"
    finally:
        if tmp:
            _rm(tmp)
 
 
 
453
 
454
 
455
  # ---------------------------------------------------------------------------
456
+ # Compare
457
  # ---------------------------------------------------------------------------
458
 
459
def run_compare(source, csv_file, sample_name, hf_name, hf_split, task, n_qubits):
    """Return an HTML table comparing the encodings for the dataset.

    The data is written to a temporary CSV, passed to
    ``quprep.compare_encodings`` and the temp file is removed afterwards.
    The recommended row is starred and highlighted; per-encoding warnings are
    listed below the table. Only ``tabular`` and ``image`` data are accepted.
    Any exception, including a missing ``quprep`` install, is rendered as an
    error paragraph instead of propagating.
    """
    tmp = None
    try:
        # Imported inside the try so a missing package becomes an error
        # message rather than crashing the handler.
        import quprep as qd

        dtype, X, _ = load_data(source, csv_file, sample_name, hf_name, hf_split)
        if dtype not in ("tabular", "image"):
            return "<p style='color:#fbbf24'>⚠️ Comparison works on tabular/image data.</p>"
        tmp = _write_tmp(X)
        result = qd.compare_encodings(tmp, task=task, qubits=n_qubits if n_qubits > 0 else None)

        row_parts = []
        for r in result.rows:
            is_best = r.encoding == result.recommended
            nisq = '<span style="color:#4ade80">Yes</span>' if r.nisq_safe else '<span style="color:#f87171">No</span>'
            name = f"{r.encoding} ★" if is_best else r.encoding
            bg = "background:#1e293b;" if is_best else ""
            weight = "600" if is_best else "400"
            row_parts.append(
                f"<tr style='{bg}'><td style='padding:8px 14px;font-weight:{weight}'>{name}</td>"
                f"<td style='padding:8px 14px;text-align:center'>{r.n_qubits}</td>"
                f"<td style='padding:8px 14px;text-align:center'>{r.gate_count}</td>"
                f"<td style='padding:8px 14px;text-align:center'>{r.circuit_depth}</td>"
                f"<td style='padding:8px 14px;text-align:center'>{r.two_qubit_gates}</td>"
                f"<td style='padding:8px 14px;text-align:center'>{nisq}</td></tr>"
            )
        rows_html = "".join(row_parts)
        warn_html = "".join(
            f"<p style='color:#fbbf24;font-size:0.78rem'>⚠️ [{r.encoding}] {r.warning}</p>"
            for r in result.rows if r.warning
        )
        legend = (
            "<p style='margin:10px 0 0;font-size:0.78rem;color:#475569'>★ recommended for task/budget</p>"
            if result.recommended else ""
        )
        return f"""
    <div style="font-family:sans-serif;font-size:0.88rem">
      <table style="width:100%;border-collapse:collapse">
        <thead><tr style="border-bottom:1px solid #334155">
          <th style="padding:8px 14px;text-align:left;color:#64748b;font-weight:500">Encoding</th>
          <th style="padding:8px 14px;text-align:center;color:#64748b;font-weight:500">Qubits</th>
          <th style="padding:8px 14px;text-align:center;color:#64748b;font-weight:500">Gates</th>
          <th style="padding:8px 14px;text-align:center;color:#64748b;font-weight:500">Depth</th>
          <th style="padding:8px 14px;text-align:center;color:#64748b;font-weight:500">2Q</th>
          <th style="padding:8px 14px;text-align:center;color:#64748b;font-weight:500">NISQ</th>
        </tr></thead><tbody>{rows_html}</tbody>
      </table>
      {legend}
      {warn_html}
    </div>"""
    except Exception as exc:
        return f"<p>❌ {exc}</p>"
    finally:
        if tmp:
            _rm(tmp)
494
+
495
+
496
+ # ---------------------------------------------------------------------------
497
+ # Inspect
498
+ # ---------------------------------------------------------------------------
499
+
500
def run_inspect(source, csv_file, sample_name, hf_name, hf_split):
    """Return an HTML profile of the selected dataset.

    Text data yields a one-line sentence count. Graph data yields node, edge
    and density figures. Everything else is profiled with
    ``quprep.ingest.profiler.profile`` and shown as a summary plus a
    per-feature table limited to the first 12 features. Any exception is
    rendered as an error paragraph instead of propagating.
    """
    try:
        from quprep.ingest.numpy_ingester import NumpyIngester
        from quprep.ingest.profiler import profile

        dtype, X, cols = load_data(source, csv_file, sample_name, hf_name, hf_split)
        if dtype == "text":
            return f"<p style='color:#94a3b8'>{len(X)} sentences loaded. Text data — no numeric profile.</p>"
        if dtype == "graph":
            n = X.shape[0]
            edges = int((X != 0).sum() // 2)
            # A graph with fewer than two nodes has no possible edges; report
            # density 0 instead of dividing by zero.
            max_edges = n * (n - 1) / 2
            density = edges / max_edges if max_edges else 0.0
            return f"""<div style="font-family:monospace;font-size:0.9rem;line-height:1.8">
      <div style="display:grid;grid-template-columns:auto 1fr;gap:4px 24px">
        <span style="color:#94a3b8">Type</span> <span>Graph</span>
        <span style="color:#94a3b8">Nodes</span> <span>{n}</span>
        <span style="color:#94a3b8">Edges</span> <span>{edges}</span>
        <span style="color:#94a3b8">Density</span><span>{density:.2f}</span>
      </div></div>"""

        p = profile(NumpyIngester().load(X))
        missing = int(p.missing_counts.sum())
        # Guard the empty-array case, where X.size is 0.
        sparsity = 100.0 * (X == 0).sum() / X.size if X.size else 0.0

        row_parts = []
        for i in range(min(p.n_features, 12)):
            label = cols[i] if i < len(cols) else f"f{i}"
            row_parts.append(
                f"<tr><td style='padding:6px 12px;font-family:monospace'>{label}</td>"
                f"<td style='padding:6px 12px;text-align:right'>{p.mins[i]:.3f}</td>"
                f"<td style='padding:6px 12px;text-align:right'>{p.maxs[i]:.3f}</td>"
                f"<td style='padding:6px 12px;text-align:right'>{p.means[i]:.3f}</td>"
                f"<td style='padding:6px 12px;text-align:right'>{p.stds[i]:.3f}</td>"
                f"<td style='padding:6px 12px;text-align:center'>{int(p.missing_counts[i])}</td></tr>"
            )
        feat_rows = "".join(row_parts)
        more = f"<p style='color:#475569;font-size:0.78rem'>… and {p.n_features-12} more</p>" if p.n_features > 12 else ""
        missing_label = "none" if not missing else missing
        return f"""
    <div style="font-family:sans-serif;font-size:0.9rem;line-height:1.8">
      <div style="display:grid;grid-template-columns:auto 1fr;gap:2px 24px;margin-bottom:20px">
        <span style="color:#64748b">Type</span> <span>{dtype}</span>
        <span style="color:#64748b">Shape</span> <span>{p.n_samples} × {p.n_features}</span>
        <span style="color:#64748b">Missing</span> <span>{missing_label}</span>
        <span style="color:#64748b">Sparsity</span> <span>{sparsity:.1f}% zeros</span>
      </div>
      <table style="width:100%;border-collapse:collapse;font-size:0.85rem">
        <thead><tr style="border-bottom:1px solid #334155">
          <th style="padding:6px 12px;text-align:left;color:#64748b;font-weight:500">Feature</th>
          <th style="padding:6px 12px;text-align:right;color:#64748b;font-weight:500">Min</th>
          <th style="padding:6px 12px;text-align:right;color:#64748b;font-weight:500">Max</th>
          <th style="padding:6px 12px;text-align:right;color:#64748b;font-weight:500">Mean</th>
          <th style="padding:6px 12px;text-align:right;color:#64748b;font-weight:500">Std</th>
          <th style="padding:6px 12px;text-align:center;color:#64748b;font-weight:500">Missing</th>
        </tr></thead><tbody>{feat_rows}</tbody>
      </table>{more}
    </div>"""
    except Exception as exc:
        return f"<p>❌ {exc}</p>"
551
+
552
+
553
+ # ---------------------------------------------------------------------------
554
+ # Suggest qubits
555
+ # ---------------------------------------------------------------------------
556
 
557
def run_suggest(source, csv_file, sample_name, hf_name, hf_split, task, max_qubits):
    """Return an HTML card with a suggested qubit count for the dataset.

    The data is written to a temporary CSV, passed to
    ``quprep.suggest_qubits`` and the temp file is removed afterwards.
    ``max_qubits`` is forwarded only when it is positive. Only tabular,
    image and timeseries data are accepted. Any exception is rendered as an
    error paragraph instead of propagating.
    """
    tmp = None
    try:
        import quprep as qd

        dtype, X, _ = load_data(source, csv_file, sample_name, hf_name, hf_split)
        if dtype not in ("tabular", "image", "timeseries"):
            return "<p style='color:#fbbf24'>⚠️ Qubit suggestion works on numeric data.</p>"
        tmp = _write_tmp(X)

        kwargs = {"task": task}
        if max_qubits > 0:
            kwargs["max_qubits"] = max_qubits
        suggestion = qd.suggest_qubits(tmp, **kwargs)

        nisq_badge = _nisq(suggestion.nisq_safe)
        warning_html = ""
        if suggestion.warning:
            warning_html = (
                "<div style='padding:8px 14px;background:#451a03;border-radius:6px;"
                "color:#fbbf24;font-size:0.82rem;margin-bottom:12px'>"
                f"⚠️ {suggestion.warning}</div>"
            )
        return f"""
    <div style="font-family:sans-serif;font-size:0.9rem">
      <div style="display:flex;align-items:baseline;gap:16px;margin-bottom:20px">
        <span style="font-size:2rem;font-weight:700;color:#e2e8f0">{suggestion.n_qubits}</span>
        <span style="color:#a78bfa;font-weight:600">suggested qubits</span>
        <span style="color:#475569;font-size:0.82rem">· {suggestion.n_features} features · task={task}</span>
        <span style="margin-left:8px">{nisq_badge}</span>
      </div>
      {warning_html}
      <div style="padding:10px 16px;background:#1e293b;border-radius:8px;color:#cbd5e1;font-size:0.85rem;margin-bottom:16px;line-height:1.6">{suggestion.reasoning}</div>
      <div style="color:#64748b;font-size:0.82rem">Recommended encoding: <span style="color:#a78bfa;font-weight:600">{suggestion.encoding_hint}</span></div>
    </div>"""
    except Exception as exc:
        return f"<p>❌ {exc}</p>"
    finally:
        if tmp:
            _rm(tmp)
592
+
593
+
594
+ # ---------------------------------------------------------------------------
595
+ # Fingerprint
596
+ # ---------------------------------------------------------------------------
597
+
598
def run_fingerprint(source, csv_file, sample_name, hf_name, hf_split,
                    encoding, framework, reducer_type, n_components, n_qubits,
                    use_scaler, scaler_strategy,
                    use_imputer, imputer_strategy,
                    use_outlier, outlier_method,
                    use_fsel, fsel_method, fsel_max):
    """Build a pipeline from the UI settings and render its fingerprint.

    Returns:
        ``(html, error_html)`` — exactly one of the two is non-empty. On
        success ``html`` shows the pipeline hash, a table of stages and the
        dependency versions taken from the fingerprint JSON, with the
        dependency of the selected framework highlighted.

    Only tabular, image and timeseries data are accepted. An unknown
    ``encoding`` falls back to ``AngleEncoder``. Any exception is rendered
    into ``error_html`` instead of propagating.
    """
    try:
        import quprep as qd

        dtype, X, _ = load_data(source, csv_file, sample_name, hf_name, hf_split)
        if dtype not in ("tabular", "image", "timeseries"):
            return "", "<p style='color:#fbbf24'>⚠️ Fingerprinting works on numeric data (tabular / image / time series).</p>"
        X_use = X

        _enc_map = {
            "angle": qd.AngleEncoder,
            "amplitude": qd.AmplitudeEncoder,
            "basis": qd.BasisEncoder,
            "iqp": qd.IQPEncoder,
            "entangled_angle": qd.EntangledAngleEncoder,
            "reupload": qd.ReUploadEncoder,
            "hamiltonian": qd.HamiltonianEncoder,
            "zz_feature_map": qd.ZZFeatureMapEncoder,
            "pauli_feature_map": qd.PauliFeatureMapEncoder,
            "random_fourier": qd.RandomFourierEncoder,
            "tensor_product": qd.TensorProductEncoder,
            "qaoa_problem": qd.QAOAProblemEncoder,
        }
        encoder = _enc_map.get(encoding, qd.AngleEncoder)()
        if hasattr(encoder, "fit"):
            encoder.fit(X_use)

        # Factories are lazy so only the selected reducer is constructed.
        _red_map = {
            "pca": lambda: qd.PCAReducer(n_components=int(n_components)),
            "lda": lambda: qd.LDAReducer(n_components=int(n_components)),
            "spectral": lambda: qd.SpectralReducer(n_components=int(n_components)),
            "tsne": lambda: qd.TSNEReducer(n_components=int(n_components)),
            "hardware_aware": lambda: qd.HardwareAwareReducer(backend=int(n_qubits)),
        }
        reducer = _red_map[reducer_type]() if reducer_type != "none" else None
        cleaner = qd.Imputer(strategy=imputer_strategy) if use_imputer else None
        # NOTE(review): `outlier` is built but never passed to the Pipeline
        # below — confirm which Pipeline argument should receive it.
        outlier = qd.OutlierHandler(method=outlier_method) if use_outlier else None
        scaler = qd.Scaler(strategy=scaler_strategy) if use_scaler else None
        selector = qd.FeatureSelector(method=fsel_method, max_features=int(fsel_max)) if use_fsel else None

        exporter = _get_exporter(framework)
        pipeline = qd.Pipeline(
            encoder=encoder,
            exporter=exporter,
            reducer=reducer,
            cleaner=cleaner,
            normalizer=scaler,
            preprocessor=selector,
        )
        pipeline.fit(X_use)
        fp = qd.fingerprint_pipeline(pipeline)
        fp_dict = json.loads(fp.to_json())

        stages_html = "".join(
            f"<tr><td style='padding:6px 12px;font-family:monospace'>{stage}</td>"
            f"<td style='padding:6px 12px;color:#94a3b8'>{info.get('class','')}</td>"
            f"<td style='padding:6px 12px;font-size:0.78rem;color:#64748b'>{json.dumps(info.get('params',{}))}</td></tr>"
            for stage, info in fp_dict.get("stages", {}).items()
        )

        # Package that backs each non-QASM exporter, used for highlighting.
        _fw_dep = {
            "qiskit": "qiskit", "pennylane": "pennylane", "cirq": "cirq-core",
            "tket": "pytket", "braket": "amazon-braket-sdk", "qsharp": "qsharp",
            "iqm": "iqm-client",
        }
        active_dep = _fw_dep.get(framework)
        deps_html = "".join(
            f"<span style='background:{'#2d1f63' if k == active_dep else '#1e293b'};"
            f"padding:2px 8px;border-radius:4px;font-family:monospace;font-size:0.8rem;"
            f"margin:2px;color:{'#a78bfa' if k == active_dep else 'inherit'}'>"
            f"{k}=={v}{' ← active exporter' if k == active_dep else ''}</span> "
            for k, v in fp_dict.get("dependencies", {}).items()
        )
        return f"""
    <div style="font-family:sans-serif;font-size:0.9rem">
      <p style="margin:0 0 8px;font-size:0.75rem;font-weight:600;color:#94a3b8;text-transform:uppercase;letter-spacing:.05em">Pipeline hash</p>
      <div style="background:#1e293b;border-radius:8px;padding:12px 16px;font-family:monospace;font-size:0.88rem;color:#a78bfa;word-break:break-all;margin-bottom:20px">sha256:{fp.hash}</div>
      <p style="margin:0 0 8px;font-size:0.75rem;font-weight:600;color:#94a3b8;text-transform:uppercase;letter-spacing:.05em">Stages</p>
      <table style="width:100%;border-collapse:collapse;font-size:0.85rem;margin-bottom:16px">
        <thead><tr style="border-bottom:1px solid #334155">
          <th style="padding:6px 12px;text-align:left;color:#64748b;font-weight:500">Stage</th>
          <th style="padding:6px 12px;text-align:left;color:#64748b;font-weight:500">Class</th>
          <th style="padding:6px 12px;text-align:left;color:#64748b;font-weight:500">Params</th>
        </tr></thead><tbody>{stages_html}</tbody>
      </table>
      <p style="margin:0 0 8px;font-size:0.75rem;font-weight:600;color:#94a3b8;text-transform:uppercase;letter-spacing:.05em">Key dependencies</p>
      <div style="line-height:2">{deps_html}</div>
    </div>""", ""
    except Exception as exc:
        return "", f"<p>❌ {exc}</p>"
690
+
691
+
692
+ # ---------------------------------------------------------------------------
693
+ # QUBO / QAOA
694
+ # ---------------------------------------------------------------------------
695
+
696
def run_qubo(adj_text, p_layers, problem):
    """Build a Max-Cut QUBO from a typed adjacency matrix and emit QAOA output.

    Args:
        adj_text: One matrix row per line; values separated by whitespace
            and/or commas.
        p_layers: Number of QAOA layers, forwarded as ``p``.
        problem: Label shown in the info panel. The QUBO itself is always
            built with ``max_cut``.

    Returns:
        ``(qasm, info_html)`` on success, or ``("", error_html)`` when the
        input is invalid or anything else fails.
    """
    import html

    try:
        # Validate before importing quprep so input mistakes get a clear
        # message of their own.
        lines = [ln.strip() for ln in (adj_text or "").strip().splitlines() if ln.strip()]
        if not lines:
            raise ValueError("Adjacency matrix is empty.")
        parsed = [[float(tok) for tok in ln.replace(",", " ").split()] for ln in lines]
        if any(len(row) != len(parsed) for row in parsed):
            raise ValueError(
                f"Adjacency matrix must be square: got {len(parsed)} row(s) "
                f"with lengths {sorted({len(row) for row in parsed})}."
            )
        adj = np.array(parsed)

        from quprep.qubo import max_cut, qaoa_circuit

        q = max_cut(adj)
        qasm = qaoa_circuit(q, p=int(p_layers))
        info = f"""
    <div style="font-family:monospace;font-size:0.9rem;line-height:1.8">
      <div style="display:grid;grid-template-columns:auto 1fr;gap:2px 20px">
        <span style="color:#94a3b8">Problem</span> <span>{problem.replace('_',' ').title()}</span>
        <span style="color:#94a3b8">Nodes</span> <span>{adj.shape[0]}</span>
        <span style="color:#94a3b8">Edges</span> <span>{int((adj != 0).sum() // 2)}</span>
        <span style="color:#94a3b8">QAOA p</span> <span>{p_layers}</span>
        <span style="color:#94a3b8">Qubits</span> <span>{q.n_original}</span>
      </div>
    </div>"""
        return qasm, info
    except Exception as exc:
        # The message can echo user-typed text; escape it before it is
        # rendered as HTML.
        return "", f"<p>❌ {html.escape(str(exc))}</p>"
716
 
717
 
718
  # ---------------------------------------------------------------------------
 
720
  # ---------------------------------------------------------------------------
721
 
722
# Colour theme object for the app.
THEME = gr.themes.Soft(primary_hue="violet", secondary_hue="blue")

# Custom styles: bordered header cards, and a height cap with vertical
# scrolling for the circuit code viewer.
CSS = """
#header-left, #header-right {
    border:1px solid #334155 !important;
    border-radius:12px !important;
    padding:20px 24px !important;
}
#circuit-out .codemirror-wrapper,
#circuit-out .cm-editor {
    max-height: 420px !important;
    overflow-y: auto !important;
}
"""

# Labels of the data-source radio; the UI maps them to the source keys that
# load_data understands.
SOURCE_CHOICES = ["📋 Sample dataset", "📁 Upload CSV", "🤗 HuggingFace Hub"]
737
+
738
+ with gr.Blocks(title="QuPrep — Quantum Data Preparation") as demo:
739
 
740
+ # ── Header ─────────────────────────────────────────────────────────────
741
  with gr.Row(equal_height=True):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
742
 
743
+ with gr.Column(scale=1, elem_id="header-left"):
 
744
  gr.HTML("""
745
+ <p style="margin:0 0 2px;font-size:1.5rem;font-weight:700;color:#e2e8f0">⚛️ QuPrep</p>
746
+ <p style="margin:0 0 14px;font-size:0.9rem;font-weight:500;color:#a78bfa">v0.8.0 · Quantum Data Preparation</p>
747
+ <p style="margin:0 0 14px;font-size:0.85rem;color:#94a3b8;line-height:1.6">
748
+ The missing preprocessing layer between classical datasets and quantum computing.
749
+ </p>
750
+ <div style="display:flex;flex-direction:column;gap:8px;font-size:0.85rem">
751
+ <div>📦 <code style="background:#1e293b;padding:2px 8px;border-radius:4px">pip install quprep</code></div>
752
+ <div>📖 <a href="https://docs.quprep.org" target="_blank" style="color:#818cf8">docs.quprep.org</a></div>
753
+ <div>💻 <a href="https://github.com/quprep/quprep" target="_blank" style="color:#818cf8">github.com/quprep/quprep</a></div>
754
+ </div>
755
+ <p style="margin:14px 0 0;font-size:0.75rem;color:#475569">13 encodings · 8 frameworks · Apache 2.0 · Python ≥ 3.10</p>""")
756
+
757
+ with gr.Column(scale=1, elem_id="header-right"):
758
+ gr.HTML('<p style="margin:0 0 10px;font-size:1.1rem;font-weight:700;color:#e2e8f0">📂 Data source</p>')
759
+ source_radio = gr.Radio(
760
+ choices=SOURCE_CHOICES,
761
+ value="📋 Sample dataset",
762
+ label="",
763
+ container=False,
764
+ )
765
+ sample_dd = gr.Dropdown(
766
+ choices=list(SAMPLES.keys()),
767
+ value="Iris (tabular · 150×4)",
768
+ label="Sample dataset",
769
+ visible=True,
770
+ )
771
  csv_upload = gr.File(
772
+ label="Upload CSV / TSV",
773
  file_types=[".csv", ".tsv"],
774
+ height=100,
775
+ visible=False,
776
  )
777
+ with gr.Row(visible=False) as hf_row:
778
+ hf_name = gr.Textbox(label="Dataset (owner/name)", placeholder="scikit-learn/iris", scale=3)
779
+ hf_split = gr.Textbox(label="Split", value="train", scale=1)
780
+
781
+ def _toggle_source(s):
782
+ is_sample = s == "📋 Sample dataset"
783
+ is_upload = s == "📁 Upload CSV"
784
+ is_hf = s == "🤗 HuggingFace Hub"
785
+ return (
786
+ gr.update(visible=is_sample),
787
+ gr.update(visible=is_upload),
788
+ gr.update(visible=is_hf),
789
+ )
790
+
791
+ source_radio.change(
792
+ fn=_toggle_source,
793
+ inputs=source_radio,
794
+ outputs=[sample_dd, csv_upload, hf_row],
795
  )
 
 
 
796
 
797
+ def _src(s):
798
+ return {"📋 Sample dataset": "sample",
799
+ "📁 Upload CSV": "upload",
800
+ "🤗 HuggingFace Hub": "huggingface"}[s]
801
+
802
+ def _inputs(*extra):
803
+ return [source_radio, csv_upload, sample_dd, hf_name, hf_split] + list(extra)
804
+
805
+ # ── Tabs ───────────────────────────────────────────────────────────────
806
  with gr.Tabs():
807
 
808
+ # Convert ──────────────────────────────────────────────────────────
809
  with gr.TabItem("Convert"):
810
  with gr.Row():
811
+ with gr.Column(scale=1, min_width=210):
812
+ enc_dd = gr.Dropdown(choices=ENCODINGS, value="angle", label="Encoding")
813
+ enc_info = gr.Markdown(f"<small><i>{ENCODING_DESC['angle']}</i></small>")
814
+ fw_dd = gr.Dropdown(choices=FRAMEWORKS, value="qasm", label="Framework")
815
+ ns_sl = gr.Slider(1, 20, value=5, step=1, label="Samples")
816
+ nq_sl = gr.Slider(0, 1121, value=0, step=1, label="Qubit budget (0=auto)")
817
+ gr.HTML('<p style="font-size:0.75rem;color:#475569;margin:4px 0">Graph & text data use fixed encoding — encoding/framework dropdowns are ignored.</p>')
818
+ conv_btn = gr.Button("Convert →", variant="primary", size="lg")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
819
  with gr.Column(scale=4):
820
+ conv_status = gr.Textbox(label="", lines=1, max_lines=1, interactive=False,
821
+ show_label=False, placeholder="Press Convert →")
 
 
 
 
 
822
  with gr.Row(equal_height=True):
823
  with gr.Column(scale=3):
824
+ circuit_out = gr.Code(label="Circuit (sample 0)", language="python", lines=18, max_lines=25, elem_id="circuit-out")
 
 
 
825
  with gr.Column(scale=1):
826
+ cost_out = gr.HTML(label="Cost")
827
+ with gr.Row():
828
+ input_tbl = gr.Dataframe(label="Input data (first 5)", interactive=False)
829
+ encoded_tbl = gr.Dataframe(label="Encoded parameters (first 5)", interactive=False)
830
 
831
+ enc_dd.change(fn=lambda e: f"<small><i>{ENCODING_DESC.get(e,'')}</i></small>",
832
+ inputs=enc_dd, outputs=enc_info)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
833
 
834
def _conv(src, csv, samp, hfn, hfs, enc, fw, ns, nq):
    """Click handler for the Convert tab.

    Maps the radio label ``src`` to its source key, then forwards all
    arguments unchanged to ``run_convert`` and returns its result.
    """
    source_key = _src(src)
    return run_convert(source_key, csv, samp, hfn, hfs, enc, fw, ns, nq)
836
+
837
+ conv_btn.click(fn=_conv,
838
+ inputs=_inputs(enc_dd, fw_dd, ns_sl, nq_sl),
839
+ outputs=[input_tbl, encoded_tbl, circuit_out, cost_out, conv_status])
840
 
841
+ # Recommend ────────────────────────────────────────────────────────
842
  with gr.TabItem("Recommend"):
843
  with gr.Row():
844
  with gr.Column(scale=1):
845
+ rec_task = gr.Dropdown(choices=TASKS, value="classification", label="Task")
846
+ rec_qsl = gr.Slider(0, 1121, value=0, step=1, label="Qubit budget (0=auto)")
847
+ rec_btn = gr.Button("Recommend →", variant="primary")
 
 
 
 
 
 
 
848
  with gr.Column(scale=2):
849
+ rec_out = gr.HTML(value="<p style='color:#475569;padding:24px 0'>Click <strong>Recommend →</strong></p>")
 
 
 
 
 
 
 
 
 
 
 
 
 
850
 
851
def _rec(src, csv, samp, hfn, hfs, task, nq):
    """Click handler for the Recommend tab.

    Maps the radio label ``src`` to its source key, then forwards all
    arguments unchanged to ``run_recommend`` and returns its result.
    """
    source_key = _src(src)
    return run_recommend(source_key, csv, samp, hfn, hfs, task, nq)
853
+
854
+ rec_btn.click(fn=_rec, inputs=_inputs(rec_task, rec_qsl), outputs=rec_out)
855
+
856
+ # Compare ──────────────────────────────────────────────────────────
857
  with gr.TabItem("Compare encoders"):
858
  with gr.Row():
859
  with gr.Column(scale=1):
860
+ cmp_task = gr.Dropdown(choices=TASKS, value="classification", label="Task")
861
+ cmp_qsl = gr.Slider(0, 20, value=8, step=1, label="Qubit budget")
862
+ cmp_btn = gr.Button("Compare →", variant="primary")
 
 
 
 
 
 
 
863
  with gr.Column(scale=2):
864
+ cmp_out = gr.HTML(value="<p style='color:#475569;padding:24px 0'>Click <strong>Compare →</strong></p>")
865
+
866
def _cmp(src, csv, samp, hfn, hfs, task, nq):
    """Click handler for the Compare encoders tab.

    Maps the radio label ``src`` to its source key, then forwards all
    arguments unchanged to ``run_compare`` and returns its result.
    """
    source_key = _src(src)
    return run_compare(source_key, csv, samp, hfn, hfs, task, nq)
868
+
869
+ cmp_btn.click(fn=_cmp, inputs=_inputs(cmp_task, cmp_qsl), outputs=cmp_out)
 
 
 
 
 
 
 
 
870
 
871
+ # Inspect ──────────────────────────────────────────────────────────
872
+ with gr.TabItem("Inspect"):
873
+ with gr.Row():
874
+ with gr.Column(scale=1):
875
+ ins_btn = gr.Button("Inspect →", variant="primary")
876
+ gr.HTML('<p style="font-size:0.82rem;color:#475569;margin:8px 0">Shape, types, missing, sparsity, per-feature stats.</p>')
877
+ with gr.Column(scale=3):
878
+ ins_out = gr.HTML(value="<p style='color:#475569;padding:24px 0'>Click <strong>Inspect →</strong></p>")
879
+
880
def _ins(src, csv, samp, hfn, hfs):
    """Click handler for the Inspect tab.

    Maps the radio label ``src`` to its source key, then forwards the
    data-source arguments to ``run_inspect`` and returns its result.
    """
    source_key = _src(src)
    return run_inspect(source_key, csv, samp, hfn, hfs)
882
+
883
+ ins_btn.click(fn=_ins, inputs=_inputs(), outputs=ins_out)
884
+
885
+ # Suggest qubits ───────────────────────────────────────────────────
886
+ with gr.TabItem("Suggest qubits"):
887
+ with gr.Row():
888
+ with gr.Column(scale=1):
889
+ sug_task = gr.Dropdown(choices=TASKS, value="classification", label="Task")
890
+ sug_max = gr.Slider(0, 30, value=0, step=1, label="Max qubits (0 = no ceiling)")
891
+ sug_btn = gr.Button("Suggest →", variant="primary")
892
+ gr.HTML('<p style="font-size:0.82rem;color:#475569;margin:8px 0">Returns the minimum qubit count that fits your data and task, with per-encoding breakdown.</p>')
893
+ with gr.Column(scale=3):
894
+ sug_out = gr.HTML(value="<p style='color:#475569;padding:24px 0'>Click <strong>Suggest →</strong></p>")
895
+
896
def _sug(src, csv, samp, hfn, hfs, task, mq):
    """Click handler for the Suggest qubits tab.

    Maps the radio label ``src`` to its source key, then forwards all
    arguments unchanged to ``run_suggest`` and returns its result.
    """
    source_key = _src(src)
    return run_suggest(source_key, csv, samp, hfn, hfs, task, mq)
898
+
899
+ sug_btn.click(fn=_sug, inputs=_inputs(sug_task, sug_max), outputs=sug_out)
900
+
901
+ # Fingerprint ──────────────────────────────────────────────────────
902
+ with gr.TabItem("Fingerprint"):
903
+ with gr.Row():
904
+ with gr.Column(scale=1):
905
+ fp_enc = gr.Dropdown(choices=ENCODINGS, value="angle", label="Encoder")
906
+ fp_fw = gr.Dropdown(choices=FRAMEWORKS, value="qasm", label="Exporter (framework)")
907
+ with gr.Accordion("Reducer", open=False):
908
+ fp_red = gr.Dropdown(choices=["none","pca","lda","spectral","tsne","hardware_aware"], value="none", label="Type")
909
+ fp_nc = gr.Slider(1, 64, value=4, step=1, label="n_components", visible=False)
910
+ fp_nq = gr.Slider(1, 1121, value=8, step=1, label="Qubit budget (hardware_aware)", visible=False)
911
+ with gr.Accordion("Scaler", open=False):
912
+ fp_scl = gr.Checkbox(label="Enable scaler", value=False)
913
+ fp_sst = gr.Dropdown(choices=["minmax","minmax_pi","minmax_pm_pi","zscore","l2","binary","pm_one"], value="minmax_pi", label="Strategy", visible=False)
914
+ with gr.Accordion("Cleaner", open=False):
915
+ fp_imp = gr.Checkbox(label="Enable imputer", value=False)
916
+ fp_ist = gr.Dropdown(choices=["mean","median","mode","knn","drop"], value="mean", label="Imputer strategy", visible=False)
917
+ fp_out = gr.Checkbox(label="Enable outlier handler", value=False)
918
+ fp_ost = gr.Dropdown(choices=["iqr","zscore","isolation_forest"], value="iqr", label="Outlier method", visible=False)
919
+ with gr.Accordion("Feature selector", open=False):
920
+ fp_fsel = gr.Checkbox(label="Enable feature selector", value=False)
921
+ fp_fsm = gr.Dropdown(choices=["correlation","mutual_info","variance"], value="correlation", label="Method", visible=False)
922
+ fp_fsmx = gr.Slider(1, 64, value=8, step=1, label="Max features", visible=False)
923
+ fp_btn = gr.Button("Fingerprint →", variant="primary")
924
+ gr.HTML('<p style="font-size:0.82rem;color:#475569;margin:8px 0">Generates a deterministic SHA-256 hash of the pipeline config — stable across runs for the same setup.</p>')
925
+ with gr.Column(scale=3):
926
+ fp_result = gr.HTML(value="<p style='color:#475569;padding:24px 0'>Click <strong>Fingerprint →</strong></p>")
927
+ fp_errmsg = gr.HTML()
928
+
929
+ fp_red.change(fn=lambda v: (gr.update(visible=v not in ("none","hardware_aware")), gr.update(visible=v=="hardware_aware")),
930
+ inputs=fp_red, outputs=[fp_nc, fp_nq])
931
+ fp_scl.change(fn=lambda v: gr.update(visible=v), inputs=fp_scl, outputs=fp_sst)
932
+ fp_imp.change(fn=lambda v: gr.update(visible=v), inputs=fp_imp, outputs=fp_ist)
933
+ fp_out.change(fn=lambda v: gr.update(visible=v), inputs=fp_out, outputs=fp_ost)
934
+ fp_fsel.change(fn=lambda v: (gr.update(visible=v), gr.update(visible=v)),
935
+ inputs=fp_fsel, outputs=[fp_fsm, fp_fsmx])
936
+
937
def _fp(src, csv, samp, hfn, hfs, enc, fw, red, nc, nq, scl, sst, imp, ist, out_flag, ost, fsel, fsm, fsmx):
    """Click handler for the Fingerprint tab.

    Maps the radio label ``src`` to its source key and coerces the
    max-features slider value ``fsmx`` to ``int``; every other argument
    is forwarded to ``run_fingerprint`` unchanged.
    """
    # Resolve the source key first so an unknown label fails before the int() coercion.
    source_key = _src(src)
    max_features = int(fsmx)
    return run_fingerprint(
        source_key, csv, samp, hfn, hfs,
        enc, fw, red, nc, nq,
        scl, sst, imp, ist,
        out_flag, ost, fsel, fsm, max_features,
    )
940
+
941
+ fp_btn.click(fn=_fp,
942
+ inputs=_inputs(fp_enc, fp_fw, fp_red, fp_nc, fp_nq,
943
+ fp_scl, fp_sst, fp_imp, fp_ist,
944
+ fp_out, fp_ost, fp_fsel, fp_fsm, fp_fsmx),
945
+ outputs=[fp_result, fp_errmsg])
946
+
947
+ # QUBO / QAOA ──────────────────────────────────────────────────────
948
+ with gr.TabItem("QUBO / QAOA"):
949
+ gr.HTML('<p style="font-size:0.82rem;color:#475569;margin:4px 0 12px">Independent of the data selector above — takes a graph adjacency matrix directly.</p>')
950
+ with gr.Row():
951
+ with gr.Column(scale=1):
952
+ qb_prob = gr.Dropdown(choices=["max_cut"], value="max_cut", label="Problem")
953
+ qb_adj = gr.Textbox(label="Adjacency matrix (space-separated rows)",
954
+ value="0 1 1\n1 0 1\n1 1 0", lines=5)
955
+ qb_p = gr.Slider(1, 5, value=2, step=1, label="QAOA layers (p)")
956
+ qb_btn = gr.Button("Generate QAOA circuit →", variant="primary")
957
+ with gr.Column(scale=3):
958
+ qb_info = gr.HTML(value="<p style='color:#475569'>Problem stats will appear here.</p>")
959
+ qb_out = gr.Code(label="QAOA circuit (OpenQASM 3.0)", language="python", lines=20)
960
+
961
+ qb_btn.click(fn=run_qubo, inputs=[qb_adj, qb_p, qb_prob], outputs=[qb_out, qb_info])
962
+
963
+ # About ────────────────────────────────────────────────────────────
964
  with gr.TabItem("About"):
965
+ gr.Markdown("""
 
966
  ## About QuPrep
967
 
968
+ The missing preprocessing layer between classical datasets and quantum computing.
 
 
 
969
 
970
  ### Pipeline
 
971
  ```
972
+ Connect → Ingest → Clean → Reduce → Normalise → Encode → Export
973
  ```
974
+ Supports tabular, image, time series, graph, and text data.
975
+ Data connectors: HuggingFace Hub, OpenML, Kaggle, CSV/NumPy upload.
976
 
977
+ ### Supported encodings (13)
 
 
 
 
 
 
 
 
 
978
  | Encoding | Qubits | NISQ-safe |
979
  |---|---|---|
980
+ | Angle | d | ✓ |
981
+ | Amplitude | ⌈log₂d⌉ | ✗ |
982
  | Basis | d | ✓ |
983
  | IQP | d | conditional |
984
  | Entangled Angle | d | ✓ |
985
+ | Re-Upload | d | ✓ |
986
  | Hamiltonian | d | ✗ |
987
  | ZZ Feature Map | d | conditional |
988
  | Pauli Feature Map | d | conditional |
989
  | Random Fourier | n_components | ✓ |
990
  | Tensor Product | ⌈d/2⌉ | ✓ |
991
+ | QAOA Problem | d | ✓ |
992
+ | Graph State | n_nodes | ✓ |
993
 
994
  ### Links
995
+ - 📦 [pypi.org/project/quprep](https://pypi.org/project/quprep/)
996
+ - 📖 [docs.quprep.org](https://docs.quprep.org)
997
+ - 🌐 [quprep.org](https://quprep.org)
998
+ - 💻 [github.com/quprep/quprep](https://github.com/quprep/quprep)
999
 
1000
+ Apache 2.0 · Python ≥ 3.10
1001
+ """)
 
 
 
 
 
 
1002
 
1003
if __name__ == "__main__":
    # Launch the app only when this file is executed as a script, not on import.
    # NOTE(review): passing theme/css to launch() depends on the installed Gradio
    # major version — confirm against the `gradio>=4.0` floor in requirements.txt.
    demo.launch(theme=THEME, css=CSS)
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- quprep[qiskit,pennylane,cirq,tket,braket,qsharp,viz]>=0.6.0
2
  gradio>=4.0
3
  pandas
4
  numpy
 
1
+ quprep[qiskit,pennylane,cirq,tket,braket,qsharp,viz,huggingface,openml,image,text]>=0.8.0
2
  gradio>=4.0
3
  pandas
4
  numpy