Adhitya122 committed on
Commit
f3e2722
·
verified ·
1 Parent(s): 5511b8f

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. index.html +171 -396
index.html CHANGED
@@ -3,455 +3,230 @@
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>MolForge | Verifier-Driven RL for Drug Discovery</title>
7
- <link rel="preconnect" href="https://fonts.googleapis.com">
8
- <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
9
- <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;700;800&family=Outfit:wght@400;600;800&display=swap" rel="stylesheet">
10
  <style>
11
- :root {
12
- --primary: #6366f1;
13
- --primary-glow: rgba(99, 102, 241, 0.5);
14
- --secondary: #8b5cf6;
15
- --bg: #0f172a;
16
- --card-bg: rgba(30, 41, 59, 0.7);
17
- --text: #f8fafc;
18
- --text-dim: #94a3b8;
19
- --glass: rgba(255, 255, 255, 0.03);
20
- --glass-border: rgba(255, 255, 255, 0.1);
21
- }
22
-
23
- * {
24
- margin: 0;
25
- padding: 0;
26
- box-sizing: border-box;
27
- }
28
-
29
  body {
30
- font-family: 'Inter', sans-serif;
31
- background-color: var(--bg);
32
- background-image:
33
- radial-gradient(circle at 20% 20%, rgba(99, 102, 241, 0.15) 0%, transparent 40%),
34
- radial-gradient(circle at 80% 80%, rgba(139, 92, 246, 0.15) 0%, transparent 40%);
35
- color: var(--text);
36
- line-height: 1.6;
37
- overflow-x: hidden;
38
  }
39
-
40
- .container {
41
- max-width: 1100px;
42
  margin: 0 auto;
43
- padding: 0 2rem;
44
  }
45
-
46
- /* Hero Section */
47
- header {
48
- height: 90vh;
49
- display: flex;
50
- flex-direction: column;
51
- justify-content: center;
52
- align-items: center;
53
- text-align: center;
54
- position: relative;
55
  }
56
-
57
- .badge {
58
- background: var(--glass);
59
- border: 1px solid var(--glass-border);
60
- padding: 0.5rem 1.2rem;
61
- border-radius: 99px;
62
- font-size: 0.85rem;
63
  font-weight: 600;
64
- color: var(--primary);
65
- margin-bottom: 1.5rem;
66
- display: inline-block;
67
- backdrop-filter: blur(10px);
68
- }
69
-
70
- h1 {
71
- font-family: 'Outfit', sans-serif;
72
- font-size: clamp(3rem, 8vw, 5.5rem);
73
- font-weight: 800;
74
- line-height: 1.1;
75
- margin-bottom: 1.5rem;
76
- background: linear-gradient(to bottom right, #fff 30%, var(--text-dim));
77
- -webkit-background-clip: text;
78
- -webkit-text-fill-color: transparent;
79
- }
80
-
81
- .hero-tagline {
82
- font-size: clamp(1.1rem, 3vw, 1.4rem);
83
- color: var(--text-dim);
84
- max-width: 700px;
85
- margin-bottom: 3rem;
86
- }
87
-
88
- .cta-group {
89
- display: flex;
90
- gap: 1.5rem;
91
- flex-wrap: wrap;
92
- justify-content: center;
93
- }
94
-
95
- .btn {
96
- padding: 1rem 2.5rem;
97
- border-radius: 12px;
98
- font-weight: 700;
99
- text-decoration: none;
100
- transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
101
  display: inline-flex;
102
  align-items: center;
103
- gap: 0.5rem;
104
- }
105
-
106
- .btn-primary {
107
- background: var(--primary);
108
- color: white;
109
- box-shadow: 0 10px 20px -10px var(--primary-glow);
110
- }
111
-
112
- .btn-primary:hover {
113
- transform: translateY(-2px);
114
- box-shadow: 0 15px 30px -10px var(--primary-glow);
115
- filter: brightness(1.1);
116
- }
117
-
118
- .btn-secondary {
119
- background: var(--glass);
120
- border: 1px solid var(--glass-border);
121
- color: white;
122
- }
123
-
124
- .btn-secondary:hover {
125
- background: var(--glass-border);
126
- transform: translateY(-2px);
127
- }
128
-
129
- /* Section Styling */
130
- section {
131
- padding: 8rem 0;
132
- }
133
-
134
- .section-header {
135
- margin-bottom: 4rem;
136
- text-align: center;
137
- }
138
-
139
- .section-header h2 {
140
- font-family: 'Outfit', sans-serif;
141
- font-size: 2.5rem;
142
- margin-bottom: 1rem;
143
- }
144
-
145
- .section-header p {
146
- color: var(--text-dim);
147
- max-width: 600px;
148
- margin: 0 auto;
149
- }
150
-
151
- /* Pillars Grid */
152
- .pillars-grid {
153
- display: grid;
154
- grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
155
- gap: 2rem;
156
- }
157
-
158
- .pillar-card {
159
- background: var(--card-bg);
160
- border: 1px solid var(--glass-border);
161
- padding: 2.5rem;
162
- border-radius: 24px;
163
- transition: all 0.4s ease;
164
- backdrop-filter: blur(12px);
165
- }
166
-
167
- .pillar-card:hover {
168
- transform: translateY(-10px);
169
- border-color: var(--primary);
170
- box-shadow: 0 20px 40px -20px rgba(0,0,0,0.5);
171
- }
172
-
173
- .pillar-icon {
174
- font-size: 2rem;
175
- margin-bottom: 1.5rem;
176
- background: var(--glass);
177
- width: 60px;
178
- height: 60px;
179
- display: flex;
180
- align-items: center;
181
- justify-content: center;
182
- border-radius: 16px;
183
- }
184
-
185
- .pillar-card h3 {
186
- font-size: 1.4rem;
187
- margin-bottom: 1rem;
188
- color: var(--primary);
189
  }
190
-
191
- .pillar-card p {
192
- color: var(--text-dim);
193
- font-size: 0.95rem;
194
- }
195
-
196
- /* Visuals Section */
197
- .visual-container {
198
- background: var(--card-bg);
199
- border: 1px solid var(--glass-border);
200
- border-radius: 32px;
201
- padding: 3rem;
202
- margin-bottom: 4rem;
203
- overflow: hidden;
204
- }
205
-
206
- .visual-container img {
207
- width: 100%;
208
- height: auto;
209
- border-radius: 16px;
210
- box-shadow: 0 20px 50px rgba(0,0,0,0.4);
211
- }
212
-
213
- .visual-label {
214
- display: block;
215
- text-align: center;
216
- margin-top: 1.5rem;
217
- color: var(--text-dim);
218
- font-weight: 500;
219
- }
220
-
221
- /* Results Table */
222
- .table-wrapper {
223
- overflow-x: auto;
224
- background: var(--glass);
225
- border-radius: 20px;
226
- border: 1px solid var(--glass-border);
227
- }
228
-
229
- table {
230
- width: 100%;
231
- border-collapse: collapse;
232
- text-align: left;
233
- }
234
-
235
- th, td {
236
- padding: 1.5rem;
237
- border-bottom: 1px solid var(--glass-border);
238
- }
239
-
240
- th {
241
- background: rgba(255,255,255,0.05);
242
- font-weight: 700;
243
- text-transform: uppercase;
244
- font-size: 0.75rem;
245
- letter-spacing: 0.1em;
246
- color: var(--text-dim);
247
- }
248
-
249
- .improvement {
250
- color: #10b981;
251
- font-weight: 800;
252
- }
253
-
254
- /* POMDP Info */
255
- .pomdp-box {
256
- display: grid;
257
- grid-template-columns: 1fr 1fr;
258
- gap: 2rem;
259
- margin-top: 3rem;
260
- }
261
-
262
- .state-card {
263
- background: var(--glass);
264
- padding: 2rem;
265
- border-radius: 20px;
266
- border-left: 4px solid var(--primary);
267
- }
268
-
269
- .state-card h4 {
270
- margin-bottom: 1rem;
271
- color: var(--text);
272
- }
273
-
274
- /* Footer */
275
- footer {
276
- padding: 6rem 0;
277
- text-align: center;
278
- border-top: 1px solid var(--glass-border);
279
- }
280
-
281
- .footer-links {
282
- display: flex;
283
- justify-content: center;
284
- gap: 2rem;
285
- margin-bottom: 2rem;
286
- }
287
-
288
- .footer-links a {
289
- color: var(--text-dim);
290
- text-decoration: none;
291
- font-weight: 600;
292
- transition: color 0.3s;
293
- }
294
-
295
- .footer-links a:hover {
296
- color: var(--primary);
297
- }
298
-
299
- @media (max-width: 768px) {
300
- .pomdp-box {
301
- grid-template-columns: 1fr;
302
- }
303
- h1 { font-size: 3rem; }
304
- .pillars-grid { grid-template-columns: 1fr; }
305
  }
306
  </style>
307
  </head>
308
- <body>
309
-
310
- <div class="container">
311
- <!-- Hero Section -->
312
- <header>
313
- <div class="badge">OpenEnv Hackathon 2026</div>
314
- <h1>MolForge</h1>
315
- <p class="hero-tagline">A verifier-driven reinforcement learning environment for oncology drug discovery, where the LLM is the scientist, not the judge.</p>
316
- <div class="cta-group">
317
- <a href="https://colab.research.google.com/drive/1c6npGkGNbbbd8XFNeS6zInBpopLnJ4W4?usp=sharing" target="_blank" class="btn btn-primary">
318
- Launch Training Notebook
319
- </a>
320
- <a href="#pillars" class="btn btn-secondary">Explore the Pillars</a>
321
  </div>
322
- </header>
323
-
324
- <!-- POMDP Section -->
325
- <section id="architecture">
326
- <div class="section-header">
327
- <h2>Scientific Architecture</h2>
328
- <p>MolForge operates as a Partially Observable Markov Decision Process (POMDP), forcing models to operate under real-world uncertainty.</p>
 
 
329
  </div>
330
-
331
- <div class="visual-container">
332
- <img src="assets/molforge_architecture.png" alt="MolForge Architecture">
333
- <span class="visual-label">Closed-loop scientific feedback architecture</span>
 
 
 
 
 
 
 
 
334
  </div>
335
-
336
- <div class="pomdp-box">
337
- <div class="state-card">
338
- <h4>Hidden Reality</h4>
339
- <p>The ground-truth scoring for potency, safety, and synthesizability. Includes late-stage mutation traps that only evidence can reveal.</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
  </div>
341
- <div class="state-card">
342
- <h4>Visible Evidence</h4>
343
- <p>Noisy assay reports from RDKit and TDC, remaining budget, and structured feedback from the governance board.</p>
344
  </div>
345
  </div>
346
- </section>
347
 
348
- <!-- Pillars Section -->
349
- <section id="pillars">
350
- <div class="section-header">
351
- <h2>The Seven Pillars</h2>
352
- <p>Beyond simple molecule generation: a complete medicinal chemistry workflow optimizer.</p>
353
  </div>
 
354
 
355
- <div class="pillars-grid">
356
- <div class="pillar-card">
357
- <div class="pillar-icon">🧪</div>
358
- <h3>Verifier-First</h3>
359
- <p>The LLM is held accountable by RDKit and TDC simulation engines. It must justify every decision with verifiable data.</p>
 
 
360
  </div>
361
- <div class="pillar-card">
362
- <div class="pillar-icon">🧬</div>
363
- <h3>Physics Grounded</h3>
364
- <p>Heuristic docking scores simulate pocket matching, lipophilic fit, and polarity clash in milliseconds.</p>
365
  </div>
366
- <div class="pillar-card">
367
- <div class="pillar-icon">🔄</div>
368
- <h3>Self-Correction</h3>
369
- <p>A structured loop where agents receive reviews on their edits and iteratively repair candidates.</p>
370
  </div>
371
- <div class="pillar-card">
372
- <div class="pillar-icon">📊</div>
373
- <h3>Decomposed Rewards</h3>
374
- <p>Fine-grained observability into research, edits, and coordination—not just a single vague scalar.</p>
375
  </div>
376
- <div class="pillar-card">
377
- <div class="pillar-icon">🔬</div>
378
- <h3>Evidence-Based</h3>
379
- <p>Constant, verifiable reviews drive the model toward sound scientific design rather than pattern matching.</p>
380
  </div>
381
- <div class="pillar-card">
382
- <div class="pillar-icon">🎓</div>
383
- <h3>Curriculum Learning</h3>
384
- <p>Partial credit "breadcrumbs" for early RL exploration, transitioning to strict evaluation for grading.</p>
385
  </div>
386
- <div class="pillar-card">
387
- <div class="pillar-icon">🤝</div>
388
- <h3>Governance</h3>
389
- <p>Multi-agent specialist board reviews every plan and execution to ensure rigor and safety.</p>
390
  </div>
391
  </div>
392
  </section>
393
 
394
- <!-- Results Section -->
395
- <section id="results">
396
- <div class="section-header">
397
- <h2>Training & Performance</h2>
398
- <p>Comparing the Supervised Fine-Tuning (SFT) baseline against the final GRPO-trained policy.</p>
399
- </div>
400
-
401
- <div class="visual-container">
402
- <img src="assets/reward_curve.png" alt="Reward Curve">
403
- <span class="visual-label">Learning progression from sparse rewards to consistent submissions</span>
404
  </div>
405
 
406
- <div class="table-wrapper">
407
- <table>
408
- <thead>
409
  <tr>
410
- <th>Scenario Difficulty</th>
411
- <th>Before (SFT)</th>
412
- <th>After (RL)</th>
413
- <th>Improvement</th>
414
  </tr>
415
  </thead>
416
- <tbody>
417
  <tr>
418
- <td><strong>Level 0: Easy</strong></td>
419
- <td>0.1167</td>
420
- <td>0.1295</td>
421
- <td class="improvement">+10.9%</td>
422
  </tr>
423
  <tr>
424
- <td><strong>Level 1: Medium</strong></td>
425
- <td>0.1167</td>
426
- <td>0.1278</td>
427
- <td class="improvement">+9.5%</td>
428
  </tr>
429
  <tr>
430
- <td><strong>Level 2: Hard</strong></td>
431
- <td>0.0800</td>
432
- <td>0.0866</td>
433
- <td class="improvement">+8.3%</td>
434
  </tr>
435
  </tbody>
436
  </table>
437
  </div>
438
 
439
- <div style="margin-top: 4rem;">
440
- <img src="assets/Logs.png" alt="Training Logs" style="width: 100%; border-radius: 20px; border: 1px solid var(--glass-border);">
441
- <span class="visual-label">Detailed action telemetry and governance history</span>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
  </div>
443
  </section>
444
 
445
  <!-- Footer -->
446
- <footer>
447
- <div class="footer-links">
448
- <a href="https://github.com/Adhitya-Vardhan/molt_lab" target="_blank">GitHub Repository</a>
449
- <a href="https://huggingface.co/Adhitya122/molforge-grpo-oncology" target="_blank">Model Card</a>
450
- <a href="https://colab.research.google.com/drive/1c6npGkGNbbbd8XFNeS6zInBpopLnJ4W4?usp=sharing" target="_blank">Colab Notebook</a>
451
  </div>
452
- <p style="color: var(--text-dim); font-size: 0.9rem;">Built for the OpenEnv Hackathon 2026</p>
453
  </footer>
454
- </div>
455
 
456
  </body>
457
  </html>
 
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>MolForge: The Scientific Method as a Workflow</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&family=JetBrains+Mono&display=swap" rel="stylesheet">
 
9
  <style>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  body {
11
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
12
+ -webkit-font-smoothing: antialiased;
13
+ background-color: #ffffff;
14
+ color: #0f172a;
 
 
 
 
15
  }
16
+ .prose-custom {
17
+ max-width: 65ch;
 
18
  margin: 0 auto;
 
19
  }
20
+ .shadcn-card {
21
+ border: 1px solid #e2e8f0;
22
+ background: #ffffff;
23
+ border-radius: 0.5rem;
24
+ box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1);
 
 
 
 
 
25
  }
26
+ .shadcn-badge {
27
+ background: #f1f5f9;
28
+ color: #475569;
29
+ font-size: 0.75rem;
 
 
 
30
  font-weight: 600;
31
+ padding: 0.125rem 0.625rem;
32
+ border-radius: 9999px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  display: inline-flex;
34
  align-items: center;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  }
36
+ .mono {
37
+ font-family: 'JetBrains Mono', monospace;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  }
39
  </style>
40
  </head>
41
+ <body class="bg-white">
42
+
43
+ <!-- Navigation -->
44
+ <nav class="border-b sticky top-0 bg-white/80 backdrop-blur-md z-50">
45
+ <div class="max-w-4xl mx-auto px-6 h-16 flex items-center justify-between">
46
+ <span class="font-bold tracking-tight text-lg">MolForge</span>
47
+ <div class="flex gap-6 text-sm font-medium text-slate-600">
48
+ <a href="https://github.com/Adhitya-Vardhan/molt_lab" class="hover:text-black transition-colors">GitHub</a>
49
+ <a href="https://colab.research.google.com/drive/1c6npGkGNbbbd8XFNeS6zInBpopLnJ4W4?usp=sharing" class="hover:text-black transition-colors">Training</a>
 
 
 
 
50
  </div>
51
+ </div>
52
+ </nav>
53
+
54
+ <main class="max-w-4xl mx-auto px-6 py-20">
55
+ <!-- Header -->
56
+ <div class="mb-16">
57
+ <div class="flex gap-2 mb-6">
58
+ <span class="shadcn-badge bg-indigo-50 text-indigo-700">OpenEnv Hackathon</span>
59
+ <span class="shadcn-badge">Deep Research</span>
60
  </div>
61
+ <h1 class="text-4xl md:text-5xl font-extrabold tracking-tight mb-6 leading-tight">
62
+ MolForge: Verifier-Driven RL for Drug Discovery
63
+ </h1>
64
+ <p class="text-xl text-slate-500 mb-8 leading-relaxed">
65
+ Transforming the scientific method into a reinforcement learning workflow where the LLM is the scientist, not the judge.
66
+ </p>
67
+ <div class="flex items-center gap-4 text-sm text-slate-400">
68
+ <div class="w-8 h-8 rounded-full bg-slate-200"></div>
69
+ <div>
70
+ <p class="font-semibold text-slate-900">Adhitya Vardhan</p>
71
+ <p>April 26, 2026 • 8 min read</p>
72
+ </div>
73
  </div>
74
+ </div>
75
+
76
+ <!-- Introduction -->
77
+ <div class="prose prose-slate prose-lg max-w-none mb-20 leading-relaxed text-slate-700">
78
+ <p>
79
+ In traditional drug discovery tasks, LLMs are often asked to "generate a molecule" in a single shot. But science doesn't happen in a vacuum. It happens in the loop—through trial, error, and verification.
80
+ </p>
81
+ <p class="mt-4">
82
+ <strong>MolForge</strong> is a reinforcement learning environment that simulates a medical oncology discovery lab. It forces the model to navigate real-world constraints: limited budget, molecular toxicity, and synthesis complexity.
83
+ </p>
84
+
85
+ <div class="my-12 p-6 bg-slate-50 border rounded-xl">
86
+ <p class="text-indigo-700 font-semibold mb-2">The Core Thesis</p>
87
+ <p class="italic text-slate-600">
88
+ "The LLM is not the judge. The LLM is the scientist being judged by external, verifiable reality."
89
+ </p>
90
+ </div>
91
+ </div>
92
+
93
+ <!-- Architecture Section -->
94
+ <section class="mb-24">
95
+ <h2 class="text-2xl font-bold mb-8 flex items-center gap-2">
96
+ <span class="w-2 h-8 bg-indigo-500 rounded-full"></span>
97
+ Scientific Architecture
98
+ </h2>
99
+ <p class="text-slate-600 mb-8 text-lg">
100
+ The environment is designed as a <strong>POMDP (Partially Observable Markov Decision Process)</strong>. This separation between what is true and what is visible is what makes the environment a scientific challenge.
101
+ </p>
102
+
103
+ <div class="grid md:grid-cols-2 gap-6 mb-12">
104
+ <div class="shadcn-card p-6 border-l-4 border-l-slate-900">
105
+ <h3 class="font-bold mb-3 text-sm uppercase tracking-wider text-slate-500">Hidden State</h3>
106
+ <p class="text-slate-600 text-sm">Ground-truth scores for potency, safety, and synthesizability. Includes sunk-cost traps and late-stage mutation shifts.</p>
107
  </div>
108
+ <div class="shadcn-card p-6 border-l-4 border-l-indigo-500">
109
+ <h3 class="font-bold mb-3 text-sm uppercase tracking-wider text-slate-500">Visible State</h3>
110
+ <p class="text-slate-600 text-sm">Noisy assay reports from RDKit and TDC, remaining budget, and structured governance feedback.</p>
111
  </div>
112
  </div>
 
113
 
114
+ <div class="shadcn-card p-4 bg-slate-50">
115
+ <img src="assets/molforge_architecture.png" alt="Architecture" class="rounded-lg w-full">
116
+ <p class="mt-4 text-center text-xs text-slate-400 font-medium italic">MolForge Closed-Loop Feedback Flow</p>
 
 
117
  </div>
118
+ </section>
119
 
120
+ <!-- Seven Pillars -->
121
+ <section class="mb-24">
122
+ <h2 class="text-2xl font-bold mb-10">The Seven Pillars of MolForge</h2>
123
+ <div class="grid gap-4">
124
+ <div class="shadcn-card p-6 hover:bg-slate-50 transition-colors cursor-default">
125
+ <h4 class="font-bold text-indigo-600 mb-1">1. Verifier-Based Evaluation</h4>
126
+ <p class="text-slate-600 text-sm">The LLM is held accountable by real-world verifiers like <strong>RDKit</strong> and <strong>TDC</strong> instead of relying on self-judgment.</p>
127
  </div>
128
+ <div class="shadcn-card p-6 hover:bg-slate-50 transition-colors cursor-default">
129
+ <h4 class="font-bold text-indigo-600 mb-1">2. Physics-Grounded Simulation</h4>
130
+ <p class="text-slate-600 text-sm">Heuristic docking simulates receptor fit, lipophilicity (LogP), and polarity (TPSA) in milliseconds.</p>
 
131
  </div>
132
+ <div class="shadcn-card p-6 hover:bg-slate-50 transition-colors cursor-default">
133
+ <h4 class="font-bold text-indigo-600 mb-1">3. Self-Correction Loop</h4>
134
+ <p class="text-slate-600 text-sm">Agents receive structured feedback on every molecular edit, allowing them to repair liabilities in real-time.</p>
 
135
  </div>
136
+ <div class="shadcn-card p-6 hover:bg-slate-50 transition-colors cursor-default">
137
+ <h4 class="font-bold text-indigo-600 mb-1">4. Decomposed Reward Architecture</h4>
138
+ <p class="text-slate-600 text-sm">Rewards are broken down by research, coordination, and quality for maximum observability.</p>
 
139
  </div>
140
+ <div class="shadcn-card p-6 hover:bg-slate-50 transition-colors cursor-default">
141
+ <h4 class="font-bold text-indigo-600 mb-1">5. Strategic Training Modes</h4>
142
+ <p class="text-slate-600 text-sm">Curriculum mode provides partial credit "breadcrumbs" to solve the sparse reward problem in RL.</p>
 
143
  </div>
144
+ <div class="shadcn-card p-6 hover:bg-slate-50 transition-colors cursor-default">
145
+ <h4 class="font-bold text-indigo-600 mb-1">6. Multi-Agent Governance</h4>
146
+ <p class="text-slate-600 text-sm">A specialized team (Chemist, Toxicologist, Planner) must coordinate and approve every major move.</p>
 
147
  </div>
148
+ <div class="shadcn-card p-6 hover:bg-slate-50 transition-colors cursor-default">
149
+ <h4 class="font-bold text-indigo-600 mb-1">7. Scientific Model Improvement</h4>
150
+ <p class="text-slate-600 text-sm">Verifier feedback drives the model toward scientifically sound designs rather than pattern matching.</p>
 
151
  </div>
152
  </div>
153
  </section>
154
 
155
+ <!-- Training & Results -->
156
+ <section class="mb-24">
157
+ <h2 class="text-2xl font-bold mb-8">Training & Performance</h2>
158
+ <div class="prose prose-slate text-slate-600 mb-12 leading-relaxed">
159
+ <p>
160
+ We trained a <strong>Qwen3.5-2B</strong> model using GRPO against the MolForge environment. By transitioning from a simple SFT baseline to a verifier-driven RL policy, we saw significant improvements across all difficulty levels.
161
+ </p>
 
 
 
162
  </div>
163
 
164
+ <div class="shadcn-card overflow-hidden mb-12">
165
+ <table class="w-full text-left text-sm">
166
+ <thead class="bg-slate-50 border-b">
167
  <tr>
168
+ <th class="px-6 py-4 font-semibold text-slate-900">Difficulty</th>
169
+ <th class="px-6 py-4 font-semibold text-slate-900">Before (SFT)</th>
170
+ <th class="px-6 py-4 font-semibold text-slate-900">After (RL)</th>
171
+ <th class="px-6 py-4 font-semibold text-slate-900">Improvement</th>
172
  </tr>
173
  </thead>
174
+ <tbody class="divide-y">
175
  <tr>
176
+ <td class="px-6 py-4 font-medium">Easy</td>
177
+ <td class="px-6 py-4 text-slate-500">0.1167</td>
178
+ <td class="px-6 py-4 font-semibold">0.1295</td>
179
+ <td class="px-6 py-4 text-emerald-600 font-bold">+10.9%</td>
180
  </tr>
181
  <tr>
182
+ <td class="px-6 py-4 font-medium">Medium</td>
183
+ <td class="px-6 py-4 text-slate-500">0.1167</td>
184
+ <td class="px-6 py-4 font-semibold">0.1278</td>
185
+ <td class="px-6 py-4 text-emerald-600 font-bold">+9.5%</td>
186
  </tr>
187
  <tr>
188
+ <td class="px-6 py-4 font-medium">Hard</td>
189
+ <td class="px-6 py-4 text-slate-500">0.0800</td>
190
+ <td class="px-6 py-4 font-semibold">0.0866</td>
191
+ <td class="px-6 py-4 text-emerald-600 font-bold">+8.3%</td>
192
  </tr>
193
  </tbody>
194
  </table>
195
  </div>
196
 
197
+ <div class="grid md:grid-cols-2 gap-8 mb-12">
198
+ <div class="shadcn-card p-4">
199
+ <img src="assets/reward_curve.png" alt="Reward Curve" class="rounded border">
200
+ <p class="mt-3 text-center text-xs text-slate-400 font-medium">Training Reward Progression</p>
201
+ </div>
202
+ <div class="shadcn-card p-4">
203
+ <img src="assets/Logs.png" alt="Logs" class="rounded border">
204
+ <p class="mt-3 text-center text-xs text-slate-400 font-medium">Governance Telemetry</p>
205
+ </div>
206
+ </div>
207
+ </section>
208
+
209
+ <!-- Final Takeaway -->
210
+ <section class="mb-24 pt-12 border-t text-center">
211
+ <h2 class="text-3xl font-extrabold mb-6">Join the Scientific RL Revolution</h2>
212
+ <p class="text-slate-500 max-w-xl mx-auto mb-10 text-lg">
213
+ MolForge is open source and ready for further exploration. Explore the search space of 256 fragment combinations across oncology scenarios.
214
+ </p>
215
+ <div class="flex flex-wrap justify-center gap-4">
216
+ <a href="https://github.com/Adhitya-Vardhan/molt_lab" class="px-8 py-3 bg-slate-900 text-white font-bold rounded-lg hover:bg-slate-800 transition-all">GitHub Repo</a>
217
+ <a href="https://colab.research.google.com/drive/1c6npGkGNbbbd8XFNeS6zInBpopLnJ4W4?usp=sharing" class="px-8 py-3 bg-white border border-slate-200 text-slate-900 font-bold rounded-lg hover:bg-slate-50 transition-all">Colab Notebook</a>
218
  </div>
219
  </section>
220
 
221
  <!-- Footer -->
222
+ <footer class="py-12 border-t text-sm text-slate-400 flex flex-col md:flex-row justify-between items-center gap-4">
223
+ <p>© 2026 MolForge • Built for OpenEnv</p>
224
+ <div class="flex gap-6">
225
+ <a href="https://huggingface.co/Adhitya122/molforge-grpo-oncology" class="hover:text-slate-600">Model Card</a>
226
+ <a href="https://huggingface.co/spaces/Adhitya122/molforge" class="hover:text-slate-600">Space Home</a>
227
  </div>
 
228
  </footer>
229
+ </main>
230
 
231
  </body>
232
  </html>