Ev3Dev commited on
Commit
51af902
·
verified ·
1 Parent(s): 2bf5069

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. _dashboard_state.json +115 -226
  2. run_agent.py +9 -2
_dashboard_state.json CHANGED
@@ -1,31 +1,31 @@
1
  {
2
- "timestamp": 1772994420.314485,
3
- "step": 20,
4
  "episode_done": false,
5
- "cumulative_reward": 1.8024931955050638,
6
- "gen_time_s": 23.52,
7
- "model_response_raw": "{\n\"action_type\": \"design_followup_experiment\",\n\"method\": \"\",\n\"parameters\": {\n\"disease_comparison_groups\": true,\n\"treatment_condition_focus\": false,\n\"criterion_for_target_genes\": \"Differentially Expressed Between Healthy vs AD Brain Cells\"\n},\n\"Justification\": \"The primary goal remains identifying DGEs across conditions rather than targeting specific known pathways first. While 'filter_data' was previously performed but failed due to lack of validated markers from previous attempts, re-running filtering based solely on technical/quality criteria would still leave ambiguous candidates if noise pe",
8
  "model_thinking": "",
9
  "thinking_enabled": false,
10
  "task": {
11
- "problem_statement": "Identify differentially expressed genes between Alzheimer's disease and healthy brain tissue using single-cell RNA sequencing.",
12
  "organism": "human",
13
- "tissue": "brain",
14
  "modality": "scRNA-seq",
15
  "conditions": [
16
- "healthy",
17
- "Alzheimer"
18
  ],
19
- "budget_limit": 84032.3891133686,
20
- "time_limit_days": 116.86792941835967
21
  },
22
  "resources": {
23
- "budget_used": 28900.0,
24
- "budget_remaining": 55132.39,
25
- "time_used_days": 18.0,
26
- "time_remaining_days": 98.9,
27
  "samples_consumed": 0,
28
- "compute_hours_used": 12.0
29
  },
30
  "pipeline_history": [
31
  {
@@ -34,7 +34,7 @@
34
  "method": null,
35
  "output_summary": "Collected 6 samples (quality=0.56)",
36
  "success": true,
37
- "quality_score": 0.562,
38
  "resource_cost": 5000.0,
39
  "time_cost_days": 7.0
40
  },
@@ -47,175 +47,44 @@
47
  "quality_score": 1.0,
48
  "resource_cost": 500.0,
49
  "time_cost_days": 1.0
50
- },
51
- {
52
- "step_index": 3,
53
- "action_type": "sequence_cells",
54
- "method": null,
55
- "output_summary": "Action blocked: Cannot sequence without library preparation",
56
- "success": false,
57
- "quality_score": 1.0,
58
- "resource_cost": 15000.0,
59
- "time_cost_days": 5.0
60
- },
61
- {
62
- "step_index": 4,
63
- "action_type": "prepare_library",
64
- "method": null,
65
- "output_summary": "Library prepared (complexity=0.75)",
66
- "success": true,
67
- "quality_score": 0.751,
68
- "resource_cost": 8000.0,
69
- "time_cost_days": 3.0
70
- },
71
- {
72
- "step_index": 5,
73
- "action_type": "sequence_cells",
74
- "method": null,
75
- "output_summary": "Sequenced 4574 cells, 9256 genes detected, median UMI=3083",
76
- "success": true,
77
- "quality_score": 0.47,
78
- "resource_cost": 15000.0,
79
- "time_cost_days": 5.0
80
- },
81
- {
82
- "step_index": 6,
83
- "action_type": "filter_data",
84
- "method": null,
85
- "output_summary": "Action blocked: Cannot filter data before QC",
86
- "success": false,
87
- "quality_score": 1.0,
88
- "resource_cost": 50.0,
89
- "time_cost_days": 0.2
90
- },
91
- {
92
- "step_index": 7,
93
- "action_type": "run_qc",
94
- "method": null,
95
- "output_summary": "QC metrics computed",
96
- "success": true,
97
- "quality_score": 0.829,
98
- "resource_cost": 100.0,
99
- "time_cost_days": 0.5
100
- },
101
- {
102
- "step_index": 8,
103
- "action_type": "filter_data",
104
- "method": null,
105
- "output_summary": "Filtered 4574 \u2192 3996 cells (87% retained)",
106
- "success": true,
107
- "quality_score": 0.874,
108
- "resource_cost": 50.0,
109
- "time_cost_days": 0.2
110
- },
111
- {
112
- "step_index": 9,
113
- "action_type": "normalize_data",
114
- "method": null,
115
- "output_summary": "Normalized with log_normalize",
116
- "success": true,
117
- "quality_score": 1.0,
118
- "resource_cost": 50.0,
119
- "time_cost_days": 0.2
120
- },
121
- {
122
- "step_index": 10,
123
- "action_type": "cluster_cells",
124
- "method": null,
125
- "output_summary": "Found 7 clusters",
126
- "success": true,
127
- "quality_score": 0.79,
128
- "resource_cost": 100.0,
129
- "time_cost_days": 0.5
130
- },
131
- {
132
- "step_index": 11,
133
- "action_type": "design_followup_experiment",
134
- "method": null,
135
- "output_summary": "Follow-up experiment design proposed (evidence_signals=1)",
136
- "success": true,
137
- "quality_score": 0.28,
138
- "resource_cost": 100.0,
139
- "time_cost_days": 0.5
140
- },
141
- {
142
- "step_index": 12,
143
- "action_type": "synthesize_conclusion",
144
- "method": null,
145
- "output_summary": "Action blocked: Cannot synthesise conclusion before discovering or validating markers; Cannot synthesise conclusion befo",
146
- "success": false,
147
- "quality_score": 1.0,
148
- "resource_cost": 0.0,
149
- "time_cost_days": 0.5
150
- },
151
- {
152
- "step_index": 13,
153
- "action_type": "marker_selection",
154
- "method": null,
155
- "output_summary": "Action blocked: Cannot select markers without DE results",
156
- "success": false,
157
- "quality_score": 1.0,
158
- "resource_cost": 100.0,
159
- "time_cost_days": 0.5
160
- },
161
- {
162
- "step_index": 14,
163
- "action_type": "synthesize_conclusion",
164
- "method": null,
165
- "output_summary": "Action blocked: Cannot synthesise conclusion before discovering or validating markers; Cannot synthesise conclusion befo",
166
- "success": false,
167
- "quality_score": 1.0,
168
- "resource_cost": 0.0,
169
- "time_cost_days": 0.5
170
  }
171
  ],
172
  "current_action": {
173
- "action_type": "synthesize_conclusion",
174
  "method": null,
175
  "parameters": {
176
- "claims": [
177
- {
178
- "top_markers": [],
179
- "causal_mechanisms": [],
180
- "predicted_pathways": {},
181
- "confidence": 0.5,
182
- "claim_type": "correlational",
183
- "claim": "forced terminal conclusion"
184
- }
185
- ]
186
  },
187
- "justification": "forced terminal conclusion",
188
- "confidence": 0.5
189
  },
190
  "latest_output": {
191
- "summary": "Action blocked: Cannot synthesise conclusion before discovering or validating markers; Cannot synthesise conclusion before inferring pathways or mechanisms",
192
- "success": false,
193
  "quality_score": 1.0,
194
  "uncertainty": 0.0,
195
  "warnings": [],
196
- "data_preview": null
197
  },
198
  "discovered_markers": [],
199
  "candidate_mechanisms": [],
200
- "rule_violations": [
201
- "Cannot synthesise conclusion before discovering or validating markers",
202
- "Cannot synthesise conclusion before inferring pathways or mechanisms"
203
- ],
204
  "uncertainty_summary": {
205
- "avg_uncertainty": 0.224,
206
- "avg_quality": 0.814
207
  },
208
  "reward_breakdown": {
209
- "validity": -1.0,
210
- "ordering": 0.0,
211
- "info_gain": 0.0,
212
- "efficiency": 0.0,
213
- "novelty": 0.0,
214
- "penalty": -1.0,
215
  "shaping": 0.0,
216
  "terminal": 0.0,
217
- "total": -2.0,
218
- "hard_violations": 2.0,
219
  "term_validity": 0.0,
220
  "term_ordering": 0.0,
221
  "term_info_gain": 0.0,
@@ -229,83 +98,101 @@
229
  "latent": {
230
  "cell_populations": [
231
  {
232
- "name": "excitatory_neuron",
233
- "proportion": 0.349,
234
  "marker_genes": [
235
- "SLC17A7",
236
- "CAMK2A",
237
- "NRGN"
238
  ],
239
- "state": "stressed"
240
  },
241
  {
242
- "name": "inhibitory_neuron",
243
- "proportion": 0.209,
244
  "marker_genes": [
245
- "GAD1",
246
- "GAD2",
247
- "SLC32A1"
248
  ],
249
- "state": "normal"
250
  },
251
  {
252
- "name": "astrocyte",
253
- "proportion": 0.211,
254
  "marker_genes": [
255
- "GFAP",
256
- "AQP4",
257
- "SLC1A3"
258
  ],
259
- "state": "quiescent"
260
  },
261
  {
262
- "name": "oligodendrocyte",
263
- "proportion": 0.153,
264
  "marker_genes": [
265
- "MBP",
266
- "PLP1",
267
- "MOG"
268
  ],
269
- "state": "myelinating"
270
  },
271
  {
272
- "name": "OPC",
273
- "proportion": 0.078,
274
  "marker_genes": [
275
- "PDGFRA",
276
- "CSPG4",
277
- "OLIG2"
278
  ],
279
- "state": "progenitor"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  }
281
  ],
282
  "true_markers": [
283
- "TREM2",
284
- "APOE",
285
- "GFAP",
286
- "C1QA"
287
  ],
288
  "causal_mechanisms": [
289
- "TREM2-mediated microglial activation in amyloid clearance",
290
- "complement-driven synaptic pruning",
291
- "reactive astrogliosis amplifying neuroinflammation"
292
  ],
293
  "true_pathways": {
294
- "complement_cascade": 0.839,
295
- "neuroinflammation": 0.805,
296
- "amyloid_processing": 0.666,
297
- "synaptic_signalling": 0.394,
298
- "lipid_metabolism": 0.674
299
  },
300
- "true_de_genes_count": 10,
301
  "true_regulatory_network_size": 0,
302
  "confounders": {},
303
- "n_true_cells": 7619,
304
  "technical": {
305
- "ambient_rna_fraction": 0.04108598341080635,
306
- "doublet_rate": 0.045763110874719674,
307
- "dropout_rate": 0.07138299827651534,
308
- "sample_quality": 0.9242864922806572,
309
  "library_complexity": 0.8,
310
  "capture_efficiency": 0.6
311
  },
@@ -313,29 +200,31 @@
313
  "samples_collected": true,
314
  "cohort_selected": true,
315
  "cells_cultured": false,
316
- "library_prepared": true,
317
  "perturbation_applied": false,
318
- "cells_sequenced": true,
319
- "qc_performed": true,
320
- "data_filtered": true,
321
- "data_normalized": true,
322
  "batches_integrated": false,
323
- "cells_clustered": true,
324
  "de_performed": false,
325
  "trajectories_inferred": false,
326
  "pathways_analyzed": false,
327
  "networks_inferred": false,
328
  "markers_discovered": false,
329
  "markers_validated": false,
330
- "followup_designed": true,
331
  "subagent_review_requested": false,
332
  "conclusion_reached": false,
333
- "n_cells_sequenced": 4574,
334
- "n_cells_after_filter": 3996,
335
- "n_clusters_found": "7",
336
  "n_de_genes_found": null,
337
  "n_markers_found": null
338
  },
339
- "hidden_failure_conditions": []
 
 
340
  }
341
  }
 
1
  {
2
+ "timestamp": 1772999012.107634,
3
+ "step": 2,
4
  "episode_done": false,
5
+ "cumulative_reward": 2.3019429157970084,
6
+ "gen_time_s": 12.96,
7
+ "model_response_raw": "{\n\"action_type\": \"select_cohort\",\n\"method\": None,\n\"parameters\": {\n\"categorical_variables\": [],\n\"factors_to_stratify_by\": []\n},\n\"Justification\": \"The recent historical sample was from 'blood', but the target tissue specified in the query ('synovial_fluid') differs significantly due to disease pathology affecting local fluid composition. Stratifying based on organ/tissue type ensures cohort homogeneity required for comparing RA vs non-RA controls within the specific joint space.\",\n\"Confidence\": 0.9\n}",
8
  "model_thinking": "",
9
  "thinking_enabled": false,
10
  "task": {
11
+ "problem_statement": "Determine the effect of JAK inhibitor treatment on T-cell activation states in rheumatoid arthritis.",
12
  "organism": "human",
13
+ "tissue": "synovial_fluid",
14
  "modality": "scRNA-seq",
15
  "conditions": [
16
+ "untreated_RA",
17
+ "JAK_inhibitor_treated"
18
  ],
19
+ "budget_limit": 124112.45173009757,
20
+ "time_limit_days": 174.85852269116046
21
  },
22
  "resources": {
23
+ "budget_used": 5500.0,
24
+ "budget_remaining": 118612.45,
25
+ "time_used_days": 8.0,
26
+ "time_remaining_days": 166.9,
27
  "samples_consumed": 0,
28
+ "compute_hours_used": 0.0
29
  },
30
  "pipeline_history": [
31
  {
 
34
  "method": null,
35
  "output_summary": "Collected 6 samples (quality=0.56)",
36
  "success": true,
37
+ "quality_score": 0.563,
38
  "resource_cost": 5000.0,
39
  "time_cost_days": 7.0
40
  },
 
47
  "quality_score": 1.0,
48
  "resource_cost": 500.0,
49
  "time_cost_days": 1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
  ],
52
  "current_action": {
53
+ "action_type": "select_cohort",
54
  "method": null,
55
  "parameters": {
56
+ "categorical_variables": [],
57
+ "factors_to_stratify_by": []
 
 
 
 
 
 
 
 
58
  },
59
+ "justification": "The recent historical sample was from 'blood', but the target tissue specified in the query ('synovial_fluid') differs significantly due to disease pathology affecting local fluid composition. Stratifying based on organ/tissue type ensures cohort homogeneity required for comparing RA vs non-RA controls within the specific joint space.",
60
+ "confidence": 0.9
61
  },
62
  "latest_output": {
63
+ "summary": "Selected cohort of 4 samples with criteria {}",
64
+ "success": true,
65
  "quality_score": 1.0,
66
  "uncertainty": 0.0,
67
  "warnings": [],
68
+ "data_preview": "{\"criteria\": {}, \"n_selected\": 4}"
69
  },
70
  "discovered_markers": [],
71
  "candidate_mechanisms": [],
72
+ "rule_violations": [],
 
 
 
73
  "uncertainty_summary": {
74
+ "avg_uncertainty": 0.0,
75
+ "avg_quality": 0.781
76
  },
77
  "reward_breakdown": {
78
+ "validity": 0.3,
79
+ "ordering": 0.06,
80
+ "info_gain": 0.4,
81
+ "efficiency": 0.294,
82
+ "novelty": 0.1,
83
+ "penalty": -0.0,
84
  "shaping": 0.0,
85
  "terminal": 0.0,
86
+ "total": 1.154,
87
+ "tool_fit": 0.0,
88
  "term_validity": 0.0,
89
  "term_ordering": 0.0,
90
  "term_info_gain": 0.0,
 
98
  "latent": {
99
  "cell_populations": [
100
  {
101
+ "name": "CD4_Th1",
102
+ "proportion": 0.239,
103
  "marker_genes": [
104
+ "IFNG",
105
+ "TBX21",
106
+ "IL2"
107
  ],
108
+ "state": "activated"
109
  },
110
  {
111
+ "name": "CD4_Th17",
112
+ "proportion": 0.137,
113
  "marker_genes": [
114
+ "IL17A",
115
+ "RORC",
116
+ "CCR6"
117
  ],
118
+ "state": "activated"
119
  },
120
  {
121
+ "name": "CD4_Treg",
122
+ "proportion": 0.071,
123
  "marker_genes": [
124
+ "FOXP3",
125
+ "IL2RA",
126
+ "CTLA4"
127
  ],
128
+ "state": "regulatory"
129
  },
130
  {
131
+ "name": "CD8_cytotoxic",
132
+ "proportion": 0.161,
133
  "marker_genes": [
134
+ "GZMB",
135
+ "PRF1",
136
+ "CD8A"
137
  ],
138
+ "state": "activated"
139
  },
140
  {
141
+ "name": "macrophage",
142
+ "proportion": 0.145,
143
  "marker_genes": [
144
+ "CD68",
145
+ "CD163",
146
+ "MARCO"
147
  ],
148
+ "state": "inflammatory"
149
+ },
150
+ {
151
+ "name": "fibroblast",
152
+ "proportion": 0.14,
153
+ "marker_genes": [
154
+ "COL1A1",
155
+ "FAP",
156
+ "THY1"
157
+ ],
158
+ "state": "activated"
159
+ },
160
+ {
161
+ "name": "B_cell",
162
+ "proportion": 0.109,
163
+ "marker_genes": [
164
+ "CD19",
165
+ "MS4A1",
166
+ "CD79A"
167
+ ],
168
+ "state": "quiescent"
169
  }
170
  ],
171
  "true_markers": [
172
+ "STAT1",
173
+ "SOCS1",
174
+ "IFNG"
 
175
  ],
176
  "causal_mechanisms": [
177
+ "JAK-STAT pathway inhibition reduces Th1/Th17 activation",
178
+ "Compensatory Treg expansion under JAK inhibition"
 
179
  ],
180
  "true_pathways": {
181
+ "JAK_STAT_signalling": 0.3,
182
+ "Th1_differentiation": 0.35,
183
+ "Th17_differentiation": 0.4,
184
+ "cytokine_signalling": 0.45,
185
+ "regulatory_T_cell_function": 0.7
186
  },
187
+ "true_de_genes_count": 11,
188
  "true_regulatory_network_size": 0,
189
  "confounders": {},
190
+ "n_true_cells": 13025,
191
  "technical": {
192
+ "ambient_rna_fraction": 0.05940406458962544,
193
+ "doublet_rate": 0.02562860771136133,
194
+ "dropout_rate": 0.09574882286483327,
195
+ "sample_quality": 0.9514110325345917,
196
  "library_complexity": 0.8,
197
  "capture_efficiency": 0.6
198
  },
 
200
  "samples_collected": true,
201
  "cohort_selected": true,
202
  "cells_cultured": false,
203
+ "library_prepared": false,
204
  "perturbation_applied": false,
205
+ "cells_sequenced": false,
206
+ "qc_performed": false,
207
+ "data_filtered": false,
208
+ "data_normalized": false,
209
  "batches_integrated": false,
210
+ "cells_clustered": false,
211
  "de_performed": false,
212
  "trajectories_inferred": false,
213
  "pathways_analyzed": false,
214
  "networks_inferred": false,
215
  "markers_discovered": false,
216
  "markers_validated": false,
217
+ "followup_designed": false,
218
  "subagent_review_requested": false,
219
  "conclusion_reached": false,
220
+ "n_cells_sequenced": null,
221
+ "n_cells_after_filter": null,
222
+ "n_clusters_found": null,
223
  "n_de_genes_found": null,
224
  "n_markers_found": null
225
  },
226
+ "hidden_failure_conditions": [
227
+ "High ambient RNA may confound DE in low-abundance transcripts"
228
+ ]
229
  }
230
  }
run_agent.py CHANGED
@@ -823,7 +823,11 @@ def main():
823
  """Read and consume a command file written by the dashboard."""
824
  try:
825
  raw = DASHBOARD_CMD_PATH.read_text(encoding="utf-8")
826
- DASHBOARD_CMD_PATH.unlink(missing_ok=True)
 
 
 
 
827
  return json.loads(raw)
828
  except (FileNotFoundError, json.JSONDecodeError):
829
  return None
@@ -1064,7 +1068,10 @@ def main():
1064
  log(f" Pathways: {c.predicted_pathways}")
1065
  log("=" * 70)
1066
 
1067
- DASHBOARD_CMD_PATH.unlink(missing_ok=True)
 
 
 
1068
  run_episode()
1069
 
1070
  while True:
 
823
  """Read and consume a command file written by the dashboard."""
824
  try:
825
  raw = DASHBOARD_CMD_PATH.read_text(encoding="utf-8")
826
+ try:
827
+ DASHBOARD_CMD_PATH.unlink(missing_ok=True)
828
+ except OSError:
829
+ # Windows: file may be locked by dashboard; still consumed
830
+ pass
831
  return json.loads(raw)
832
  except (FileNotFoundError, json.JSONDecodeError):
833
  return None
 
1068
  log(f" Pathways: {c.predicted_pathways}")
1069
  log("=" * 70)
1070
 
1071
+ try:
1072
+ DASHBOARD_CMD_PATH.unlink(missing_ok=True)
1073
+ except OSError:
1074
+ pass
1075
  run_episode()
1076
 
1077
  while True: