nkshirsa committed on
Commit
c98d2ae
·
verified ·
1 Parent(s): 4bca821

Add phd_research_os/obsidian_export.py

Browse files
Files changed (1) hide show
  1. phd_research_os/obsidian_export.py +327 -0
phd_research_os/obsidian_export.py ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PhD Research OS — Obsidian Export (Phase 4)
3
+ =============================================
4
+ One-directional export: System → Obsidian vault.
5
+ Generates Markdown with YAML frontmatter and wiki-links.
6
+ """
7
+
8
+ import os
9
+ import json
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+
13
+ from .db import get_db, search_claims, get_goals_by_priority, from_fixed
14
+
15
+
16
class ObsidianExporter:
    """
    Export Research OS data to an Obsidian vault as linked Markdown notes.

    Every note is a Markdown file with YAML frontmatter; notes reference
    each other with Obsidian wiki-links (``[[note]]`` / ``[[note|label]]``).

    Structure:
    /vault/
    ├── Claims/
    │   └── CLM_XXXX.md
    ├── Sources/
    │   └── DOI_XXXX.md
    ├── Goals/
    │   └── GOAL_XXXX.md
    ├── Conflicts/
    │   └── <conflict_id>.md
    ├── Decisions/
    │   └── <decision_id>.md
    └── Dashboard.md
    """

    # Sub-folders created under the vault root, one per exported entity type.
    _FOLDERS = ("Claims", "Sources", "Goals", "Conflicts", "Decisions")

    def __init__(self, vault_path: str = "vault", db_path: str = None):
        """
        Remember the vault/database locations and create the vault folders.

        Args:
            vault_path: Root directory of the Obsidian vault (created if missing).
            db_path: Database path. Falls back to the ``RESEARCH_OS_DB``
                environment variable, then to ``data/research_os.db``.
        """
        self.vault_path = Path(vault_path)
        self.db_path = db_path or os.environ.get("RESEARCH_OS_DB", "data/research_os.db")

        for folder in self._FOLDERS:
            (self.vault_path / folder).mkdir(parents=True, exist_ok=True)

    # ------------------------------------------------------------------
    # Small helpers shared by all exporters
    # ------------------------------------------------------------------

    @staticmethod
    def _value(row: dict, key: str, default=""):
        """Return ``row[key]``, or *default* when the key is absent or NULL.

        Rows built from ``SELECT *`` always contain every column, so a plain
        ``row.get(key, default)`` never falls back for NULL columns.
        """
        value = row.get(key)
        return default if value is None else value

    @staticmethod
    def _load_json(raw, default):
        """Decode a JSON column, treating NULL / empty string as *default*.

        Values that are already decoded (not ``str``/``bytes``) are returned
        unchanged. Malformed JSON still raises ``json.JSONDecodeError`` so
        corrupt rows are not silently hidden.
        """
        if raw is None or raw == "":
            return default
        if isinstance(raw, (str, bytes, bytearray)):
            decoded = json.loads(raw)
            return default if decoded is None else decoded
        return raw

    @staticmethod
    def _safe_doi(doi) -> str:
        """Turn a DOI into the note name used for its file in ``Sources/``."""
        return str(doi).replace("/", "_").replace(".", "_")

    @staticmethod
    def _unfix(raw):
        """Pass integer values through ``from_fixed``; return others as-is.

        NOTE(review): ``from_fixed`` comes from the project's ``db`` module and
        presumably decodes a fixed-point integer — confirm there.
        """
        return from_fixed(raw) if isinstance(raw, int) else raw

    @staticmethod
    def _truncate(text, limit: int) -> str:
        """Shorten *text* to *limit* characters, adding ``...`` only if cut."""
        text = "" if text is None else str(text)
        return text if len(text) <= limit else text[:limit] + "..."

    @staticmethod
    def _wiki_list(note_ids) -> str:
        """Render note ids as a Markdown bullet list of wiki-links."""
        return "\n".join(f"- [[{note_id}]]" for note_id in note_ids)

    def _write(self, filepath: Path, lines) -> None:
        """Join *lines* and write them as UTF-8.

        The notes contain non-ASCII characters, so relying on the platform's
        default encoding would fail on non-UTF-8 locales.
        """
        filepath.write_text("\n".join(lines) + "\n", encoding="utf-8")

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------

    def export_all(self):
        """Export everything to Obsidian vault. Idempotent — overwrites existing."""
        conn = get_db(self.db_path)

        # (label printed in the summary, query, per-row exporter)
        jobs = (
            ("Claims", "SELECT * FROM claims ORDER BY claim_id", self._export_claim),
            ("Sources", "SELECT * FROM sources ORDER BY doi", self._export_source),
            ("Goals", "SELECT * FROM goals ORDER BY goal_id", self._export_goal),
            ("Conflicts", "SELECT * FROM conflicts ORDER BY conflict_id", self._export_conflict),
            ("Decisions", "SELECT * FROM decisions ORDER BY decision_id", self._export_decision),
        )

        print("Exporting to Obsidian vault...")
        try:
            for label, query, export_row in jobs:
                rows = conn.execute(query).fetchall()
                for row in rows:
                    export_row(dict(row))
                print(f" {label}: {len(rows)}")

            self._export_dashboard(conn)
            print(" Dashboard: ✓")
        finally:
            # Release the connection even when one of the exporters fails.
            conn.close()

        print(f"\nVault exported to: {self.vault_path}")

    # ------------------------------------------------------------------
    # Per-entity exporters
    # ------------------------------------------------------------------

    def _export_claim(self, claim: dict):
        """Export a single claim as Markdown with YAML frontmatter.

        Writes ``Claims/<claim_id>.md``. The linked source points at the
        sanitised DOI note name so the wiki-link resolves to the file written
        by ``_export_source``.
        """
        cid = claim['claim_id']
        confidence = self._unfix(claim['confidence'])
        missing = self._load_json(claim.get('missing_fields'), [])
        params = self._load_json(claim.get('parameters'), {})
        source_doi = claim.get('source_doi')

        if source_doi:
            source_link = f"[[{self._safe_doi(source_doi)}|{source_doi}]]"
        else:
            source_link = "No source linked"

        if missing:
            missing_text = ", ".join(str(item) for item in missing)
        else:
            missing_text = "None — claim is complete"

        lines = [
            "---",
            f"claim_id: {cid}",
            f"source_doi: {self._value(claim, 'source_doi', 'unknown')}",
            f"epistemic_tag: {claim['epistemic_tag']}",
            f"confidence: {confidence}",
            f"status: {claim['status']}",
            f"study_type: {self._value(claim, 'study_type', 'unknown')}",
            f"is_canonical: {bool(claim.get('is_canonical', 0))}",
            f"schema_version: {self._value(claim, 'schema_version', '1.0')}",
            f"created_at: {self._value(claim, 'created_at', '')}",
            "---",
            "",
            "## Claim Text",
            "",
            f"{claim['text']}",
            "",
            "## Parameters",
            "",
            self._format_params(params),
            "",
            "## Missing Fields",
            "",
            missing_text,
            "",
            "## Linked Source",
            "",
            source_link,
        ]

        if claim.get('expert_override'):
            override = self._load_json(claim['expert_override'], {})
            lines += [
                "",
                "## Expert Override",
                "",
                f"- **By:** {override.get('who', 'unknown')}",
                f"- **Rationale:** {override.get('rationale', '')}",
                f"- **Override ID:** {override.get('override_id', '')}",
            ]

        self._write(self.vault_path / "Claims" / f"{cid}.md", lines)

    def _export_source(self, source: dict):
        """Export a source as Markdown.

        Writes ``Sources/<sanitised doi>.md``. Title and journal are emitted
        as JSON strings, which are valid YAML double-quoted scalars, so
        embedded quotes or backslashes cannot break the frontmatter.
        """
        doi = source['doi']
        authors = self._load_json(source.get('authors'), [])
        title = str(self._value(source, 'title', ''))
        journal = str(self._value(source, 'journal', ''))

        lines = [
            "---",
            f"doi: {doi}",
            f"title: {json.dumps(title, ensure_ascii=False)}",
            f"year: {self._value(source, 'year', '')}",
            f"journal: {json.dumps(journal, ensure_ascii=False)}",
            f"journal_tier: {self._value(source, 'journal_tier', '')}",
            f"is_canonical: {bool(source.get('is_canonical', 0))}",
            "---",
            "",
            f"## {title or 'Untitled'}",
            "",
            f"**DOI:** [{doi}](https://doi.org/{doi})",
            f"**Authors:** {', '.join(str(a) for a in authors) if authors else 'Unknown'}",
            f"**Year:** {self._value(source, 'year', 'Unknown')}",
            f"**Journal:** {journal or 'Unknown'} (Tier {self._value(source, 'journal_tier', '?')})",
            f"**Study Type:** {self._value(source, 'study_type', 'Unknown')}",
        ]

        self._write(self.vault_path / "Sources" / f"{self._safe_doi(doi)}.md", lines)

    def _export_goal(self, goal: dict):
        """Export a goal as Markdown to ``Goals/<goal_id>.md``."""
        gid = goal['goal_id']
        linked = self._load_json(goal.get('linked_claim_ids'), [])

        lines = [
            "---",
            f"goal_id: {gid}",
            f"priority: {goal['priority']}",
            f"status: {goal['status']}",
            "---",
            "",
            f"## {goal['description']}",
            "",
            f"**Priority:** {goal['priority']}",
            f"**Status:** {goal['status']}",
            "",
            "## Linked Claims",
            "",
            self._wiki_list(linked) if linked else "No claims linked yet",
        ]

        self._write(self.vault_path / "Goals" / f"{gid}.md", lines)

    def _export_conflict(self, conflict: dict):
        """Export a conflict as Markdown to ``Conflicts/<conflict_id>.md``.

        The warning line reports the stored hypothesis confidence (``low``
        when the column is missing or NULL) so it agrees with the frontmatter.
        """
        hypothesis_conf = self._value(conflict, 'hypothesis_confidence', 'low')
        differences = self._load_json(conflict.get('key_differences'), [])

        if differences:
            differences_text = "\n".join(f"- {d}" for d in differences)
        else:
            differences_text = "No key differences recorded"

        lines = [
            "---",
            f"conflict_id: {conflict['conflict_id']}",
            f"conflict_type: {conflict['conflict_type']}",
            f"resolution_status: {conflict['resolution_status']}",
            f"hypothesis_confidence: {hypothesis_conf}",
            "---",
            "",
            f"## Conflict: [[{conflict['claim_a_id']}]] vs [[{conflict['claim_b_id']}]]",
            "",
            f"**Type:** {conflict['conflict_type']}",
            f"**Status:** {conflict['resolution_status']}",
            "",
            "## Generated Hypothesis",
            "",
            f"{self._value(conflict, 'generated_hypothesis', 'No hypothesis generated')}",
            "",
            f"⚠️ **Hypothesis confidence: {str(hypothesis_conf).upper()}** — Requires human review",
            "",
            "## Key Differences",
            "",
            differences_text,
        ]

        self._write(self.vault_path / "Conflicts" / f"{conflict['conflict_id']}.md", lines)

    def _export_decision(self, decision: dict):
        """Export a decision as Markdown to ``Decisions/<decision_id>.md``."""
        info_gain = self._unfix(decision['expected_information_gain'])
        linked = self._load_json(decision.get('linked_claim_ids'), [])
        priority = self._value(decision, 'priority', 'medium')
        goal_id = decision.get('linked_goal_id')

        lines = [
            "---",
            f"decision_id: {decision['decision_id']}",
            f"action: {decision['recommended_action']}",
            f"priority: {priority}",
            f"status: {decision['status']}",
            f"info_gain: {info_gain}",
            "---",
            "",
            f"## {self._value(decision, 'action_description', 'No description')}",
            "",
            f"**Action:** {decision['recommended_action']}",
            f"**Priority:** {priority}",
            f"**Status:** {decision['status']}",
            f"**Expected Info Gain:** {info_gain}",
            f"**Estimated Effort:** {self._value(decision, 'estimated_effort', 'Unknown')}",
            "",
            "## Linked Goal",
            "",
            f"[[{goal_id}]]" if goal_id else "No goal linked",
            "",
            "## Linked Claims",
            "",
            self._wiki_list(linked) if linked else "None",
        ]

        if decision.get('why_not_log'):
            lines += [
                "",
                "## Why Not Log (Rejection Reason)",
                "",
                f"{decision['why_not_log']}",
            ]

        self._write(self.vault_path / "Decisions" / f"{decision['decision_id']}.md", lines)

    # ------------------------------------------------------------------
    # Dashboard
    # ------------------------------------------------------------------

    def _export_dashboard(self, conn):
        """Generate Dashboard.md with system summary.

        Args:
            conn: Open database connection; only ``execute`` is used and the
                connection is left open for the caller to close.
        """
        def count(query: str) -> int:
            return conn.execute(query).fetchone()[0]

        # Headline counters
        claim_count = count("SELECT COUNT(*) FROM claims")
        source_count = count("SELECT COUNT(*) FROM sources")
        goal_count = count("SELECT COUNT(*) FROM goals WHERE status = 'Active'")
        conflict_count = count("SELECT COUNT(*) FROM conflicts WHERE resolution_status = 'Unresolved'")
        incomplete_count = count("SELECT COUNT(*) FROM claims WHERE status = 'Incomplete'")
        override_count = count("SELECT COUNT(*) FROM overrides")

        # Epistemic distribution
        epist = [dict(row) for row in conn.execute(
            "SELECT epistemic_tag, COUNT(*) as cnt FROM claims GROUP BY epistemic_tag"
        ).fetchall()]

        # Recent sources
        recent = [dict(row) for row in conn.execute(
            "SELECT doi, title FROM sources ORDER BY created_at DESC LIMIT 10"
        ).fetchall()]

        # High-confidence claims
        high_conf = [dict(row) for row in conn.execute(
            "SELECT claim_id, text, confidence FROM claims ORDER BY confidence DESC LIMIT 10"
        ).fetchall()]

        now = datetime.now().strftime("%Y-%m-%d %H:%M")

        epist_rows = [f"| {row['epistemic_tag']} | {row['cnt']} |" for row in epist]

        # Link to the source note (sanitised DOI) rather than the raw DOI,
        # which is not a file in the vault.
        recent_lines = [
            f"- [[{self._safe_doi(row['doi'])}|{self._truncate(row['title'] or 'Untitled', 60)}]]"
            for row in recent
        ]

        top_lines = []
        for row in high_conf:
            conf = self._unfix(row['confidence'])
            conf_text = f"{conf:.3f}" if isinstance(conf, (int, float)) else "n/a"
            top_lines.append(
                f"- [[{row['claim_id']}]] ({conf_text}): {self._truncate(row['text'], 80)}"
            )

        lines = [
            "---",
            "type: dashboard",
            f"updated: {now}",
            "---",
            "",
            "# PhD Research OS Dashboard",
            "",
            f"*Last updated: {now}*",
            "",
            "## Overview",
            "",
            "| Metric | Count |",
            "|--------|-------|",
            f"| Total Claims | {claim_count} |",
            f"| Sources | {source_count} |",
            f"| Active Goals | {goal_count} |",
            f"| Unresolved Conflicts | {conflict_count} |",
            f"| Incomplete Claims | {incomplete_count} |",
            f"| Expert Overrides | {override_count} |",
            "",
            "## Epistemic Distribution",
            "",
            "| Tag | Count |",
            "|-----|-------|",
            *epist_rows,
            "",
            "## Recent Ingestions",
            "",
            "\n".join(recent_lines) if recent_lines else "No sources ingested yet",
            "",
            "## Top Confidence Claims",
            "",
            "\n".join(top_lines) if top_lines else "No claims yet",
            "",
            "## ⚠️ Attention Required",
            "",
            f"### Incomplete Claims ({incomplete_count})",
            ("See Claims/ folder — filter by status: Incomplete"
             if incomplete_count else "✅ All claims complete"),
            "",
            f"### Unresolved Conflicts ({conflict_count})",
            ("See Conflicts/ folder for items requiring human review"
             if conflict_count else "✅ No unresolved conflicts"),
        ]

        self._write(self.vault_path / "Dashboard.md", lines)

    def _format_params(self, params: dict) -> str:
        """Render claim parameters as a Markdown bullet list (or a placeholder)."""
        if not params:
            return "No parameters recorded"
        return "\n".join(f"- **{k}:** {v}" for k, v in params.items())