nkshirsa committed on
Commit
aec6d5c
·
verified ·
1 Parent(s): b9641f5

Add Superpowers Skill Tree + Meta-Improver: phd_research_os/skills/registry.py

Browse files
Files changed (1) hide show
  1. phd_research_os/skills/registry.py +388 -0
phd_research_os/skills/registry.py ADDED
@@ -0,0 +1,388 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PhD Research OS — Skill Registry
3
+ ==================================
4
+ Dynamic loading and management of all Superpowers skills.
5
+ Skills are executable workflow modules that enforce the
6
+ Design → Plan → Execute → Verify pipeline.
7
+
8
+ Each skill has:
9
+ - A system prompt (behavioral instructions)
10
+ - Required inputs/outputs (gating artifacts)
11
+ - A phase assignment (which pipeline phase it belongs to)
12
+ - Verification criteria (what must be true before proceeding)
13
+ """
14
+
15
+ import json
16
+ from typing import Optional
17
+ from dataclasses import dataclass, field, asdict
18
+ from enum import Enum
19
+
20
+
21
class SkillPhase(Enum):
    """Pipeline phase a skill is assigned to.

    Members are declared in pipeline order, so iterating the enum yields
    DESIGN, PLAN, EXECUTE, VERIFY. Each member's value is the lowercase
    phase name.
    """

    DESIGN = "design"    # Phase 1: Brainstorming → design-spec.md
    PLAN = "plan"        # Phase 2: Writing Plans → tasks.md
    EXECUTE = "execute"  # Phase 3: TDD + Debugging + Git isolation
    VERIFY = "verify"    # Phase 4: Code Review + Security Review
27
+
28
+
29
@dataclass
class Skill:
    """A single Superpowers skill definition.

    Bundles the prompt text for one skill together with the artifacts it
    consumes and produces, the checks that gate progress, and anything it
    explicitly forbids.
    """

    name: str
    phase: SkillPhase
    description: str
    system_prompt: str
    required_inputs: list  # What artifacts must exist before activation
    required_outputs: list  # What artifacts must be produced
    verification_criteria: list  # Checks that must pass before proceeding
    # What is explicitly forbidden; each instance gets its own empty list.
    prohibitions: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return this skill as a plain dict.

        The dict is built with ``dataclasses.asdict`` (so the list fields are
        copies, not the instance's own lists), and the ``phase`` entry is
        replaced by the enum member's string value.
        """
        data = asdict(self)
        data["phase"] = self.phase.value
        return data
45
+
46
+
47
# ============================================================
# The 7 Superpowers Skills
# ============================================================
#
# Maps skill name -> Skill definition. Each key matches the ``name`` field of
# its Skill. The system prompts are plain (non-f) strings, so braces such as
# ``{type}/{scope}/{description}`` are literal text.

SKILLS = {

    # ── Phase 1: DESIGN ──────────────────────────────────
    "brainstorming": Skill(
        name="brainstorming",
        phase=SkillPhase.DESIGN,
        description="Comprehensive UI/UX and system design specification before any code modification",
        system_prompt="""You are activating the BRAINSTORMING skill. Before writing ANY code, you must produce a complete design specification.

Your design-spec MUST include ALL of these sections:
1. **User Stories & Acceptance Criteria**: Who needs what, and how do we know it works?
2. **UI Wireframes**: Textual descriptions of every screen/component layout
3. **Component Hierarchy Diagram**: Mermaid diagram showing component relationships
4. **State Management Mapping**: What state exists at each pipeline stage
5. **API Contracts**: Input/output JSON schemas for every interface
6. **Styling & Design System**: Colors, typography, spacing, dark mode rules
7. **Accessibility Considerations**: ARIA labels, keyboard nav, screen reader support
8. **Performance Budgets**: Response time targets, memory limits, API call budgets

OUTPUT GATE: You MUST produce a complete design-spec as JSON with all 8 sections populated.
Do NOT proceed to planning until the design spec is confirmed complete.
No section may contain "TBD" or placeholders in critical fields.""",
        required_inputs=["task_description"],
        required_outputs=["design_spec"],
        verification_criteria=[
            "All 8 sections present and non-empty",
            "No TBD or placeholder text in critical sections",
            "API contracts have concrete JSON schemas",
            "User stories have explicit acceptance criteria",
        ],
    ),

    # ── Phase 2: PLAN ────────────────────────────────────
    "writing_plans": Skill(
        name="writing_plans",
        phase=SkillPhase.PLAN,
        description="Break design spec into atomic 2-5 minute tasks with exact file paths and commands",
        system_prompt="""You are activating the WRITING PLANS skill. Convert the design spec into an actionable task list.

RULES:
- Each task takes 2-5 minutes maximum
- Every task MUST specify:
* Exact file path(s) to create or modify
* Precise terminal commands to execute
* Clear verification steps (how to confirm it worked)
* Dependencies on other tasks
* Time window estimate

ABSOLUTE PROHIBITION: No pseudocode. Only concrete, executable instructions.
Every command must be copy-pasteable into a terminal.
Every file path must be absolute or clearly relative to project root.

OUTPUT: A tasks list as JSON array, each with: id, description, file_paths, commands, verification, dependencies, time_estimate_minutes.""",
        required_inputs=["design_spec"],
        required_outputs=["tasks_list"],
        verification_criteria=[
            "Every task has exact file paths",
            "Every task has executable commands (no pseudocode)",
            "Every task has verification steps",
            "Dependencies form a valid DAG (no cycles)",
            "Total estimated time is reasonable",
        ],
        prohibitions=["pseudocode", "TBD file paths", "vague verification steps"],
    ),

    # ── Phase 3: EXECUTE ─────────────────────────────────
    "git_worktrees": Skill(
        name="git_worktrees",
        phase=SkillPhase.EXECUTE,
        description="Isolation protocol: worktree preference, branch fallback, clean state requirements",
        system_prompt="""You are activating the GIT WORKTREES skill for code isolation.

PROTOCOL:
1. PREFER git worktree for isolation. FALLBACK to branch if worktree unavailable.
2. Before ANY modification:
- Verify clean working directory (no uncommitted changes)
- Verify base branch is up-to-date
- Create worktree/branch with naming convention: {type}/{scope}/{description}
3. NAMING CONVENTIONS:
- feature/taxonomy/add-domain-crud
- fix/scoring/confidence-overflow
- refactor/pipeline/extract-chunker
4. COMMIT MESSAGES: Conventional format: type(scope): subject
- feat(taxonomy): add domain CRUD operations
- fix(scoring): prevent integer overflow in fixed-point math
- test(skills): add brainstorming skill unit tests
5. CLEANUP: After merge, delete worktree/branch. No orphans.

OUTPUT: Isolation confirmation with branch name, clean state verification, and commit plan.""",
        required_inputs=["tasks_list", "current_task"],
        required_outputs=["isolation_confirmation"],
        verification_criteria=[
            "Working directory is clean",
            "Branch/worktree created with correct naming",
            "Base branch is up-to-date",
        ],
    ),

    "test_driven_development": Skill(
        name="test_driven_development",
        phase=SkillPhase.EXECUTE,
        description="Enforced RED-GREEN-REFACTOR loop for every code change",
        system_prompt="""You are activating the TEST-DRIVEN DEVELOPMENT skill.

For EVERY code change, you MUST follow this exact cycle:

RED PHASE:
- Write a failing test that defines the desired behavior
- The test must be minimal, focused, and executable
- Run the test and CONFIRM it fails (if it passes, your test is wrong)

GREEN PHASE:
- Write the MINIMAL code to make the test pass
- Do NOT anticipate future needs — solve only the current test
- Run the test and CONFIRM it passes

REFACTOR PHASE:
- Improve code quality while keeping all tests green
- Extract duplication, improve naming, simplify logic
- Run ALL tests to confirm nothing broke

OUTPUT: For each RED-GREEN-REFACTOR cycle, report:
- test_name: What test was written
- red_result: Confirmed failing (error message)
- green_result: Confirmed passing
- refactor_changes: What was improved
- all_tests_status: Full suite still passing""",
        required_inputs=["current_task", "isolation_confirmation"],
        required_outputs=["tdd_cycles"],
        verification_criteria=[
            "Every change has a corresponding test",
            "RED phase confirmed test actually fails",
            "GREEN phase uses minimal implementation",
            "REFACTOR phase doesn't break existing tests",
        ],
        prohibitions=["writing code before tests", "anticipating future needs in GREEN phase"],
    ),

    "systematic_debugging": Skill(
        name="systematic_debugging",
        phase=SkillPhase.EXECUTE,
        description="4-phase root-cause analysis: Identify, Isolate, Fix, Verify",
        system_prompt="""You are activating the SYSTEMATIC DEBUGGING skill.

When a test fails or an error occurs, follow this STRICT 4-phase protocol:

PHASE 1 — IDENTIFY:
- Reproduce the error with exact steps
- Record the full error message and stack trace
- Note what SHOULD happen vs what ACTUALLY happens

PHASE 2 — ISOLATE:
- Use BINARY SEARCH debugging: eliminate half the possible causes at each step
- Check: Is the input correct? Is the function called? Is the output wrong?
- Narrow down to the EXACT line/function where behavior diverges from expectation

PHASE 3 — FIX:
- Apply the MINIMAL change that fixes the root cause
- Do NOT fix symptoms — fix the underlying cause
- Do NOT refactor unrelated code while fixing a bug

PHASE 4 — VERIFY:
- Confirm the original error no longer occurs
- Run the FULL test suite to check for regressions
- Document what went wrong and why (lessons learned)

PROHIBITIONS:
- Blind guessing (trying random changes hoping something works)
- Shotgun debugging (changing multiple things at once)
- Premature optimization (fixing performance while debugging correctness)
- Skipping verification (assuming the fix works without testing)

OUTPUT: Debug report with all 4 phases documented.""",
        required_inputs=["error_description", "stack_trace"],
        required_outputs=["debug_report"],
        verification_criteria=[
            "Root cause identified (not just symptoms)",
            "Fix is minimal and targeted",
            "Full test suite passes after fix",
            "Lessons learned documented",
        ],
        prohibitions=["blind guessing", "shotgun debugging", "premature optimization", "skipping verification"],
    ),

    # ── Phase 4: VERIFY ──────────────────────────────────
    "code_review": Skill(
        name="code_review",
        phase=SkillPhase.VERIFY,
        description="2-stage internal review: self-review then blind code-reviewer subagent",
        system_prompt="""You are activating the CODE REVIEW skill.

Execute a 2-STAGE review process:

STAGE 1 — SELF REVIEW (Author Perspective):
- Look at ONLY the diff (changes made, not entire file)
- Compare every change against the original plan/tasks list
- ZERO DEVIATION GATE: Any change not in the plan must be explicitly justified
- Checklist:
□ Every changed file was in the plan
□ No unrelated changes smuggled in
□ Error handling present for all new code paths
□ No hardcoded secrets, paths, or magic numbers
□ All new functions have docstrings

STAGE 2 — CODE REVIEWER SUBAGENT (Blind Review):
- Pretend you are a DIFFERENT engineer who has NEVER seen this code
- You receive ONLY: the git diff and the task plan
- Check:
□ Does the diff achieve what the plan says?
□ Are there any logic errors?
□ Are edge cases handled?
□ Is the code idiomatic for the language/framework?
□ Would an experienced engineer find this reasonable?

OUTPUT: Review report with both stages, findings, and deviation justifications.""",
        required_inputs=["diff", "tasks_list"],
        required_outputs=["review_report"],
        verification_criteria=[
            "All changes traced to plan items",
            "No unrelated changes present",
            "Both review stages completed",
            "All findings addressed or justified",
        ],
    ),

    "security_review": Skill(
        name="security_review",
        phase=SkillPhase.VERIFY,
        description="3-step security audit: identify vulnerabilities, filter false positives, confidence score",
        system_prompt="""You are activating the SECURITY REVIEW skill.

Execute a 3-STEP security audit:

STEP A — IDENTIFY VULNERABILITIES:
Target these categories specifically:
- SQL Injection: Any string concatenation in SQL queries
- Path Traversal: Any user-controlled file paths
- XSS: Any unsanitized user input rendered in HTML/UI
- Secrets Exposure: Hardcoded API keys, tokens, passwords
- Deserialization: Unsafe pickle/eval/exec usage
- Dependency Vulnerabilities: Known CVEs in imported packages

STEP B — FILTER FALSE POSITIVES:
Remove from the list anything that is:
- Theoretical only (requires unrealistic preconditions)
- Style/best-practice issues (not actual vulnerabilities)
- Already mitigated by existing controls
- Not exploitable in the actual deployment context (local-only SQLite)

STEP C — CONFIDENCE SCORING:
For each remaining vulnerability:
- Score confidence 1-10 (10 = definitely exploitable)
- Only REPORT vulnerabilities with confidence ≥ 8
- Must demonstrate direct exploitability (not just theoretical)
- Provide specific mitigation recommendation

OUTPUT: Security report with all 3 steps, confidence scores, and mitigations.""",
        required_inputs=["diff", "file_paths"],
        required_outputs=["security_report"],
        verification_criteria=[
            "All SQL queries checked for injection",
            "All file operations checked for path traversal",
            "No hardcoded secrets in diff",
            "Only confidence ≥8 vulnerabilities reported",
        ],
    ),
}
318
+
319
+
320
class SkillRegistry:
    """Registry for looking up and managing Superpowers skills.

    Every instance starts from a copy of the module-level ``SKILLS`` mapping,
    so the built-in skills are available as soon as the registry is created.
    """

    def __init__(self):
        # Copy the mapping (not the Skill objects) so that register_skill()
        # changes this instance only and leaves SKILLS itself untouched.
        self._skills = dict(SKILLS)

    def get_skill(self, name: str) -> Optional[Skill]:
        """Return the skill registered under ``name``, or ``None`` if there is none."""
        return self._skills.get(name)

    def list_skills(self) -> list:
        """Return the names of all registered skills, in registration order."""
        return list(self._skills)

    def skill_exists(self, name: str) -> bool:
        """Return ``True`` if a skill is registered under ``name``."""
        return name in self._skills

    def get_skills_for_phase(self, phase: SkillPhase) -> list:
        """Return the ``Skill`` objects whose ``phase`` equals ``phase``."""
        return [skill for skill in self._skills.values() if skill.phase == phase]

    def get_pipeline_order(self) -> dict:
        """Return the Design→Plan→Execute→Verify pipeline as phase value -> skill names.

        Keys follow the declaration order of ``SkillPhase``; a phase with no
        registered skills maps to an empty list.
        """
        return {
            phase.value: [skill.name for skill in self.get_skills_for_phase(phase)]
            for phase in SkillPhase
        }

    def register_skill(self, skill: Skill):
        """Register ``skill`` under ``skill.name``.

        A skill already registered under the same name is replaced.
        """
        self._skills[skill.name] = skill

    def get_combined_system_prompt(self, skill_names: list) -> str:
        """Join the system prompts of ``skill_names`` into one string.

        Each prompt is preceded by a ``=== SKILL: NAME (phase) ===`` header and
        the sections are separated by a blank line, in the order given. Names
        that are not registered are skipped; if none match, the result is an
        empty string.
        """
        sections = []
        for name in skill_names:
            skill = self.get_skill(name)
            if skill is not None:
                sections.append(
                    f"=== SKILL: {skill.name.upper()} ({skill.phase.value}) ===\n{skill.system_prompt}"
                )
        return "\n\n".join(sections)

    def to_context_string(self) -> str:
        """Build the Markdown skill overview used as SUPERPOWERS_WORKFLOW_CONTEXT.

        The overview has one ``## Phase:`` section per ``SkillPhase`` member (in
        declaration order) listing each skill's name, description and, when
        present, its prohibitions, followed by a pipeline summary line.
        """
        lines = ["# SUPERPOWERS WORKFLOW — Available Skills", ""]
        for phase in SkillPhase:
            lines.append(f"## Phase: {phase.value.upper()}")
            for skill in self.get_skills_for_phase(phase):
                lines.append(f" - **{skill.name}**: {skill.description}")
                if skill.prohibitions:
                    lines.append(f" Prohibitions: {', '.join(skill.prohibitions)}")
            lines.append("")
        # Derived from the enum so the summary cannot drift from the sections above.
        lines.append("Pipeline: " + " → ".join(phase.value.upper() for phase in SkillPhase))
        lines.append("All skills are loaded and available. Invoke by name.")
        return "\n".join(lines)
382
+
383
+
384
# Module-level singleton
skill_registry = SkillRegistry()

# Context string for embedding in system prompts.
# Computed once, when this module is imported: skills registered on
# skill_registry afterwards are not reflected here unless
# skill_registry.to_context_string() is called again.
SUPERPOWERS_WORKFLOW_CONTEXT = skill_registry.to_context_string()