Add Superpowers Skill Tree + Meta-Improver: phd_research_os/skills/registry.py

Browse files

Files changed (1) hide show

phd_research_os/skills/registry.py +388 -0

phd_research_os/skills/registry.py ADDED Viewed

	@@ -0,0 +1,388 @@

+"""
+PhD Research OS — Skill Registry
+==================================
+Dynamic loading and management of all Superpowers skills.
+Skills are executable workflow modules that enforce the
+Design → Plan → Execute → Verify pipeline.
+Each skill has:
+  - A system prompt (behavioral instructions)
+  - Required inputs/outputs (gating artifacts)
+  - A phase assignment (which pipeline phase it belongs to)
+  - Verification criteria (what must be true before proceeding)
+"""
+import json
+from typing import Optional
+from dataclasses import dataclass, field, asdict
+from enum import Enum
+class SkillPhase(Enum):
+    """Pipeline stage that a skill is assigned to.
+
+    Members are declared in pipeline order (design, plan, execute, verify);
+    each value is the lowercase spelling of the member name.
+    """
+
+    # Phase 1: Brainstorming → design-spec.md
+    DESIGN = "design"
+    # Phase 2: Writing Plans → tasks.md
+    PLAN = "plan"
+    # Phase 3: TDD + Debugging + Git isolation
+    EXECUTE = "execute"
+    # Phase 4: Code Review + Security Review
+    VERIFY = "verify"
+@dataclass
+class Skill:
+    """Definition of one Superpowers skill.
+
+    All fields except ``prohibitions`` must be supplied at construction;
+    ``prohibitions`` defaults to a fresh empty list for each instance.
+    """
+
+    name: str
+    phase: SkillPhase
+    description: str
+    system_prompt: str
+    # What artifacts must exist before activation.
+    required_inputs: list
+    # What artifacts must be produced.
+    required_outputs: list
+    # Checks that must pass before proceeding.
+    verification_criteria: list
+    # What is explicitly forbidden.
+    prohibitions: list = field(default_factory=list)
+
+    def to_dict(self):
+        """Return the skill as a plain dict, with ``phase`` replaced by its string value."""
+        # Re-assigning an existing key in the merge keeps "phase" in its
+        # original (field-order) position.
+        return {**asdict(self), "phase": self.phase.value}
+# ============================================================
+# The 7 Superpowers Skills
+# ============================================================
+# SKILLS maps each skill's name to its Skill definition; every key below is
+# identical to the `name` field of the Skill it points to. Entries are grouped
+# by pipeline phase in the order DESIGN, PLAN, EXECUTE, VERIFY.
+# The system_prompt values are plain (non-f) strings, so the braces in
+# "{type}/{scope}/{description}" are literal text, not substitutions.
+SKILLS = {
+    # ── Phase 1: DESIGN ──────────────────────────────────
+    "brainstorming": Skill(
+        name="brainstorming",
+        phase=SkillPhase.DESIGN,
+        description="Comprehensive UI/UX and system design specification before any code modification",
+        system_prompt="""You are activating the BRAINSTORMING skill. Before writing ANY code, you must produce a complete design specification.
+Your design-spec MUST include ALL of these sections:
+1. **User Stories & Acceptance Criteria**: Who needs what, and how do we know it works?
+2. **UI Wireframes**: Textual descriptions of every screen/component layout
+3. **Component Hierarchy Diagram**: Mermaid diagram showing component relationships
+4. **State Management Mapping**: What state exists at each pipeline stage
+5. **API Contracts**: Input/output JSON schemas for every interface
+6. **Styling & Design System**: Colors, typography, spacing, dark mode rules
+7. **Accessibility Considerations**: ARIA labels, keyboard nav, screen reader support
+8. **Performance Budgets**: Response time targets, memory limits, API call budgets
+OUTPUT GATE: You MUST produce a complete design-spec as JSON with all 8 sections populated.
+Do NOT proceed to planning until the design spec is confirmed complete.
+No section may contain "TBD" or placeholders in critical fields.""",
+        required_inputs=["task_description"],
+        required_outputs=["design_spec"],
+        verification_criteria=[
+            "All 8 sections present and non-empty",
+            "No TBD or placeholder text in critical sections",
+            "API contracts have concrete JSON schemas",
+            "User stories have explicit acceptance criteria",
+        ],
+    ),
+    # ── Phase 2: PLAN ────────────────────────────────────
+    "writing_plans": Skill(
+        name="writing_plans",
+        phase=SkillPhase.PLAN,
+        description="Break design spec into atomic 2-5 minute tasks with exact file paths and commands",
+        system_prompt="""You are activating the WRITING PLANS skill. Convert the design spec into an actionable task list.
+RULES:
+- Each task takes 2-5 minutes maximum
+- Every task MUST specify:
+  * Exact file path(s) to create or modify
+  * Precise terminal commands to execute
+  * Clear verification steps (how to confirm it worked)
+  * Dependencies on other tasks
+  * Time window estimate
+ABSOLUTE PROHIBITION: No pseudocode. Only concrete, executable instructions.
+Every command must be copy-pasteable into a terminal.
+Every file path must be absolute or clearly relative to project root.
+OUTPUT: A tasks list as JSON array, each with: id, description, file_paths, commands, verification, dependencies, time_estimate_minutes.""",
+        required_inputs=["design_spec"],
+        required_outputs=["tasks_list"],
+        verification_criteria=[
+            "Every task has exact file paths",
+            "Every task has executable commands (no pseudocode)",
+            "Every task has verification steps",
+            "Dependencies form a valid DAG (no cycles)",
+            "Total estimated time is reasonable",
+        ],
+        prohibitions=["pseudocode", "TBD file paths", "vague verification steps"],
+    ),
+    # ── Phase 3: EXECUTE ─────────────────────────────────
+    "git_worktrees": Skill(
+        name="git_worktrees",
+        phase=SkillPhase.EXECUTE,
+        description="Isolation protocol: worktree preference, branch fallback, clean state requirements",
+        system_prompt="""You are activating the GIT WORKTREES skill for code isolation.
+PROTOCOL:
+1. PREFER git worktree for isolation. FALLBACK to branch if worktree unavailable.
+2. Before ANY modification:
+   - Verify clean working directory (no uncommitted changes)
+   - Verify base branch is up-to-date
+   - Create worktree/branch with naming convention: {type}/{scope}/{description}
+3. NAMING CONVENTIONS:
+   - feature/taxonomy/add-domain-crud
+   - fix/scoring/confidence-overflow
+   - refactor/pipeline/extract-chunker
+4. COMMIT MESSAGES: Conventional format: type(scope): subject
+   - feat(taxonomy): add domain CRUD operations
+   - fix(scoring): prevent integer overflow in fixed-point math
+   - test(skills): add brainstorming skill unit tests
+5. CLEANUP: After merge, delete worktree/branch. No orphans.
+OUTPUT: Isolation confirmation with branch name, clean state verification, and commit plan.""",
+        required_inputs=["tasks_list", "current_task"],
+        required_outputs=["isolation_confirmation"],
+        verification_criteria=[
+            "Working directory is clean",
+            "Branch/worktree created with correct naming",
+            "Base branch is up-to-date",
+        ],
+    ),
+    "test_driven_development": Skill(
+        name="test_driven_development",
+        phase=SkillPhase.EXECUTE,
+        description="Enforced RED-GREEN-REFACTOR loop for every code change",
+        system_prompt="""You are activating the TEST-DRIVEN DEVELOPMENT skill.
+For EVERY code change, you MUST follow this exact cycle:
+RED PHASE:
+- Write a failing test that defines the desired behavior
+- The test must be minimal, focused, and executable
+- Run the test and CONFIRM it fails (if it passes, your test is wrong)
+GREEN PHASE:
+- Write the MINIMAL code to make the test pass
+- Do NOT anticipate future needs — solve only the current test
+- Run the test and CONFIRM it passes
+REFACTOR PHASE:
+- Improve code quality while keeping all tests green
+- Extract duplication, improve naming, simplify logic
+- Run ALL tests to confirm nothing broke
+OUTPUT: For each RED-GREEN-REFACTOR cycle, report:
+- test_name: What test was written
+- red_result: Confirmed failing (error message)
+- green_result: Confirmed passing
+- refactor_changes: What was improved
+- all_tests_status: Full suite still passing""",
+        required_inputs=["current_task", "isolation_confirmation"],
+        required_outputs=["tdd_cycles"],
+        verification_criteria=[
+            "Every change has a corresponding test",
+            "RED phase confirmed test actually fails",
+            "GREEN phase uses minimal implementation",
+            "REFACTOR phase doesn't break existing tests",
+        ],
+        prohibitions=["writing code before tests", "anticipating future needs in GREEN phase"],
+    ),
+    "systematic_debugging": Skill(
+        name="systematic_debugging",
+        phase=SkillPhase.EXECUTE,
+        description="4-phase root-cause analysis: Identify, Isolate, Fix, Verify",
+        system_prompt="""You are activating the SYSTEMATIC DEBUGGING skill.
+When a test fails or an error occurs, follow this STRICT 4-phase protocol:
+PHASE 1 — IDENTIFY:
+- Reproduce the error with exact steps
+- Record the full error message and stack trace
+- Note what SHOULD happen vs what ACTUALLY happens
+PHASE 2 — ISOLATE:
+- Use BINARY SEARCH debugging: eliminate half the possible causes at each step
+- Check: Is the input correct? Is the function called? Is the output wrong?
+- Narrow down to the EXACT line/function where behavior diverges from expectation
+PHASE 3 — FIX:
+- Apply the MINIMAL change that fixes the root cause
+- Do NOT fix symptoms — fix the underlying cause
+- Do NOT refactor unrelated code while fixing a bug
+PHASE 4 — VERIFY:
+- Confirm the original error no longer occurs
+- Run the FULL test suite to check for regressions
+- Document what went wrong and why (lessons learned)
+PROHIBITIONS:
+- Blind guessing (trying random changes hoping something works)
+- Shotgun debugging (changing multiple things at once)
+- Premature optimization (fixing performance while debugging correctness)
+- Skipping verification (assuming the fix works without testing)
+OUTPUT: Debug report with all 4 phases documented.""",
+        required_inputs=["error_description", "stack_trace"],
+        required_outputs=["debug_report"],
+        verification_criteria=[
+            "Root cause identified (not just symptoms)",
+            "Fix is minimal and targeted",
+            "Full test suite passes after fix",
+            "Lessons learned documented",
+        ],
+        prohibitions=["blind guessing", "shotgun debugging", "premature optimization", "skipping verification"],
+    ),
+    # ── Phase 4: VERIFY ──────────────────────────────────
+    "code_review": Skill(
+        name="code_review",
+        phase=SkillPhase.VERIFY,
+        description="2-stage internal review: self-review then blind code-reviewer subagent",
+        system_prompt="""You are activating the CODE REVIEW skill.
+Execute a 2-STAGE review process:
+STAGE 1 — SELF REVIEW (Author Perspective):
+- Look at ONLY the diff (changes made, not entire file)
+- Compare every change against the original plan/tasks list
+- ZERO DEVIATION GATE: Any change not in the plan must be explicitly justified
+- Checklist:
+  □ Every changed file was in the plan
+  □ No unrelated changes smuggled in
+  □ Error handling present for all new code paths
+  □ No hardcoded secrets, paths, or magic numbers
+  □ All new functions have docstrings
+STAGE 2 — CODE REVIEWER SUBAGENT (Blind Review):
+- Pretend you are a DIFFERENT engineer who has NEVER seen this code
+- You receive ONLY: the git diff and the task plan
+- Check:
+  □ Does the diff achieve what the plan says?
+  □ Are there any logic errors?
+  □ Are edge cases handled?
+  □ Is the code idiomatic for the language/framework?
+  □ Would an experienced engineer find this reasonable?
+OUTPUT: Review report with both stages, findings, and deviation justifications.""",
+        required_inputs=["diff", "tasks_list"],
+        required_outputs=["review_report"],
+        verification_criteria=[
+            "All changes traced to plan items",
+            "No unrelated changes present",
+            "Both review stages completed",
+            "All findings addressed or justified",
+        ],
+    ),
+    "security_review": Skill(
+        name="security_review",
+        phase=SkillPhase.VERIFY,
+        description="3-step security audit: identify vulnerabilities, filter false positives, confidence score",
+        system_prompt="""You are activating the SECURITY REVIEW skill.
+Execute a 3-STEP security audit:
+STEP A — IDENTIFY VULNERABILITIES:
+Target these categories specifically:
+- SQL Injection: Any string concatenation in SQL queries
+- Path Traversal: Any user-controlled file paths
+- XSS: Any unsanitized user input rendered in HTML/UI
+- Secrets Exposure: Hardcoded API keys, tokens, passwords
+- Deserialization: Unsafe pickle/eval/exec usage
+- Dependency Vulnerabilities: Known CVEs in imported packages
+STEP B — FILTER FALSE POSITIVES:
+Remove from the list anything that is:
+- Theoretical only (requires unrealistic preconditions)
+- Style/best-practice issues (not actual vulnerabilities)
+- Already mitigated by existing controls
+- Not exploitable in the actual deployment context (local-only SQLite)
+STEP C — CONFIDENCE SCORING:
+For each remaining vulnerability:
+- Score confidence 1-10 (10 = definitely exploitable)
+- Only REPORT vulnerabilities with confidence ≥ 8
+- Must demonstrate direct exploitability (not just theoretical)
+- Provide specific mitigation recommendation
+OUTPUT: Security report with all 3 steps, confidence scores, and mitigations.""",
+        required_inputs=["diff", "file_paths"],
+        required_outputs=["security_report"],
+        verification_criteria=[
+            "All SQL queries checked for injection",
+            "All file operations checked for path traversal",
+            "No hardcoded secrets in diff",
+            "Only confidence ≥8 vulnerabilities reported",
+        ],
+    ),
+}
+class SkillRegistry:
+    """
+    Registry of Superpowers skills, looked up by name or by pipeline phase.
+
+    Each instance starts with its own shallow copy of the module-level
+    ``SKILLS`` mapping, so ``register_skill`` on one registry does not alter
+    ``SKILLS`` or any other registry. Phase-oriented views iterate over
+    ``SkillPhase`` itself, so they follow the enum's declaration order and
+    include every phase the enum defines.
+    """
+
+    def __init__(self):
+        # dict(...) copies the mapping only; the Skill objects are shared.
+        self._skills = dict(SKILLS)
+
+    def get_skill(self, name: str) -> Optional[Skill]:
+        """Return the skill registered under ``name``, or ``None`` if absent."""
+        return self._skills.get(name)
+
+    def list_skills(self) -> list:
+        """Return all registered skill names in the registry's insertion order."""
+        return list(self._skills)
+
+    def skill_exists(self, name: str) -> bool:
+        """Return ``True`` when a skill is registered under ``name``."""
+        return name in self._skills
+
+    def get_skills_for_phase(self, phase: SkillPhase) -> list:
+        """Return the ``Skill`` objects whose ``phase`` equals ``phase``."""
+        return [s for s in self._skills.values() if s.phase == phase]
+
+    def get_pipeline_order(self) -> dict:
+        """Map each phase value to the names of its skills, in pipeline order.
+
+        Keys are the ``SkillPhase`` values ("design", "plan", "execute",
+        "verify") in declaration order; a phase with no skills maps to ``[]``.
+        """
+        return {
+            phase.value: [s.name for s in self.get_skills_for_phase(phase)]
+            for phase in SkillPhase
+        }
+
+    def register_skill(self, skill: Skill):
+        """Register ``skill`` under ``skill.name``, replacing any existing entry."""
+        self._skills[skill.name] = skill
+
+    def get_combined_system_prompt(self, skill_names: list) -> str:
+        """Join the system prompts of ``skill_names`` into one prompt string.
+
+        Each prompt is preceded by a ``=== SKILL: NAME (phase) ===`` header and
+        sections are separated by a blank line. Names that are not registered
+        are skipped; the result is ``""`` when no name matches.
+        """
+        found = (self.get_skill(name) for name in skill_names)
+        return "\n\n".join(
+            f"=== SKILL: {s.name.upper()} ({s.phase.value}) ===\n{s.system_prompt}"
+            for s in found
+            if s is not None
+        )
+
+    def to_context_string(self) -> str:
+        """Generate the SUPERPOWERS_WORKFLOW_CONTEXT text for system prompts.
+
+        The text lists every phase as a ``## Phase:`` heading followed by one
+        bullet per skill (plus its prohibitions, when it has any), and ends
+        with a one-line pipeline summary.
+        """
+        lines = ["# SUPERPOWERS WORKFLOW — Available Skills", ""]
+        for phase in SkillPhase:
+            lines.append(f"## Phase: {phase.value.upper()}")
+            for s in self.get_skills_for_phase(phase):
+                lines.append(f"  - **{s.name}**: {s.description}")
+                if s.prohibitions:
+                    lines.append(f"    Prohibitions: {', '.join(s.prohibitions)}")
+            lines.append("")
+        # Derived from the enum so the summary cannot drift from the headings.
+        pipeline = " → ".join(phase.value.upper() for phase in SkillPhase)
+        lines.append(f"Pipeline: {pipeline}")
+        lines.append("All skills are loaded and available. Invoke by name.")
+        return "\n".join(lines)
+# Module-level singleton: one shared registry, created when this module is imported.
+skill_registry = SkillRegistry()
+# Context string for embedding in system prompts. It is rendered once, at import
+# time, so skills added later via skill_registry.register_skill() do not appear
+# in it; call skill_registry.to_context_string() again for a current version.
+SUPERPOWERS_WORKFLOW_CONTEXT = skill_registry.to_context_string()