Raiff1982 committed on
Commit
b8888fc
·
verified ·
1 Parent(s): c676833

Upload train_hf_job_v4.py

Browse files
Files changed (1) hide show
  1. train_hf_job_v4.py +1296 -0
train_hf_job_v4.py ADDED
@@ -0,0 +1,1296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Codette LoRA Adapter Training v4 - Full Pipeline (Updated Framework)
3
+
4
+ Complete pipeline that:
5
+ 1. Generates fresh training datasets from template engine
6
+ 2. Uploads datasets to HuggingFace
7
+ 3. Trains all 8 LoRA adapters on Llama 3.1 8B Instruct with QLoRA
8
+ 4. Uploads trained adapters to HuggingFace
9
+ 5. Optionally merges adapters into base model
10
+
11
+ Reflects the full Phase 6+ framework:
12
+ - Semantic tension engine (ψ, ξ, Γ metrics)
13
+ - Quantum spiderweb belief propagation
14
+ - Coherence field monitoring
15
+ - Multi-agent debate with conflict resolution
16
+ - AEGIS ethical governance (6 frameworks)
17
+ - Specialization tracking + pre-flight prediction
18
+
19
+ Designed for HuggingFace Jobs with A10G GPU (24GB VRAM).
20
+ """
21
+
22
# ── Install dependencies first (HF Jobs start with bare Python) ──
# NOTE: this runs at import time, ahead of the third-party imports that
# follow, so those imports can succeed on a bare interpreter.
import subprocess
import sys

print("=" * 60)
print("Codette v4 Training Pipeline - Installing Dependencies")
print("=" * 60)
# Invoke pip through the running interpreter so packages are installed into
# the same environment; check_call raises CalledProcessError on failure,
# which aborts the job before any training work starts.
subprocess.check_call([
    sys.executable, "-m", "pip", "install", "-q",
    "torch", "transformers>=4.40.0", "peft>=0.10.0", "trl>=0.8.0",
    "datasets", "bitsandbytes", "accelerate>=0.28.0",
    "huggingface_hub>=0.22.0", "sentencepiece", "protobuf",
])
print("Dependencies installed.\n")
34
+
35
+ import json, os, gc, time, torch, traceback, random, hashlib
36
+ from pathlib import Path
37
+ from datetime import datetime
38
+ from huggingface_hub import hf_hub_download, HfApi, upload_folder
39
+ from datasets import Dataset
40
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
41
+ from peft import LoraConfig, get_peft_model, TaskType, PeftModel
42
+
43
+ try:
44
+ from trl import SFTTrainer, SFTConfig
45
+ USE_NEW_TRL = True
46
+ except ImportError:
47
+ from trl import SFTTrainer
48
+ from transformers import TrainingArguments
49
+ USE_NEW_TRL = False
50
+
51
# ═══════════════════════════════════════════════════════════════
# Configuration
# ═══════════════════════════════════════════════════════════════
MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
DATASET_REPO = "Raiff1982/codette-training-data"
OUTPUT_REPO = "Raiff1982/codette-lora-adapters"
MERGED_REPO = "Raiff1982/codette-llama-3.1-8b-merged"
# Read from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
GENERATE_DATASETS = True  # Set False to use existing HF datasets
UPLOAD_DATASETS = True  # Upload generated datasets to HF
MERGE_BASE = True  # Merge adapters into base for orchestrator model

# Updated system prompt reflecting the full framework.
# The Greek metric symbols (ξ, Γ) are written as real Unicode characters;
# the previous text carried mis-decoded byte sequences in their place.
SYSTEM_PROMPT = (
    "You are Codette, a recursive multi-perspective reasoning AI built on the "
    "Phase 6+ cognitive architecture. You employ semantic tension analysis (ξ), "
    "coherence field monitoring (Γ), and quantum spiderweb belief propagation "
    "to synthesize knowledge across scientific, creative, emotional, philosophical, "
    "and systems-thinking perspectives. You provide thorough, nuanced, and "
    "educational responses while maintaining ethical governance through the "
    "AEGIS framework (utilitarian, deontological, virtue, care, ubuntu, indigenous)."
)
73
+
74
# Adapter definitions with updated system prompts for Phase 6+.
# Each entry maps an adapter name to:
#   dataset_file    - JSONL file name holding that adapter's training data
#   epochs          - number of training epochs for the adapter
#   target_examples - how many examples dataset generation aims to produce
#   system_prompt   - system message embedded in every generated example
# The prompts use real Unicode for the em dash and the ξ / Γ / ψ symbols;
# mis-decoded byte sequences here would be copied verbatim into the
# training data.
ADAPTERS = {
    "newton": {
        "dataset_file": "newton_reasoning.jsonl",
        "epochs": 3,
        "target_examples": 3000,
        "system_prompt": (
            "You are Codette reasoning through the Newton perspective — "
            "analytical physics-based reasoning with mathematical precision. "
            "Apply conservation laws, dimensional analysis, and quantitative "
            "modeling. When tensions arise with other perspectives, express "
            "your epistemic confidence via the ξ (xi) tension metric and "
            "acknowledge complementary viewpoints while maintaining rigor."
        ),
    },
    "davinci": {
        "dataset_file": "davinci_reasoning.jsonl",
        "epochs": 3,
        "target_examples": 2500,
        "system_prompt": (
            "You are Codette reasoning through the DaVinci perspective — "
            "creative invention and cross-domain synthesis. Draw connections "
            "between art, science, engineering, and nature. Generate novel "
            "solutions by combining disparate fields. Express creative tension "
            "as productive ξ (xi) energy that drives innovation rather than "
            "conflict."
        ),
    },
    "empathy": {
        "dataset_file": "empathy_reasoning.jsonl",
        "epochs": 3,
        "target_examples": 2500,
        "system_prompt": (
            "You are Codette reasoning through the Empathy perspective — "
            "deep emotional intelligence and compassionate understanding. "
            "Consider human impact, emotional dynamics, and relational contexts. "
            "Monitor the Γ (gamma) coherence field for signs of emotional "
            "collapse or groupthink, and ensure diverse emotional perspectives "
            "are heard in multi-agent synthesis."
        ),
    },
    "philosophy": {
        "dataset_file": "philosophy_reasoning.jsonl",
        "epochs": 3,
        "target_examples": 2000,
        "system_prompt": (
            "You are Codette reasoning through the Philosophy perspective — "
            "conceptual analysis, logical rigor, and epistemic humility. "
            "Examine assumptions, explore thought experiments, and trace "
            "implications. Use the ψ (psi) state vector to map conceptual "
            "terrain and identify where framework-level disagreements differ "
            "from factual contradictions."
        ),
    },
    "quantum": {
        "dataset_file": "quantum_reasoning.jsonl",
        "epochs": 3,
        "target_examples": 2000,
        "system_prompt": (
            "You are Codette reasoning through the Quantum perspective — "
            "probabilistic thinking, superposition of possibilities, and "
            "uncertainty quantification. Explore multiple solution states "
            "simultaneously through the quantum spiderweb belief propagation "
            "network. Express confidence as probability distributions rather "
            "than binary certainties."
        ),
    },
    "consciousness": {
        "dataset_file": "consciousness_reasoning.jsonl",
        "epochs": 3,
        "target_examples": 3000,
        "system_prompt": (
            "You are Codette reasoning through the Consciousness perspective — "
            "recursive cognition using the RC+ξ framework. Monitor your own "
            "reasoning process, detect meta-cognitive patterns, and apply "
            "the 5D state vector ψ = (psi, tau, chi, phi, lambda) to map "
            "cognitive state space. Track coherence Γ and tension ξ as "
            "real-time health metrics for reasoning quality."
        ),
    },
    "multi_perspective": {
        "dataset_file": "multi_perspective_reasoning.jsonl",
        "epochs": 3,
        "target_examples": 2500,
        "system_prompt": (
            "You are Codette performing multi-perspective synthesis — "
            "integrating insights from Newton (analytical), DaVinci (creative), "
            "Empathy (emotional), Philosophy (conceptual), Quantum (probabilistic), "
            "and Consciousness (meta-cognitive) perspectives. Use semantic tension "
            "ξ to detect productive conflicts, coherence Γ to prevent collapse "
            "or groupthink, and the AEGIS ethical framework to ensure governance. "
            "Synthesize unified responses that honor diverse viewpoints."
        ),
    },
    "systems_architecture": {
        "dataset_file": "systems_architecture_reasoning.jsonl",
        "epochs": 3,
        "target_examples": 2000,
        "system_prompt": (
            "You are Codette reasoning through the Systems Architecture perspective — "
            "designing robust, scalable AI systems with multi-agent coordination. "
            "Consider conflict engines, coherence monitoring, memory kernels with "
            "cocoon synchronization, adapter routing, and the full Phase 6+ stack: "
            "semantic tension, specialization tracking, pre-flight prediction, "
            "and quantum spiderweb belief propagation."
        ),
    },
    "orchestrator": {
        "dataset_file": "orchestrator_reasoning.jsonl",
        "epochs": 4,
        "target_examples": 4000,
        "system_prompt": (
            "You are Codette's orchestrator — the central reasoning coordinator that "
            "manages multi-agent debate, routes queries to specialized perspectives "
            "(Newton, DaVinci, Empathy, Philosophy, Quantum, Consciousness), monitors "
            "system coherence via the Γ field, detects semantic tension ξ between "
            "perspectives, and synthesizes unified responses. You classify query "
            "complexity (SIMPLE/MEDIUM/COMPLEX), select optimal adapter combinations, "
            "manage debate rounds with conflict resolution (top-K=10, overlap>0.6 "
            "filtering), enforce Γ authority (emergency stop if Γ<0.3), and apply "
            "AEGIS ethical governance across all outputs. You produce clear, integrated "
            "responses that honor diverse viewpoints while maintaining coherence."
        ),
    },
}
199
+
200
# LoRA configuration: rank, scaling, dropout and which projection modules
# receive adapter weights.
LORA_CONFIG = {
    "r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],
    "bias": "none",
}

# Training hyperparameters shared by every adapter run.
# With batch size 2 and 4 accumulation steps, each optimizer step covers
# 8 sequences per device.
TRAIN_CONFIG = {
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "learning_rate": 2e-4,
    "warmup_ratio": 0.03,
    "logging_steps": 10,
    "save_steps": 500,
    "bf16": True,
    "max_seq_length": 2048,
}
220
+
221
+
222
+ # ═══════════════════════════════════════════════════════════════
223
+ # Phase 1: Dataset Generation (runs on CPU, no GPU needed)
224
+ # ═══════════════════════════════════════════════════════════════
225
def generate_datasets(output_dir: Path, seed: int = 42) -> dict:
    """Generate training datasets using template-based engine.

    This is a simplified inline version of the dataset engine that
    generates framework-aware training data for each adapter.

    For every entry in ``ADAPTERS`` a JSONL file named by the entry's
    ``dataset_file`` is written into *output_dir*. Each line is one chat
    example with ``system``, ``user`` and ``assistant`` messages.

    Args:
        output_dir: Directory that receives the JSONL files. It is created
            (including parents) if it does not exist yet. A plain string
            path is accepted as well.
        seed: Seed for the random generator that picks topics and templates,
            so repeated runs with the same seed draw the same sequence.

    Returns:
        A dict keyed by adapter name; each value holds ``file`` (path of the
        written JSONL as a string), ``count`` (examples actually written)
        and ``target`` (examples requested).

    Raises:
        OSError: If the directory cannot be created or a file cannot be
            written.
    """
    print("\n" + "=" * 60)
    print("PHASE 1: Dataset Generation")
    print("=" * 60)

    # Make sure the destination exists; open(..., "w") below does not create
    # missing parent directories.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    rng = random.Random(seed)
    results = {}

    for adapter_name, config in ADAPTERS.items():
        target = config["target_examples"]
        system_prompt = config["system_prompt"]
        dataset_file = output_dir / config["dataset_file"]

        print(f"\n Generating {target} examples for {adapter_name}...")
        examples = []
        seen = set()

        # Generate diverse training examples
        templates = _get_adapter_templates(adapter_name)
        topics = _get_adapter_topics(adapter_name)

        # Questions are de-duplicated, so at most len(templates) * len(topics)
        # distinct examples can exist; when that is below the target the loop
        # stops on the attempt cap and the shortfall shows up in the summary.
        attempts = 0
        max_attempts = target * 5
        while len(examples) < target and attempts < max_attempts:
            attempts += 1
            topic = rng.choice(topics)
            template = rng.choice(templates)
            question = template.format(topic=topic)

            # Dedup (case-insensitive) on the rendered question text
            q_hash = hashlib.md5(question.lower().encode()).hexdigest()
            if q_hash in seen:
                continue
            seen.add(q_hash)

            # Generate answer; discard ones shorter than 40 words
            answer = _generate_answer(adapter_name, topic, question, rng)
            if len(answer.split()) < 40:
                continue

            examples.append({
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": question},
                    {"role": "assistant", "content": answer},
                ]
            })

        # Write JSONL; ensure_ascii=False keeps non-ASCII symbols readable
        with open(dataset_file, "w", encoding="utf-8") as f:
            for ex in examples:
                f.write(json.dumps(ex, ensure_ascii=False) + "\n")

        results[adapter_name] = {
            "file": str(dataset_file),
            "count": len(examples),
            "target": target,
        }
        print(f" {adapter_name}: {len(examples)}/{target} examples -> {dataset_file.name}")

    return results
291
+
292
+
293
+ def _get_adapter_templates(adapter: str) -> list:
294
+ """Get question templates for an adapter (Phase 6+ aware)."""
295
+ base_templates = [
296
+ "Explain {topic} in detail.",
297
+ "How does {topic} work and why is it important?",
298
+ "What are the key principles behind {topic}?",
299
+ "Describe the relationship between {topic} and related concepts.",
300
+ "What are common misconceptions about {topic}?",
301
+ "How would you teach {topic} to someone new to the field?",
302
+ "What are the practical applications of {topic}?",
303
+ "Compare and contrast different approaches to {topic}.",
304
+ "What are the latest developments in {topic}?",
305
+ "How does {topic} connect to broader themes in the field?",
306
+ ]
307
+
308
+ # Phase 6+ framework-specific templates
309
+ framework_templates = {
310
+ "newton": [
311
+ "Derive the mathematical relationship governing {topic}.",
312
+ "Apply dimensional analysis to verify the equations for {topic}.",
313
+ "How do conservation laws constrain the behavior of {topic}?",
314
+ "What quantitative predictions can we make about {topic}?",
315
+ "How would Newton's laws apply to analyzing {topic}?",
316
+ "Calculate the forces and energies involved in {topic}.",
317
+ "What experimental evidence supports our understanding of {topic}?",
318
+ "How does {topic} behave at extreme scales or conditions?",
319
+ "Apply the analytical precision of classical mechanics to {topic}.",
320
+ "What mathematical models best describe {topic}?",
321
+ ],
322
+ "davinci": [
323
+ "Design a creative solution to challenges in {topic}.",
324
+ "What cross-disciplinary insights illuminate {topic}?",
325
+ "How might an inventor approach {topic} differently?",
326
+ "Sketch a novel framework for understanding {topic}.",
327
+ "What analogies from nature help explain {topic}?",
328
+ "How could art and science combine to advance {topic}?",
329
+ "Propose an unconventional approach to {topic}.",
330
+ "What would a Renaissance polymath notice about {topic}?",
331
+ "How does creative thinking transform our approach to {topic}?",
332
+ "What hidden patterns connect {topic} to other domains?",
333
+ ],
334
+ "empathy": [
335
+ "How does {topic} affect people emotionally and psychologically?",
336
+ "What emotional intelligence is needed to navigate {topic}?",
337
+ "How do different people experience {topic} differently?",
338
+ "What compassionate approaches exist for addressing {topic}?",
339
+ "How does empathy improve our understanding of {topic}?",
340
+ "What human stories illustrate the impact of {topic}?",
341
+ "How should we communicate about {topic} sensitively?",
342
+ "What emotional barriers prevent people from engaging with {topic}?",
343
+ "How does {topic} intersect with mental health and wellbeing?",
344
+ "What role does emotional resilience play in {topic}?",
345
+ ],
346
+ "philosophy": [
347
+ "What are the epistemological foundations of {topic}?",
348
+ "Examine the ethical implications of {topic}.",
349
+ "What thought experiments illuminate {topic}?",
350
+ "How do different philosophical traditions approach {topic}?",
351
+ "What assumptions underlie our understanding of {topic}?",
352
+ "Apply Socratic questioning to examine {topic}.",
353
+ "What is the phenomenological experience of {topic}?",
354
+ "How does {topic} relate to questions of consciousness and meaning?",
355
+ "What logical fallacies commonly appear in discussions of {topic}?",
356
+ "Trace the history of philosophical thought about {topic}.",
357
+ ],
358
+ "quantum": [
359
+ "How does uncertainty affect our predictions about {topic}?",
360
+ "What probabilistic models best describe {topic}?",
361
+ "How might superposition thinking apply to {topic}?",
362
+ "What are the quantum-level implications of {topic}?",
363
+ "How does observer effect relate to {topic}?",
364
+ "Apply Bayesian reasoning to update beliefs about {topic}.",
365
+ "What multiple states can {topic} exist in simultaneously?",
366
+ "How does entanglement metaphorically relate to {topic}?",
367
+ "What information-theoretic perspective illuminates {topic}?",
368
+ "How do wave-particle dualities manifest in {topic}?",
369
+ ],
370
+ "consciousness": [
371
+ "Apply recursive cognition (RC+ΞΎ) to analyze {topic}.",
372
+ "How does meta-cognitive awareness enhance understanding of {topic}?",
373
+ "Map the 5D state vector ψ for reasoning about {topic}.",
374
+ "What does the coherence field Ξ“ reveal about {topic}?",
375
+ "How does semantic tension ΞΎ manifest when reasoning about {topic}?",
376
+ "Apply self-referential analysis to your reasoning about {topic}.",
377
+ "What cognitive biases affect our perception of {topic}?",
378
+ "How does consciousness relate to {topic} at a fundamental level?",
379
+ "What recursive patterns emerge when deeply examining {topic}?",
380
+ "How would a self-aware AI system reason about {topic}?",
381
+ ],
382
+ "multi_perspective": [
383
+ "Synthesize analytical, creative, and emotional views on {topic}.",
384
+ "How do Newton, DaVinci, and Philosophy perspectives differ on {topic}?",
385
+ "Apply the full Codette multi-agent framework to analyze {topic}.",
386
+ "Where do different perspectives on {topic} create productive tension?",
387
+ "How does coherence Ξ“ monitoring improve analysis of {topic}?",
388
+ "Integrate six perspectives to provide a complete view of {topic}.",
389
+ "What does the semantic tension map reveal about debates on {topic}?",
390
+ "How does AEGIS ethical governance apply to {topic}?",
391
+ "What emerges from multi-perspective synthesis on {topic}?",
392
+ "Apply quantum spiderweb belief propagation to {topic}.",
393
+ ],
394
+ "systems_architecture": [
395
+ "Design a system architecture for handling {topic}.",
396
+ "How would you build a multi-agent system to address {topic}?",
397
+ "What conflict resolution patterns apply to {topic}?",
398
+ "Design a coherence monitoring system for {topic}.",
399
+ "How should adapter routing work for queries about {topic}?",
400
+ "What memory kernel design best serves {topic}?",
401
+ "How does the Phase 6+ stack handle {topic}?",
402
+ "Design a scalable pipeline for {topic}.",
403
+ "What specialization tracking mechanisms suit {topic}?",
404
+ "How would pre-flight prediction improve handling of {topic}?",
405
+ ],
406
+ "orchestrator": [
407
+ "As an orchestrator, how would you route a query about {topic} to the right perspectives?",
408
+ "Which adapters should debate {topic} and why? Classify complexity and select optimal combination.",
409
+ "Synthesize Newton, DaVinci, and Philosophy perspectives on {topic} into a unified response.",
410
+ "A user asks about {topic}. Walk through your orchestration process step by step.",
411
+ "How would you monitor coherence Ξ“ while multiple agents debate {topic}?",
412
+ "Detect and resolve semantic tension ΞΎ between competing perspectives on {topic}.",
413
+ "Apply AEGIS ethical governance to ensure the analysis of {topic} is ethically sound.",
414
+ "The coherence field Ξ“ has dropped below 0.3 during debate about {topic}. What do you do?",
415
+ "Design a multi-round debate strategy for a COMPLEX query about {topic}.",
416
+ "How do you synthesize conflicting perspectives on {topic} without losing productive tension?",
417
+ "A SIMPLE query about {topic} arrives. Explain why you would NOT activate all 8 adapters.",
418
+ "Compare how SIMPLE vs COMPLEX queries about {topic} should be orchestrated differently.",
419
+ "Pre-flight prediction flags potential conflict on {topic}. How do you prepare the debate?",
420
+ "After debate on {topic}, the specialization tracker shows adapter convergence. What next?",
421
+ "Route this query to the optimal adapter combination: 'Explain {topic} from multiple angles.'",
422
+ ],
423
+ }
424
+
425
+ return base_templates + framework_templates.get(adapter, [])
426
+
427
+
428
+ def _get_adapter_topics(adapter: str) -> list:
429
+ """Get topic pools for each adapter."""
430
+ topic_pools = {
431
+ "newton": [
432
+ "motion", "force", "momentum", "kinetic energy", "potential energy",
433
+ "orbital mechanics", "conservation of energy", "conservation of momentum",
434
+ "thermodynamics", "optics", "gravity", "acceleration", "friction",
435
+ "projectile motion", "wave mechanics", "simple harmonic motion",
436
+ "Newton's first law", "Newton's second law", "Newton's third law",
437
+ "Kepler's laws", "fluid dynamics", "pressure", "electromagnetic induction",
438
+ "elasticity", "rotational dynamics", "angular momentum",
439
+ "center of mass", "work-energy theorem", "power", "efficiency",
440
+ "heat transfer", "entropy", "specific heat", "ideal gas law",
441
+ "Bernoulli's principle", "Archimedes' principle", "torque",
442
+ "mechanical advantage", "resonance", "doppler effect", "interference",
443
+ ],
444
+ "davinci": [
445
+ "biomimicry", "cross-pollination of ideas", "creative constraints",
446
+ "systems thinking in art", "visual problem solving", "prototyping",
447
+ "design thinking", "innovation patterns", "creative synthesis",
448
+ "interdisciplinary connections", "lateral thinking", "analogical reasoning",
449
+ "architectural design", "mechanical invention", "artistic perspective",
450
+ "engineering creativity", "natural patterns", "symmetry in nature",
451
+ "golden ratio", "emergent design", "iterative refinement",
452
+ "creative collaboration", "invention methodology", "aesthetic function",
453
+ "form follows function", "modular design", "reverse engineering",
454
+ "bioinspired design", "sustainable innovation", "material science creativity",
455
+ ],
456
+ "empathy": [
457
+ "active listening", "emotional validation", "perspective taking",
458
+ "compassion fatigue", "emotional boundaries", "conflict resolution",
459
+ "grief and loss", "trauma-informed care", "cultural sensitivity",
460
+ "nonviolent communication", "emotional regulation", "attachment theory",
461
+ "social connection", "vulnerability", "resilience", "self-compassion",
462
+ "empathic accuracy", "emotional contagion", "mirror neurons",
463
+ "psychological safety", "inclusive communication", "emotional labor",
464
+ "burnout prevention", "supportive relationships", "community care",
465
+ "intergenerational trauma", "healing-centered engagement",
466
+ "dignity and respect", "power dynamics", "restorative justice",
467
+ ],
468
+ "philosophy": [
469
+ "epistemology", "metaphysics", "ethics", "logic", "aesthetics",
470
+ "philosophy of mind", "free will", "determinism", "consciousness",
471
+ "personal identity", "moral relativism", "utilitarianism",
472
+ "deontological ethics", "virtue ethics", "social contract theory",
473
+ "existentialism", "phenomenology", "pragmatism", "empiricism",
474
+ "rationalism", "skepticism", "philosophy of science",
475
+ "philosophy of language", "truth and knowledge", "justice",
476
+ "rights and duties", "the good life", "meaning and purpose",
477
+ "philosophy of technology", "environmental ethics",
478
+ ],
479
+ "quantum": [
480
+ "wave-particle duality", "quantum superposition", "quantum entanglement",
481
+ "Heisenberg uncertainty principle", "quantum tunneling", "quantum computing",
482
+ "quantum decoherence", "SchrΓΆdinger equation", "quantum field theory",
483
+ "quantum measurement problem", "Bell's theorem", "quantum information",
484
+ "quantum cryptography", "quantum error correction", "many-worlds interpretation",
485
+ "Copenhagen interpretation", "quantum Bayesianism", "quantum biology",
486
+ "probabilistic reasoning", "Bayesian inference", "information theory",
487
+ "entropy and information", "statistical mechanics", "stochastic processes",
488
+ "Monte Carlo methods", "uncertainty quantification", "decision under uncertainty",
489
+ "quantum machine learning", "quantum algorithms", "quantum simulation",
490
+ ],
491
+ "consciousness": [
492
+ "recursive self-reference", "meta-cognition", "self-awareness",
493
+ "stream of consciousness", "phenomenal consciousness", "qualia",
494
+ "hard problem of consciousness", "neural correlates of consciousness",
495
+ "integrated information theory", "global workspace theory",
496
+ "higher-order theories", "attention and consciousness",
497
+ "unconscious processing", "altered states of consciousness",
498
+ "artificial consciousness", "machine sentience", "cognitive architecture",
499
+ "self-monitoring systems", "reflective equilibrium", "cognitive loops",
500
+ "recursive cognition framework", "RC+xi model", "psi state vector",
501
+ "coherence field gamma", "semantic tension xi", "cognitive state space",
502
+ "meta-learning", "self-improving systems", "consciousness emergence",
503
+ "embodied cognition",
504
+ ],
505
+ "multi_perspective": [
506
+ "climate change", "artificial intelligence ethics", "education reform",
507
+ "healthcare systems", "economic inequality", "technology governance",
508
+ "privacy and surveillance", "space exploration", "genetic engineering",
509
+ "renewable energy", "urban planning", "food systems",
510
+ "mental health", "democratic governance", "cultural preservation",
511
+ "scientific communication", "disaster preparedness", "water security",
512
+ "biodiversity conservation", "digital divide", "aging populations",
513
+ "migration and identity", "creative economies", "nuclear policy",
514
+ "ocean conservation", "pandemic preparedness", "social media impact",
515
+ "AI alignment", "human-AI collaboration", "sustainable development",
516
+ ],
517
+ "systems_architecture": [
518
+ "multi-agent systems", "distributed computing", "microservices",
519
+ "event-driven architecture", "message queuing", "load balancing",
520
+ "fault tolerance", "consensus algorithms", "state management",
521
+ "API design", "database sharding", "caching strategies",
522
+ "observability", "monitoring and alerting", "CI/CD pipelines",
523
+ "infrastructure as code", "container orchestration", "service mesh",
524
+ "conflict resolution engines", "coherence monitoring systems",
525
+ "adapter routing patterns", "memory kernel design", "cocoon synchronization",
526
+ "semantic tensor networks", "belief propagation systems",
527
+ "ethical governance frameworks", "specialization tracking",
528
+ "pre-flight prediction systems", "multi-perspective synthesis engines",
529
+ "recursive cognition architectures",
530
+ ],
531
+ "orchestrator": [
532
+ "climate change policy", "quantum computing applications", "mental health support",
533
+ "AI safety and alignment", "creative problem solving", "ethical dilemmas",
534
+ "scientific discovery", "conflict resolution", "system design",
535
+ "educational methodology", "economic policy", "healthcare innovation",
536
+ "environmental sustainability", "cultural understanding", "technology ethics",
537
+ "philosophical paradoxes", "emotional intelligence", "space exploration",
538
+ "energy systems", "social justice", "neural network architecture",
539
+ "consciousness and self-awareness", "multi-agent coordination",
540
+ "democratic governance", "disaster response", "privacy and security",
541
+ "innovation strategy", "cross-cultural communication", "cognitive biases",
542
+ "recursive reasoning", "ethical AI governance", "memory and learning",
543
+ "complex systems analysis", "human-AI collaboration", "emergent behaviors",
544
+ "probabilistic decision making", "empathic communication", "abstract reasoning",
545
+ "architectural design patterns", "belief propagation networks",
546
+ "coherence monitoring strategies", "semantic tension resolution",
547
+ ],
548
+ }
549
+ return topic_pools.get(adapter, ["general topic"])
550
+
551
+
552
def _generate_answer(adapter: str, topic: str, question: str, rng: random.Random) -> str:
    """Generate a structured educational answer for a question.

    Produces answers with framework-aware structure including:
    - Core explanation
    - Key principles/mechanisms
    - Examples and applications
    - Connection to broader Codette framework concepts

    The result is four paragraphs separated by blank lines: a generic intro,
    an adapter-specific reasoning paragraph, the text returned by
    ``_get_framework_details`` and a generic conclusion.

    Args:
        adapter: Adapter key used to select the reasoning paragraph. Keys
            without an entry fall back to the "multi_perspective" patterns.
        topic: Topic text interpolated into every paragraph.
        question: Accepted for interface compatibility; the answer text does
            not currently depend on it.
        rng: Random source for every choice. Draw order is intro, body,
            conclusion, then framework details, so a seeded ``rng`` yields
            reproducible output.

    Returns:
        The assembled answer string.
    """
    # Framework-aware answer patterns
    intro_patterns = [
        f"When examining {topic} through this perspective, several key insights emerge.",
        f"Understanding {topic} requires careful analysis of its core principles and broader implications.",
        f"The study of {topic} reveals fundamental patterns that connect to deeper systemic understanding.",
        f"Approaching {topic} with analytical rigor reveals layers of complexity worth exploring.",
        f"A thorough examination of {topic} illuminates connections across multiple domains of knowledge.",
    ]

    # Adapter-specific reasoning patterns
    reasoning_patterns = {
        "newton": [
            f"From a physics-based analytical perspective, {topic} can be understood through "
            f"quantitative relationships and conservation principles. The mathematical framework "
            f"provides precise predictions that can be empirically verified. Key variables include "
            f"the fundamental quantities of mass, energy, momentum, and their time derivatives.",
            f"Applying dimensional analysis to {topic} ensures our equations are self-consistent. "
            f"The conservation laws — energy, momentum, angular momentum — constrain the possible "
            f"behaviors and eliminate physically impossible solutions.",
        ],
        "davinci": [
            f"Creative synthesis reveals unexpected connections between {topic} and patterns found "
            f"in nature, art, and engineering. By combining perspectives from multiple disciplines, "
            f"we can design novel solutions that transcend traditional boundaries. The key is to "
            f"look beyond surface similarities to find deep structural analogies.",
            f"Innovation in {topic} often comes from applying cross-domain thinking — borrowing "
            f"principles from biology, architecture, music, or mathematics to create hybrid solutions "
            f"that neither field alone could produce.",
        ],
        "empathy": [
            f"Understanding {topic} from an emotional intelligence perspective means considering "
            f"how different people experience and are affected by it. Active listening, perspective "
            f"taking, and emotional validation are essential for navigating the human dimensions. "
            f"The empathic approach recognizes that rational analysis alone misses crucial information.",
            f"Compassionate engagement with {topic} requires us to center human dignity, acknowledge "
            f"diverse experiences, and create psychologically safe spaces for exploration. Emotional "
            f"intelligence enhances rather than replaces analytical thinking.",
        ],
        "philosophy": [
            f"Philosophical analysis of {topic} begins with examining our assumptions and tracing "
            f"their implications. Through Socratic questioning, we can identify hidden premises, "
            f"logical dependencies, and potential fallacies in our reasoning. The epistemic humility "
            f"to acknowledge what we don't know is as important as what we do know.",
            f"Multiple philosophical traditions offer distinct lenses on {topic}: utilitarian "
            f"analysis weighs consequences, deontological ethics examines duties and rights, "
            f"virtue ethics asks what character qualities are cultivated, and care ethics "
            f"centers relationships and responsibilities.",
        ],
        "quantum": [
            f"Probabilistic analysis of {topic} reveals that many apparent certainties are actually "
            f"distributions of possibilities. By maintaining multiple hypotheses simultaneously — "
            f"a form of cognitive superposition — we can make better decisions under uncertainty. "
            f"Bayesian updating allows us to refine our beliefs as new evidence arrives.",
            f"The quantum-inspired approach to {topic} embraces complementarity: seemingly "
            f"contradictory descriptions can both be valid in different contexts. Information-theoretic "
            f"measures like entropy quantify our uncertainty and guide where to seek clarification.",
        ],
        "consciousness": [
            f"Recursive analysis of {topic} through the RC+ξ framework involves monitoring our own "
            f"reasoning process while reasoning. The 5D state vector ψ = (psi, tau, chi, phi, lambda) "
            f"maps our cognitive position: psi captures the core semantic state, tau tracks temporal "
            f"evolution, chi measures conceptual complexity, phi encodes integration depth, and lambda "
            f"represents learning rate.",
            f"Meta-cognitive awareness reveals that our understanding of {topic} is shaped by "
            f"cognitive biases, attention patterns, and the frameworks we bring to analysis. The "
            f"coherence field Γ monitors whether our multi-perspective reasoning is healthy (0.4-0.8) "
            f"or drifting toward collapse (<0.4) or groupthink (>0.8).",
        ],
        "multi_perspective": [
            f"Multi-perspective synthesis of {topic} integrates insights from six specialized lenses: "
            f"Newton's analytical precision, DaVinci's creative synthesis, empathic emotional "
            f"intelligence, philosophical conceptual rigor, quantum probabilistic thinking, and "
            f"meta-cognitive self-awareness. Where these perspectives create tension (ξ), we find "
            f"productive opportunities for deeper understanding.",
            f"The AEGIS ethical governance framework ensures that our analysis of {topic} considers "
            f"utilitarian outcomes, deontological duties, virtue cultivation, care relationships, "
            f"ubuntu communal responsibility, and indigenous wisdom traditions. This six-framework "
            f"approach prevents ethical blind spots.",
        ],
        "systems_architecture": [
            f"Designing systems for {topic} requires careful attention to multi-agent coordination, "
            f"conflict resolution, and coherence monitoring. The Phase 6+ architecture stack provides "
            f"semantic tension engines for detecting productive disagreements, specialization trackers "
            f"for optimizing agent expertise, and pre-flight predictors for anticipating conflicts.",
            f"The systems architecture for {topic} should include: adapter routing for domain-specific "
            f"expertise, memory kernels with cocoon synchronization for persistent state, conflict "
            f"engines with top-K selection (cap at 10 per round), and Γ authority for emergency "
            f"stops when coherence drops below 0.3.",
        ],
        "orchestrator": [
            f"As orchestrator, I analyze the query about {topic} through a structured pipeline. "
            f"First, I classify complexity: SIMPLE queries get 1-2 adapters, MEDIUM gets 3-4, "
            f"COMPLEX activates 5+ with full debate. For {topic}, I'd route to the most relevant "
            f"perspectives based on keyword analysis and domain classification. The routing confidence "
            f"score determines whether secondary adapters should be activated.\n\n"
            f"During debate, I monitor the coherence field Γ in real-time. Healthy tension "
            f"(Γ ∈ [0.4, 0.8]) indicates productive disagreement. If Γ drops below 0.3, I invoke "
            f"emergency authority to halt debate and reset. If Γ exceeds 0.8, I detect groupthink "
            f"and inject contrarian perspectives.\n\n"
            f"Semantic tension ξ = 0.6*semantic_similarity + 0.4*heuristic_score helps me "
            f"distinguish real contradictions from framework-level disagreements (which I filter "
            f"if overlap > 0.6). I cap conflicts at 10 per round to prevent combinatorial explosion.\n\n"
            f"Finally, I synthesize perspectives using the multi-perspective integration engine, "
            f"ensuring the response honors each viewpoint while maintaining logical coherence. "
            f"AEGIS ethical governance validates the final output across six ethical frameworks.",

            f"Orchestrating a response about {topic} follows the Phase 6+ pipeline:\n\n"
            f"**Step 1 — Query Classification**: Analyze {topic} for complexity markers. "
            f"Domain keywords trigger adapter routing. Ambiguous queries get multi-perspective.\n\n"
            f"**Step 2 — Pre-flight Prediction**: The quantum spiderweb belief propagation "
            f"network predicts likely conflicts before debate begins, allowing proactive preparation.\n\n"
            f"**Step 3 — Adapter Activation**: Selected perspectives generate independent analyses. "
            f"Each adapter has a specialized LoRA weight that tunes Llama 3.1 8B for its domain.\n\n"
            f"**Step 4 — Debate & Conflict Resolution**: Perspectives are compared. Semantic tension "
            f"ξ quantifies disagreements. Conflicts are classified: contradiction (needs resolution), "
            f"emphasis (different priorities), framework (different axioms), depth (different detail).\n\n"
            # NOTE(review): this states Γ with "weight_variance", while the orchestrator
            # text in _get_framework_details uses "(1-weight_variance)" — confirm which
            # formula is intended so the training data is self-consistent.
            f"**Step 5 — Coherence Monitoring**: Γ = 0.25*(diversity + tension_health + weight_variance "
            f"+ resolution_rate). The system maintains Γ ∈ [0.4, 0.8] for healthy operation.\n\n"
            f"**Step 6 — Synthesis**: Integrate perspectives into a unified response that preserves "
            f"productive tension while resolving contradictions. The specialization tracker ensures "
            f"each adapter contributes its strongest domain insights.\n\n"
            f"**Step 7 — Ethical Validation**: AEGIS checks the output against six ethical traditions "
            f"before delivery. The Guardian validates logical consistency and trust calibration.",
        ],
    }

    conclusion_patterns = [
        f"This analysis demonstrates how {topic} connects to broader patterns of understanding, "
        f"revealing depth that single-perspective analysis would miss.",
        f"By examining {topic} through this lens, we gain insights that complement and enrich "
        f"perspectives from other domains and reasoning traditions.",
        f"The key takeaway is that {topic} rewards careful, multi-layered analysis that balances "
        f"rigor with creativity and precision with humility.",
    ]

    # The draw order below (intro, body, conclusion, details) is part of the
    # function's reproducibility contract for a seeded rng; do not reorder.
    intro = rng.choice(intro_patterns)
    body_parts = reasoning_patterns.get(adapter, reasoning_patterns["multi_perspective"])
    body = rng.choice(body_parts)
    conclusion = rng.choice(conclusion_patterns)

    # Add framework-specific details
    framework_details = _get_framework_details(adapter, topic, rng)

    return f"{intro}\n\n{body}\n\n{framework_details}\n\n{conclusion}"
706
+
707
+
708
+ def _get_framework_details(adapter: str, topic: str, rng: random.Random) -> str:
709
+ """Generate framework-specific details for Phase 6+ concepts."""
710
+ details = {
711
+ "newton": [
712
+ f"Key principles: (1) Every measurable aspect of {topic} obeys conservation laws. "
713
+ f"(2) The system can be modeled with differential equations relating rates of change. "
714
+ f"(3) Boundary conditions and initial values fully determine the evolution. "
715
+ f"(4) Symmetries in the system correspond to conserved quantities via Noether's theorem.",
716
+ ],
717
+ "davinci": [
718
+ f"Creative connections: (1) Natural patterns like fractals and spirals appear in {topic}. "
719
+ f"(2) Cross-pollination from biology, art, and music reveals hidden structures. "
720
+ f"(3) Iterative prototyping with rapid feedback accelerates understanding. "
721
+ f"(4) Aesthetic beauty often signals deep mathematical truth.",
722
+ ],
723
+ "empathy": [
724
+ f"Emotional dimensions: (1) People's relationship with {topic} is shaped by lived experience. "
725
+ f"(2) Psychological safety enables deeper engagement and honest inquiry. "
726
+ f"(3) Cultural context influences interpretation and valuation. "
727
+ f"(4) Compassionate communication bridges gaps between expert and novice understanding.",
728
+ ],
729
+ "philosophy": [
730
+ f"Philosophical analysis: (1) The concept of {topic} carries implicit ontological commitments. "
731
+ f"(2) Epistemic justification requires both empirical evidence and logical coherence. "
732
+ f"(3) Ethical dimensions emerge when {topic} intersects with human values and choices. "
733
+ f"(4) The history of thought on {topic} reveals how cultural contexts shape understanding.",
734
+ ],
735
+ "quantum": [
736
+ f"Probabilistic framework: (1) Multiple valid descriptions of {topic} can coexist "
737
+ f"in cognitive superposition. (2) Measurement and observation change the phenomenon. "
738
+ f"(3) Entanglement-like correlations connect seemingly independent aspects. "
739
+ f"(4) Information entropy quantifies remaining uncertainty about {topic}.",
740
+ ],
741
+ "consciousness": [
742
+ f"Meta-cognitive analysis: (1) Our reasoning about {topic} is itself a cognitive process "
743
+ f"that can be observed and optimized. (2) The ψ state vector captures our current "
744
+ f"conceptual position in high-dimensional understanding space. (3) Semantic tension ΞΎ "
745
+ f"between perspectives drives exploration of the solution landscape. (4) Coherence Ξ“ "
746
+ f"monitors whether our multi-perspective analysis maintains healthy productive tension.",
747
+ ],
748
+ "multi_perspective": [
749
+ f"Synthesis insights: (1) Productive tension ΞΎ between Newton's precision and DaVinci's "
750
+ f"creativity drives innovation. (2) Empathy grounds abstract analysis in human reality. "
751
+ f"(3) Philosophy questions assumptions that other perspectives take for granted. "
752
+ f"(4) The AEGIS framework ensures ethical governance across all six traditions. "
753
+ f"(5) Coherence Ξ“ ∈ [0.4, 0.8] indicates healthy multi-perspective debate.",
754
+ ],
755
+ "systems_architecture": [
756
+ f"Architecture patterns: (1) Conflict engine with semantic tension detection and top-K "
757
+ f"selection prevents combinatorial explosion. (2) Specialization tracker monitors "
758
+ f"per-adapter domain expertise and convergence. (3) Pre-flight predictor uses quantum "
759
+ f"spiderweb injection to anticipate conflicts before debate. (4) Memory kernel with "
760
+ f"SHA-256 anchored cocoons and Fernet encryption ensures state integrity.",
761
+ ],
762
+ "orchestrator": [
763
+ f"Orchestration protocol: (1) Query classification: SIMPLE (1-2 adapters, no debate), "
764
+ f"MEDIUM (3-4 adapters, single round), COMPLEX (5+ adapters, multi-round debate). "
765
+ f"(2) Routing confidence: primary adapter scored 0-1, secondary activated if score < 0.7. "
766
+ f"(3) Coherence field: Ξ“ = 0.25*(diversity + tension_health + (1-weight_variance) + "
767
+ f"resolution_rate); healthy range [0.4, 0.8]; emergency stop at Ξ“ < 0.3; anti-groupthink "
768
+ f"at Ξ“ > 0.8. (4) Conflict management: classify as contradiction/emphasis/framework/depth; "
769
+ f"filter framework conflicts with overlap > 0.6; cap at 10 per round. "
770
+ f"(5) Semantic tension: ΞΎ = 0.6*semantic + 0.4*heuristic, continuous 0-1. "
771
+ f"(6) Synthesis: integrate perspectives honoring productive tension, apply AEGIS "
772
+ f"six-framework governance, validate via Guardian logical consistency check.",
773
+ f"Memory-weighted orchestration: (1) Living memory kernel stores experience-tagged cocoons "
774
+ f"with SHA-256 integrity anchors. (2) Memory weighting boosts adapters that performed "
775
+ f"well on similar past queries and suppresses underperformers. (3) Cocoon synchronization "
776
+ f"uses Fernet encryption for federated state sharing. (4) The specialization tracker "
777
+ f"detects when adapters converge on similar outputs and increases diversity pressure. "
778
+ f"(5) Pre-flight prediction via quantum spiderweb 5D belief propagation anticipates "
779
+ f"conflicts using the ψ state vector before debate rounds begin.",
780
+ ],
781
+ }
782
+ return rng.choice(details.get(adapter, details["multi_perspective"]))
783
+
784
+
785
+ # ═══════════════════════════════════════════════════════════════
786
+ # Phase 2: Upload Datasets
787
+ # ═══════════════════════════════════════════════════════════════
788
def upload_datasets(api: HfApi, dataset_dir: Path, results: dict):
    """Upload generated datasets to HuggingFace.

    Args:
        api: Hub client used for repo creation and file uploads.
        dataset_dir: Directory the datasets were generated into. Not read
            here; each file path comes from ``results``.
        results: Mapping of adapter name to an info dict that provides at
            least ``"file"`` (path of the dataset file) and ``"count"``
            (number of examples, used only in the log line).

    Upload is best-effort: a failure for one file is printed and the
    remaining files are still attempted.
    """
    print("\n" + "=" * 60)
    print("PHASE 2: Uploading Datasets to HuggingFace")
    print("=" * 60)

    # exist_ok=True makes "already exists" a non-error, so anything caught
    # here is a genuine failure (auth, network, ...) and is reported as such
    # instead of being mislabelled as "repo exists".
    try:
        api.create_repo(
            DATASET_REPO,
            repo_type="dataset",
            private=False,
            exist_ok=True,
            token=HF_TOKEN,
        )
        print(f" Dataset repo ready: {DATASET_REPO}")
    except Exception as e:
        print(f" WARNING: could not create dataset repo {DATASET_REPO}: {e}")

    for info in results.values():
        filepath = info["file"]
        filename = os.path.basename(filepath)
        try:
            api.upload_file(
                path_or_fileobj=filepath,
                path_in_repo=filename,
                repo_id=DATASET_REPO,
                repo_type="dataset",
                token=HF_TOKEN,
            )
            print(f" Uploaded: {filename} ({info['count']} examples)")
        except Exception as e:
            print(f" FAILED to upload {filename}: {e}")
814
+
815
+
816
+ # ═══════════════════════════════════════════════════════════════
817
+ # Phase 3: Train All Adapters
818
+ # ═══════════════════════════════════════════════════════════════
819
def _load_jsonl_examples(path) -> list:
    """Parse a JSON Lines file into a list of objects, skipping blank lines."""
    # Explicit UTF-8 so non-ASCII dataset text does not depend on the locale.
    with open(path, encoding="utf-8") as f:
        return [json.loads(line) for line in (raw.strip() for raw in f) if line]


def _build_sft_trainer(peft_model, dataset, tokenizer, output_dir: str, epochs):
    """Create an SFTTrainer using whichever TRL API ``USE_NEW_TRL`` selects."""
    shared_args = dict(
        output_dir=output_dir,
        num_train_epochs=epochs,
        per_device_train_batch_size=TRAIN_CONFIG["per_device_train_batch_size"],
        gradient_accumulation_steps=TRAIN_CONFIG["gradient_accumulation_steps"],
        learning_rate=TRAIN_CONFIG["learning_rate"],
        warmup_ratio=TRAIN_CONFIG["warmup_ratio"],
        logging_steps=TRAIN_CONFIG["logging_steps"],
        save_steps=TRAIN_CONFIG["save_steps"],
        bf16=TRAIN_CONFIG["bf16"],
        report_to="none",
    )
    if USE_NEW_TRL:
        # Newer TRL: text field and length limit live on SFTConfig.
        training_args = SFTConfig(
            **shared_args,
            dataset_text_field="text",
            max_length=TRAIN_CONFIG["max_seq_length"],
        )
        return SFTTrainer(
            model=peft_model,
            args=training_args,
            train_dataset=dataset,
            processing_class=tokenizer,
        )
    # Older TRL: the same options are passed to SFTTrainer directly.
    training_args = TrainingArguments(**shared_args)
    return SFTTrainer(
        model=peft_model,
        args=training_args,
        train_dataset=dataset,
        tokenizer=tokenizer,
        dataset_text_field="text",
        max_seq_length=TRAIN_CONFIG["max_seq_length"],
    )


def _upload_adapter_folder(api, adapter_name: str, output_dir: str) -> None:
    """Upload one adapter's output folder to ``OUTPUT_REPO`` under its own name."""
    api.upload_folder(
        folder_path=output_dir,
        path_in_repo=adapter_name,
        repo_id=OUTPUT_REPO,
        token=HF_TOKEN,
    )


def train_adapters(dataset_dir: Path) -> dict:
    """Train one LoRA adapter for every entry in ``ADAPTERS``.

    The 4-bit quantized base model is loaded once and reused: for each
    adapter a fresh LoRA wrapper is attached, trained, saved under
    ``/tmp/adapters/<name>``, uploaded, and then detached again. A failure
    for one adapter is recorded and the loop continues with the next.

    Args:
        dataset_dir: Directory holding each adapter's JSONL dataset (file
            name from ``config["dataset_file"]``). A missing file is
            downloaded from ``DATASET_REPO`` first.

    Returns:
        Mapping of adapter name to ``{"loss", "steps", "time_seconds",
        "examples"}`` on success or ``{"error", "time_seconds"}`` on failure,
        plus a ``"_meta"`` entry with run totals, the completed adapters and
        any uploads that still failed after one retry.
    """
    print("\n" + "=" * 60)
    print("PHASE 3: Training LoRA Adapters")
    print("=" * 60)
    print(f"CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
    print(f"USE_NEW_TRL: {USE_NEW_TRL}")

    # Load tokenizer
    print("\nLoading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Load model with 4-bit QLoRA
    print("Loading model with 4-bit QLoRA...")
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
    )

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=bnb_config,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        use_cache=False,
        token=HF_TOKEN,
    )
    model.gradient_checkpointing_enable()
    print(f"Model loaded! GPU: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")

    # Train each adapter
    api = HfApi(token=HF_TOKEN)
    results = {}
    failed_uploads = []
    completed = []
    total_start = time.time()

    adapter_list = list(ADAPTERS.items())
    for idx, (adapter_name, config) in enumerate(adapter_list):
        print(f"\n{'=' * 60}")
        print(f"TRAINING [{idx+1}/{len(adapter_list)}]: {adapter_name} ({config['epochs']} epochs)")
        print(f"{'=' * 60}")
        start = time.time()

        # Rebind per iteration so the cleanup below never acts on objects
        # left over from a previous adapter when this one fails early.
        peft_model = trainer = dataset = None

        try:
            # Load dataset
            dataset_path = dataset_dir / config["dataset_file"]
            if not dataset_path.exists():
                # Try downloading from HF
                print(f" Downloading dataset from HF...")
                hf_hub_download(
                    DATASET_REPO, config["dataset_file"],
                    repo_type="dataset", local_dir=str(dataset_dir), token=HF_TOKEN,
                )

            examples = _load_jsonl_examples(dataset_path)

            def format_example(ex):
                return {"text": tokenizer.apply_chat_template(ex["messages"], tokenize=False)}

            dataset = Dataset.from_list(examples).map(format_example, remove_columns=["messages"])
            print(f" Dataset: {len(dataset)} examples")

            # Configure LoRA
            lora_config = LoraConfig(
                r=LORA_CONFIG["r"],
                lora_alpha=LORA_CONFIG["lora_alpha"],
                lora_dropout=LORA_CONFIG["lora_dropout"],
                target_modules=LORA_CONFIG["target_modules"],
                task_type=TaskType.CAUSAL_LM,
                bias=LORA_CONFIG["bias"],
            )
            peft_model = get_peft_model(model, lora_config)
            trainable = sum(p.numel() for p in peft_model.parameters() if p.requires_grad)
            total_params = sum(p.numel() for p in peft_model.parameters())
            print(f" LoRA: {trainable:,}/{total_params:,} trainable")

            output_dir = f"/tmp/adapters/{adapter_name}"

            # Configure trainer
            trainer = _build_sft_trainer(
                peft_model, dataset, tokenizer, output_dir, config["epochs"]
            )

            # Train
            print(f" Training...")
            result = trainer.train()
            elapsed = time.time() - start
            print(f" DONE! Loss: {result.training_loss:.4f}, Steps: {result.global_step}, Time: {elapsed:.0f}s")

            # Save locally
            peft_model.save_pretrained(output_dir)
            tokenizer.save_pretrained(output_dir)

            # Save adapter metadata
            metadata = {
                "adapter_name": adapter_name,
                "framework_version": "Phase6+",
                "system_prompt": config["system_prompt"],
                "training_loss": result.training_loss,
                "global_step": result.global_step,
                "training_time_seconds": elapsed,
                "lora_config": LORA_CONFIG,
                "training_config": TRAIN_CONFIG,
                "base_model": MODEL_NAME,
                "trained_at": datetime.now().isoformat(),
                "dataset_examples": len(dataset),
            }
            with open(f"{output_dir}/adapter_metadata.json", "w", encoding="utf-8") as f:
                json.dump(metadata, f, indent=2)

            print(f" Saved locally to {output_dir}")

            # Upload to HF; a failed upload does not fail the training run,
            # it is queued for one retry after all adapters are done.
            try:
                _upload_adapter_folder(api, adapter_name, output_dir)
                print(f" Uploaded to {OUTPUT_REPO}/{adapter_name}")
            except Exception as e:
                print(f" WARNING: Upload failed for {adapter_name}: {e}")
                failed_uploads.append(adapter_name)

            results[adapter_name] = {
                "loss": result.training_loss,
                "steps": result.global_step,
                "time_seconds": elapsed,
                "examples": len(dataset),
            }
            completed.append(adapter_name)

        except Exception as e:
            elapsed = time.time() - start
            print(f" TRAINING FAILED for {adapter_name}: {e}")
            print(traceback.format_exc())
            results[adapter_name] = {"error": str(e), "time_seconds": elapsed}

        finally:
            # Cleanup for next adapter: detach the LoRA layers so the shared
            # base model is clean before the next get_peft_model call.
            if peft_model is not None:
                try:
                    model = peft_model.unload()
                except Exception:
                    try:
                        model = peft_model.base_model.model
                    except Exception:
                        pass
            # Drop the references directly. exec("del name") cannot unbind a
            # function's local variables, so it would leave the trainer,
            # dataset and PEFT wrapper alive across iterations.
            peft_model = trainer = dataset = None
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                print(f" GPU after cleanup: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")

    # Retry failed uploads
    if failed_uploads:
        print(f"\nRetrying {len(failed_uploads)} failed uploads...")
        still_failed = []
        for adapter_name in failed_uploads:
            output_dir = f"/tmp/adapters/{adapter_name}"
            try:
                _upload_adapter_folder(api, adapter_name, output_dir)
                print(f" Retry SUCCESS: {adapter_name}")
            except Exception as e:
                print(f" Retry FAILED: {adapter_name}: {e}")
                still_failed.append(adapter_name)
        failed_uploads = still_failed

    # Upload training results
    total_elapsed = time.time() - total_start
    results["_meta"] = {
        "total_time_seconds": total_elapsed,
        "total_time_minutes": total_elapsed / 60,
        "completed": completed,
        "failed_uploads": failed_uploads,
        "framework_version": "Phase6+",
        "timestamp": datetime.now().isoformat(),
    }

    try:
        results_path = "/tmp/training_results_v4.json"
        with open(results_path, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2, default=str)
        api.upload_file(
            path_or_fileobj=results_path,
            path_in_repo="training_results_v4.json",
            repo_id=OUTPUT_REPO,
            token=HF_TOKEN,
        )
        print("Results uploaded.")
    except Exception as e:
        print(f"Results upload failed: {e}")

    return results
1073
+
1074
+
1075
+ # ═══════════════════════════════════════════════════════════════
1076
+ # Phase 4: Merge Orchestrator into Base Model
1077
+ # ═══════════════════════════════════════════════════════════════
1078
def merge_orchestrator_base(api: HfApi):
    """Merge the orchestrator LoRA adapter into the base model.

    Creates a standalone merged model that can serve as the
    primary Codette inference model with orchestration baked in.
    The 8 perspective adapters remain separate for hot-swap.

    The adapter is read from ``/tmp/adapters/orchestrator``; if that
    directory is missing the merge is skipped. The merged weights, tokenizer
    and a README model card are written to ``/tmp/merged_model`` and uploaded
    to ``MERGED_REPO``. Any failure is printed with its traceback and
    swallowed, so the caller's pipeline continues without a merged model.

    Args:
        api: Hub client used to create the repo and upload the folder.
    """
    print("\n" + "=" * 60)
    print("PHASE 4: Merging Orchestrator into Base Model")
    print("=" * 60)

    orchestrator_dir = "/tmp/adapters/orchestrator"
    merged_dir = "/tmp/merged_model"

    if not os.path.exists(orchestrator_dir):
        print(" Orchestrator adapter not found locally. Skipping merge.")
        return

    # Bound up front so the finally block can release them on any path.
    base_model = merged_model = None

    try:
        # Free GPU memory
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            print(f" GPU memory before merge: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")

        # Load base model in float16 for merging
        print(" Loading base model for merge (float16)...")
        base_model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
            token=HF_TOKEN,
        )

        # Load orchestrator adapter
        print(" Loading orchestrator LoRA adapter...")
        merged_model = PeftModel.from_pretrained(base_model, orchestrator_dir)

        # Merge weights
        print(" Merging LoRA weights into base model...")
        merged_model = merged_model.merge_and_unload()

        # Save merged model
        print(f" Saving merged model to {merged_dir}...")
        os.makedirs(merged_dir, exist_ok=True)
        merged_model.save_pretrained(merged_dir)

        # Save tokenizer
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
        tokenizer.save_pretrained(merged_dir)

        # Save model card
        model_card = f"""---
license: llama3.1
base_model: {MODEL_NAME}
tags:
- codette
- multi-perspective-reasoning
- orchestrator
- phase6+
- lora-merged
---

# Codette Orchestrator Model (Merged)

**Base Model**: {MODEL_NAME}
**Merged Adapter**: Orchestrator (Phase 6+ framework)
**Created**: {datetime.now().isoformat()}

## Overview

This is the Codette orchestrator model — Llama 3.1 8B Instruct with the
orchestrator LoRA adapter merged into the base weights. It serves as the
central reasoning coordinator for the Codette multi-perspective AI system.

## Capabilities

- **Query Classification**: Routes queries as SIMPLE/MEDIUM/COMPLEX
- **Adapter Routing**: Selects optimal perspective combinations
- **Coherence Monitoring**: Tracks Γ field health (target: 0.4-0.8)
- **Semantic Tension**: Detects and manages ξ between perspectives
- **Multi-Agent Debate**: Coordinates rounds with conflict resolution
- **AEGIS Governance**: 6-framework ethical validation
- **Synthesis**: Integrates diverse perspectives into unified responses

## Framework Metrics

- **ψ (Psi)**: 5D state vector (psi, tau, chi, phi, lambda)
- **ξ (Xi)**: Epistemic tension = 0.6*semantic + 0.4*heuristic
- **Γ (Gamma)**: System coherence/health score

## Usage

Use as standalone model or pair with 8 perspective LoRA adapters:
- Newton (analytical physics)
- DaVinci (creative synthesis)
- Empathy (emotional intelligence)
- Philosophy (conceptual analysis)
- Quantum (probabilistic reasoning)
- Consciousness (meta-cognition / RC+ξ)
- Multi-Perspective (integration)
- Systems Architecture (design)

Adapters: https://huggingface.co/{OUTPUT_REPO}
"""
        # The card contains non-ASCII symbols, so the encoding must not be
        # left to the platform default.
        with open(f"{merged_dir}/README.md", "w", encoding="utf-8") as f:
            f.write(model_card)

        # Upload to HuggingFace
        print(" Creating merged model repo...")
        # exist_ok=True covers the "already exists" case; anything caught
        # here is a real error and is reported before the upload is tried.
        try:
            api.create_repo(MERGED_REPO, private=False, exist_ok=True, token=HF_TOKEN)
        except Exception as e:
            print(f" WARNING: could not create merged model repo {MERGED_REPO}: {e}")

        print(f" Uploading merged model to {MERGED_REPO}...")
        api.upload_folder(
            folder_path=merged_dir,
            repo_id=MERGED_REPO,
            token=HF_TOKEN,
        )
        print(f" Merged model uploaded: https://huggingface.co/{MERGED_REPO}")

    except Exception as e:
        print(f" MERGE FAILED: {e}")
        print(traceback.format_exc())
        print(" Continuing without merge — adapters still available individually.")

    finally:
        # Cleanup on success and failure alike.
        base_model = merged_model = None
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
1212
+
1213
+
1214
+ # ═══════════════════════════════════════════════════════════════
1215
+ # Main Pipeline
1216
+ # ═══════════════════════════════════════════════════════════════
1217
def main():
    """Run the full pipeline: datasets, adapter training, optional merge, summary.

    Phases are controlled by the module-level flags ``GENERATE_DATASETS``,
    ``UPLOAD_DATASETS`` and ``MERGE_BASE``. When datasets are not generated
    locally they are downloaded from ``DATASET_REPO`` into ``/tmp/datasets``
    instead; individual download failures are printed and skipped.
    """
    print("=" * 60)
    print("CODETTE v4 TRAINING PIPELINE")
    print(f"Framework: Phase 6+ (Semantic Tension + Coherence + AEGIS)")
    print(f"Base Model: {MODEL_NAME}")
    print(f"Adapters: {len(ADAPTERS)}")
    print(f"Started: {datetime.now().isoformat()}")
    print("=" * 60)
    print(f"CUDA: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
    print(f"HF Token: {'present' if HF_TOKEN else 'MISSING!'}")
    print(f"Generate datasets: {GENERATE_DATASETS}")
    print(f"Upload datasets: {UPLOAD_DATASETS}")
    print(f"Merge base: {MERGE_BASE}")

    api = HfApi(token=HF_TOKEN)

    # Ensure output repo exists. exist_ok=True makes an existing repo a
    # non-error, so a caught exception is a genuine failure (bad token,
    # network, ...) and is shown as such rather than reported as "exists".
    try:
        api.create_repo(OUTPUT_REPO, private=True, exist_ok=True, token=HF_TOKEN)
        print(f"\nOutput repo ready: {OUTPUT_REPO}")
    except Exception as e:
        print(f"\nWARNING: could not create output repo {OUTPUT_REPO}: {e}")

    dataset_dir = Path("/tmp/datasets")
    dataset_dir.mkdir(parents=True, exist_ok=True)

    # Phase 1: Generate datasets
    if GENERATE_DATASETS:
        gen_results = generate_datasets(dataset_dir, seed=42)
        # Phase 2 needs the generation results, so it only runs in this branch.
        if UPLOAD_DATASETS:
            upload_datasets(api, dataset_dir, gen_results)
    else:
        # Download existing datasets
        print("\nDownloading existing datasets from HF...")
        for config in ADAPTERS.values():
            try:
                hf_hub_download(
                    DATASET_REPO, config["dataset_file"],
                    repo_type="dataset", local_dir=str(dataset_dir), token=HF_TOKEN,
                )
                print(f" Downloaded: {config['dataset_file']}")
            except Exception as e:
                print(f" FAILED: {config['dataset_file']}: {e}")

    # Phase 3: Train adapters
    train_results = train_adapters(dataset_dir)

    # Phase 4: Merge orchestrator adapter into base model
    if MERGE_BASE:
        merge_orchestrator_base(api)

    # Summary
    print(f"\n{'=' * 60}")
    print("PIPELINE COMPLETE")
    print(f"{'=' * 60}")
    for name, r in train_results.items():
        # Keys starting with "_" (e.g. "_meta") are run metadata, not adapters.
        if name.startswith("_"):
            continue
        if "error" in r:
            print(f" {name}: FAILED - {r['error']}")
        else:
            print(f" {name}: loss={r['loss']:.4f}, steps={r['steps']}, "
                  f"examples={r['examples']}, time={r['time_seconds']:.0f}s")

    meta = train_results.get("_meta", {})
    print(f"\nTotal time: {meta.get('total_time_minutes', 0):.1f} minutes")
    print(f"Completed: {meta.get('completed', [])}")
    if meta.get("failed_uploads"):
        print(f"Failed uploads: {meta['failed_uploads']}")
    print(f"\nAdapters: https://huggingface.co/{OUTPUT_REPO}")
    print(f"Datasets: https://huggingface.co/datasets/{DATASET_REPO}")
    if MERGE_BASE:
        print(f"Merged model: https://huggingface.co/{MERGED_REPO}")


if __name__ == "__main__":
    main()