| """ |
| PIPELINE PRINCIPAL — Modelo Híbrido de LLM |
| =========================================== |
| Orquestra as 8 etapas do fluxo completo: |
| |
| 1. Recepção do prompt |
| 2. Extração de conceitos [L1] |
| 3. Refinamento por Juízos Kantianos [L2] |
| 4. Silogismo Científico + Hempel |
| 5. Falseabilidade de Popper |
| 6. Avaliação Paraconsistente [L3] |
| 7. Síntese por Equivalência [L4] |
| 8. Geração da Resposta [L5 — opcional] |
| |
| Usa config_loader, knowledge_base (KB escalável + RAG opcional), l5_generation |
| e opcionalmente o agente de pesquisa para enriquecer contexto. |
| """ |

from __future__ import annotations
import re
import time
import os
from pathlib import Path
from typing import Dict, List, Optional, Any

import torch

from neural_truth_model import TruthScoringModel, load_tokenizer
from l1_concept_table import ConceptTable, ConceptNode
from l2_kantian_judgments import KantianJudgmentEngine, KantianJudgment
from syllogism_module import ScientificSyllogismPipeline
from l3_paraconsistent import ParaconsistentEngine, ParaconsistentValue
from l4_synthesis import RussellianSynthesisEngine, SynthesisResult

# Optional dependencies: each import degrades to a safe fallback when missing.
try:
    from l4_russell_equivalence import load_concept_base
except Exception:
    load_concept_base = None

try:
    from config_loader import load_config, PROJECT_ROOT
except Exception:
    load_config = None
    PROJECT_ROOT = Path(__file__).resolve().parent

try:
    from knowledge_base import get_knowledge_base, SEED_KNOWLEDGE_BASE
except Exception:
    get_knowledge_base = None
    SEED_KNOWLEDGE_BASE = {}

try:
    from l5_generation import generate_response as l5_generate
except Exception:
    l5_generate = None

try:
    from agente_busca_web import run_search_for_context
except Exception:
    run_search_for_context = None


def _get_kb(config: Optional[Dict[str, Any]], prompt: str, use_agent: bool) -> Dict[str, float]:
    """Return the knowledge base, falling back to the seed KB when the scalable KB is unavailable."""
    if get_knowledge_base is None:
        return dict(SEED_KNOWLEDGE_BASE) if SEED_KNOWLEDGE_BASE else {}
    return get_knowledge_base(
        config=config,
        query_for_rag=prompt if use_agent else None,
    )


class HybridLLMPipeline:
    """
    Full pipeline for the Hybrid LLM Model.
    Supports config, scalable KB, L5 (generation), optional agent, and chat.
    """

    def __init__(
        self,
        knowledge_base: Optional[Dict[str, float]] = None,
        config: Optional[Dict[str, Any]] = None,
        verbose: bool = True,
    ) -> None:
        self._config = config or (load_config() if load_config else {})
        self.kb = knowledge_base or _get_kb(self._config, "", False)
        if not self.kb:
            self.kb = dict(SEED_KNOWLEDGE_BASE) if SEED_KNOWLEDGE_BASE else {}
        self.verbose = verbose

        self.L1 = ConceptTable()
        self.L2 = KantianJudgmentEngine(self.L1)
        self.SYL = ScientificSyllogismPipeline()

        # L3: paraconsistent engine, optionally backed by a neural
        # truth-scoring model when a trained checkpoint exists on disk.
        l3_cfg = self._config.get("l3", {})
        model_path = l3_cfg.get("model_path", "truth_scoring_model.pt")
        backbone_name = l3_cfg.get("backbone", "bert-base-multilingual-cased")
        if not Path(model_path).is_absolute():
            model_path = str(PROJECT_ROOT / model_path)
        neural_model = None
        neural_tokenizer = None
        if os.path.exists(model_path):
            try:
                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                neural_tokenizer = load_tokenizer(backbone_name)
                neural_model = TruthScoringModel(backbone_name=backbone_name)
                state = torch.load(model_path, map_location=device)
                neural_model.load_state_dict(state)
                neural_model.to(device)
                if self.verbose:
                    print(f"[L3] Neural model loaded from '{model_path}'")
                self.L3 = ParaconsistentEngine(neural_model=neural_model, neural_tokenizer=neural_tokenizer, device=device)
            except Exception as exc:
                if self.verbose:
                    print(f"[L3] Failed to load neural model: {exc}")
                self.L3 = ParaconsistentEngine()
        else:
            self.L3 = ParaconsistentEngine()

        # L4: Russellian synthesis. Prefer a concept base loaded from disk,
        # else build one in memory, else fall back to KB-only weights.
        russell_base = None
        rpath = self._config.get("l4", {}).get("russell_concepts_path", "l4_russell_concepts.json")
        if not Path(rpath).is_absolute():
            rpath = str(PROJECT_ROOT / rpath)
        if load_concept_base and os.path.exists(rpath):
            try:
                russell_base = load_concept_base(rpath)
                if self.verbose:
                    print("[L4] Russellian base loaded.")
            except Exception:
                pass
        if russell_base is None and load_concept_base:
            try:
                from l4_russell_equivalence import build_russell_concept_base
                russell_base = build_russell_concept_base()
            except Exception:
                pass
        self.L4 = RussellianSynthesisEngine(
            self.kb,
            russell_concept_base=russell_base,
            use_concept_based_weights=(russell_base is not None),
        )

    def process(
        self,
        prompt: str,
        chat_session: Optional[Any] = None,
        use_agent: Optional[bool] = None,
        skip_l5: bool = False,
    ) -> SynthesisResult:
        """Run the pipeline and return a SynthesisResult (with the response already generated by L5 when active)."""
        t0 = time.perf_counter()
        use_agent = use_agent if use_agent is not None else self._config.get("agent", {}).get("use_agent", False)
        # chat_session is duck-typed: any object exposing
        # get_context_for_prompt(prompt, max_turns) -> str is accepted.
        if chat_session and hasattr(chat_session, "get_context_for_prompt"):
            prompt_for_kb = chat_session.get_context_for_prompt(prompt, self._config.get("chat", {}).get("max_turns_in_context", 10))
        else:
            prompt_for_kb = prompt

        # When the agent is enabled, refresh the KB with RAG context for this prompt.
        if use_agent and get_knowledge_base:
            self.kb = _get_kb(self._config, prompt_for_kb, True)
            if not self.kb:
                self.kb = dict(SEED_KNOWLEDGE_BASE) if SEED_KNOWLEDGE_BASE else {}

        self._log("\n" + "═" * 60)
        self._log(f" PROMPT: {prompt[:200]}{'...' if len(prompt) > 200 else ''}")
        self._log("═" * 60)

        limit = RussellianSynthesisEngine.check_fundamental_limits(prompt)
        if limit:
            self._log(f"\n{limit}")

        self._log("\n[STAGE 2] L1 — Concept Extraction")
        concepts: List[ConceptNode] = self.L1.extract_concepts(prompt)
        if self.verbose and concepts:
            for c in concepts:
                syns = ", ".join(c.synonyms[:2]) or "—"
                self._log(f" • {c.term:15s} | synonyms: {syns}")
        concepts_summary = "; ".join(f"{c.term}({', '.join(c.synonyms[:2])})" for c in concepts[:8])

        self._log("\n[STAGE 3] L2 — Kantian Judgments")
        judgments: List[KantianJudgment] = self.L2.refine(prompt, concepts)
        top_judgments = "\n".join(j.proposicao for j in judgments[:6])

        self._log("\n[STAGES 4+5] Syllogism + Hempel + Popper")
        prompt_terms = set(re.findall(r"[a-záàãâéêíóôõúüçA-ZÁÀÃÂÉÊÍÓÔÕÚÜÇ]+", prompt.lower()))
        kb_scores = {j.proposicao[:30]: self.kb.get(j.proposicao.split()[0], 0.3) for j in judgments}
        filtered = self.SYL.run(judgments, prompt_terms, kb_scores)
        self._log(f" {len(judgments)} hypotheses → {len(filtered)} after filters")

        self._log("\n[STAGE 6] L3 — Paraconsistent Logic")
        props_with_priority = [(j.proposicao, score) for j, score in filtered]
        pv_list: List[ParaconsistentValue] = self.L3.evaluate(props_with_priority, self.kb)
        consistent = self.L3.check_global_consistency(pv_list)
        self._log(f" Global consistency: {'✓' if consistent else '✗'}")

        self._log("\n[STAGE 7] L4 — Russellian Synthesis")
        l2_priorities = {j.proposicao[:40]: j.prioridade for j, _ in filtered}
        result: SynthesisResult = self.L4.synthesize(pv_list, l2_priorities, prompt)

        # Optional web-search context from the research agent.
        agent_context = ""
        if use_agent and run_search_for_context:
            try:
                agent_context = run_search_for_context(prompt)
                if agent_context and self.verbose:
                    self._log("\n[AGENT] Search context retrieved.")
            except Exception:
                pass

        # STAGE 8 / L5: generate the final response via an external provider
        # when configured, then append agent search context if available.
        gen_cfg = self._config.get("generation", {})
        provider = gen_cfg.get("provider", "template")
        final_response = result.response
        if not skip_l5 and l5_generate and provider != "template":
            final_response = l5_generate(
                prompt,
                result,
                provider=provider,
                concepts_summary=concepts_summary,
                top_judgments=top_judgments,
                groq_model=gen_cfg.get("groq_model", "mixtral-8x7b-32768"),
                custom_lm_path=gen_cfg.get("custom_lm_path", ""),
            ) or result.response
        if agent_context and final_response:
            final_response = final_response + "\n\n[Search context]\n" + agent_context[:800]
        if final_response != result.response:
            result = SynthesisResult(
                response=final_response,
                truth_value=result.truth_value,
                certainty=result.certainty,
                contradiction=result.contradiction,
                state=result.state,
                supporting_evidence=result.supporting_evidence,
                falsified_hypotheses=result.falsified_hypotheses,
                confidence_label=result.confidence_label,
            )

        elapsed = (time.perf_counter() - t0) * 1000
        self._log(f"\n[STAGE 8] Final Response ({elapsed:.1f} ms)\n")
        self._log(str(result))
        return result

    def _log(self, msg: str) -> None:
        if self.verbose:
            print(msg)

    def repl(self) -> None:
        print("\n" + "═" * 60)
        print(" HYBRID LLM MODEL — Fonseca")
        print(" Type 'sair', 'exit' or 'quit' to end the session")
        print("═" * 60)
        while True:
            try:
                prompt = input("\nPrompt › ").strip()
            except (EOFError, KeyboardInterrupt):
                break
            if not prompt:
                continue
            if prompt.lower() in {"sair", "exit", "quit"}:
                break
            self.process(prompt)


def main() -> None:
    import argparse
    parser = argparse.ArgumentParser(description="Hybrid LLM Model — L1–L5 Pipeline")
    parser.add_argument("--prompt", "-p", type=str, help="Single question (prints only the answer)")
    parser.add_argument("--repl", action="store_true", help="Interactive mode")
    parser.add_argument("--demo", action="store_true", help="Run a demonstration with fixed prompts")
    parser.add_argument("--config", type=str, help="Path to config.yaml")
    args, _ = parser.parse_known_args()

    if load_config:
        config = load_config(Path(args.config)) if args.config else load_config()
    else:
        config = {}
    pipeline = HybridLLMPipeline(config=config, verbose=not args.prompt)

    if args.prompt:
        r = pipeline.process(args.prompt)
        print(r.response)
        return
    if args.repl:
        pipeline.repl()
        return
    # Default behavior (and --demo): run the fixed demonstration prompts.
    for p in ["A água a 35 graus está quente ou fria?", "O que é a verdade?"]:
        pipeline.process(p)
        print()
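
# CLI usage sketch (hedged: the file name main_pipeline.py is an assumption):
#     python main_pipeline.py --prompt "O que é a verdade?"
#     python main_pipeline.py --repl
#     python main_pipeline.py --demo --config config.yaml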


if __name__ == "__main__":
    main()