| """ |
| Modèles Pydantic pour l'agent Global Synthesizer. |
| Définit les structures de données pour la synthèse finale et le rapport global. |
| """ |
|
|
| from typing import List, Optional, Dict, Any |
| from datetime import datetime |
| from pydantic import BaseModel, Field |
| from enum import Enum |
|
|
| from src.models.document_models import DocumentSummary, SummarizationOutput |
|
|
|
|
| class ReportType(str, Enum): |
| """Types de rapports de synthèse finale.""" |
| EXECUTIVE = "executive" |
| DETAILED = "detailed" |
| ACADEMIC = "academic" |
| BUSINESS = "business" |
|
|
|
|
| class ReportFormat(str, Enum): |
| """Formats de sortie du rapport.""" |
| MARKDOWN = "markdown" |
| HTML = "html" |
| TEXT = "text" |
|
|
|
|
| class GlobalSynthesisInput(BaseModel): |
| """ |
| Input pour l'agent Global Synthesizer. |
| """ |
| summarization_output: SummarizationOutput = Field( |
| ..., |
| description="Sortie complète de l'agent Summarizer avec tous les résumés" |
| ) |
| original_topic: str = Field( |
| ..., |
| description="Sujet de recherche original" |
| ) |
| synthesis_options: Optional[Dict[str, Any]] = Field( |
| default_factory=dict, |
| description="Options de configuration pour la synthèse" |
| ) |
| |
| |
| report_type: ReportType = Field( |
| default=ReportType.DETAILED, |
| description="Type de rapport à générer" |
| ) |
| report_format: ReportFormat = Field( |
| default=ReportFormat.MARKDOWN, |
| description="Format de sortie du rapport" |
| ) |
| include_methodology: bool = Field( |
| default=True, |
| description="Inclure la section méthodologie" |
| ) |
| include_sources: bool = Field( |
| default=True, |
| description="Inclure les références des sources" |
| ) |
| include_limitations: bool = Field( |
| default=True, |
| description="Inclure les limitations de l'analyse" |
| ) |
| max_report_length: int = Field( |
| default=5000, |
| description="Longueur maximale du rapport en mots" |
| ) |
| target_audience: str = Field( |
| default="general", |
| description="Audience cible (general, business, academic, policy_makers)" |
| ) |
| |
| def __init__(self, **data): |
| |
| synthesis_options = data.get('synthesis_options', {}) |
| |
| |
| if 'report_type' in synthesis_options: |
| data['report_type'] = synthesis_options['report_type'] |
| if 'report_format' in synthesis_options: |
| data['report_format'] = synthesis_options['report_format'] |
| if 'include_methodology' in synthesis_options: |
| data['include_methodology'] = synthesis_options['include_methodology'] |
| if 'include_sources' in synthesis_options: |
| data['include_sources'] = synthesis_options['include_sources'] |
| if 'include_limitations' in synthesis_options: |
| data['include_limitations'] = synthesis_options['include_limitations'] |
| if 'max_report_length' in synthesis_options: |
| data['max_report_length'] = synthesis_options['max_report_length'] |
| if 'target_audience' in synthesis_options: |
| data['target_audience'] = synthesis_options['target_audience'] |
| |
| super().__init__(**data) |
| |
| class Config: |
| json_schema_extra = { |
| "example": { |
| "original_topic": "impact de l'intelligence artificielle sur l'emploi", |
| "synthesis_options": { |
| "report_type": "detailed", |
| "report_format": "markdown", |
| "include_methodology": True, |
| "include_sources": True, |
| "target_audience": "business" |
| } |
| } |
| } |
|
|
|
|
| class ExecutiveSummary(BaseModel): |
| """Résumé exécutif du rapport final.""" |
| |
| key_findings: List[str] = Field( |
| default_factory=list, |
| description="3-5 conclusions principales" |
| ) |
| main_insights: List[str] = Field( |
| default_factory=list, |
| description="Insights et découvertes principales" |
| ) |
| recommendations: List[str] = Field( |
| default_factory=list, |
| description="Recommandations basées sur l'analyse" |
| ) |
| summary_text: str = Field( |
| ..., |
| description="Texte de synthèse exécutive (2-3 paragraphes)" |
| ) |
|
|
|
|
| class ReportSection(BaseModel): |
| """Section individuelle du rapport.""" |
| |
| title: str = Field(..., description="Titre de la section") |
| content: str = Field(..., description="Contenu de la section") |
| subsections: List['ReportSection'] = Field( |
| default_factory=list, |
| description="Sous-sections" |
| ) |
| order: int = Field(default=0, description="Ordre d'affichage") |
|
|
|
|
| class SourceReference(BaseModel): |
| """Référence bibliographique d'une source.""" |
| |
| title: str = Field(..., description="Titre du document source") |
| url: str = Field(..., description="URL du document") |
| author: Optional[str] = Field(default=None, description="Auteur") |
| publication_date: Optional[datetime] = Field(default=None, description="Date de publication") |
| credibility_score: Optional[float] = Field(default=None, description="Score de crédibilité") |
| citation_count: int = Field(default=0, description="Nombre de fois citée dans le rapport") |
|
|
|
|
| class Methodology(BaseModel): |
| """Description de la méthodologie utilisée.""" |
| |
| research_approach: str = Field(..., description="Approche de recherche utilisée") |
| sources_count: int = Field(..., description="Nombre de sources analysées") |
| analysis_methods: List[str] = Field( |
| default_factory=list, |
| description="Méthodes d'analyse utilisées" |
| ) |
| limitations: List[str] = Field( |
| default_factory=list, |
| description="Limitations de l'étude" |
| ) |
| data_quality_assessment: str = Field( |
| ..., |
| description="Évaluation de la qualité des données" |
| ) |
|
|
|
|
| class FinalReport(BaseModel): |
| """ |
| Modèle pour le rapport final de synthèse globale. |
| """ |
| |
| |
| report_id: str = Field(..., description="Identifiant unique du rapport") |
| title: str = Field(..., description="Titre du rapport") |
| topic: str = Field(..., description="Sujet de recherche original") |
| generated_at: datetime = Field(default_factory=datetime.now, description="Date de génération") |
| report_type: ReportType = Field(default=ReportType.DETAILED, description="Type de rapport") |
| report_format: ReportFormat = Field(default=ReportFormat.MARKDOWN, description="Format du rapport") |
| |
| |
| executive_summary: ExecutiveSummary = Field(..., description="Résumé exécutif") |
| introduction: str = Field(..., description="Introduction du rapport") |
| main_sections: List[ReportSection] = Field( |
| default_factory=list, |
| description="Sections principales du rapport" |
| ) |
| conclusion: str = Field(..., description="Conclusion du rapport") |
| |
| |
| key_themes: List[str] = Field( |
| default_factory=list, |
| description="Thèmes principaux identifiés" |
| ) |
| consensus_points: List[str] = Field( |
| default_factory=list, |
| description="Points de consensus entre les sources" |
| ) |
| conflicting_viewpoints: List[str] = Field( |
| default_factory=list, |
| description="Points de vue contradictoires" |
| ) |
| emerging_trends: List[str] = Field( |
| default_factory=list, |
| description="Tendances émergentes identifiées" |
| ) |
| |
| |
| methodology: Methodology = Field(..., description="Méthodologie utilisée") |
| sources: List[SourceReference] = Field( |
| default_factory=list, |
| description="Sources utilisées avec références" |
| ) |
| |
| |
| confidence_score: float = Field( |
| default=0.0, |
| ge=0.0, |
| le=1.0, |
| description="Score de confiance global (0-1)" |
| ) |
| completeness_score: float = Field( |
| default=0.0, |
| ge=0.0, |
| le=1.0, |
| description="Score de complétude de l'analyse (0-1)" |
| ) |
| |
| |
| total_sources_analyzed: int = Field(default=0, description="Nombre total de sources analysées") |
| processing_time: float = Field(default=0.0, description="Temps de traitement en secondes") |
| word_count: int = Field(default=0, description="Nombre de mots du rapport") |
| |
| class Config: |
| json_schema_extra = { |
| "example": { |
| "report_id": "rpt_20241115_001", |
| "title": "Impact de l'Intelligence Artificielle sur l'Emploi - Rapport de Synthèse", |
| "topic": "impact de l'intelligence artificielle sur l'emploi", |
| "report_type": "detailed", |
| "executive_summary": { |
| "key_findings": [ |
| "L'IA transformera 60% des emplois d'ici 2030", |
| "Nouveaux emplois créés dans la tech et supervision IA" |
| ], |
| "summary_text": "Analyse complète de l'impact de l'IA..." |
| }, |
| "confidence_score": 0.85, |
| "total_sources_analyzed": 5 |
| } |
| } |
|
|
|
|
| class GlobalSynthesisOutput(BaseModel): |
| """ |
| Modèle pour l'output de l'agent Global Synthesizer. |
| """ |
| |
| final_report: FinalReport = Field(..., description="Rapport final de synthèse") |
| synthesis_metadata: Dict[str, Any] = Field( |
| default_factory=dict, |
| description="Métadonnées sur le processus de synthèse" |
| ) |
| processing_stats: Dict[str, Any] = Field( |
| default_factory=dict, |
| description="Statistiques de traitement" |
| ) |
| |
| |
| formatted_outputs: Dict[str, str] = Field( |
| default_factory=dict, |
| description="Rapport formaté dans différents formats (markdown, html, etc.)" |
| ) |
| |
| timestamp: datetime = Field( |
| default_factory=datetime.now, |
| description="Horodatage de la synthèse" |
| ) |
| |
| class Config: |
| json_schema_extra = { |
| "example": { |
| "synthesis_metadata": { |
| "llm_model_used": "groq/llama-3.1-8b-instant", |
| "synthesis_strategy": "comprehensive", |
| "quality_checks_passed": True |
| }, |
| "processing_stats": { |
| "input_summaries": 5, |
| "synthesis_time": 15.3, |
| "final_report_words": 2500 |
| } |
| } |
| } |
|
|
|
|
| |
| ReportSection.model_rebuild() |