| from typing import Dict, List |
| from tqdm import tqdm |
| from config import Config |
| from utils.cache_manager import CacheManager |
| from core.openai_client import OpenAIClient |
|
|
class CharacterAnalyzer:
    """Character personality analyzer, tuned for large bodies of text.

    Picks a bounded sample of the text chunks that mention a character, asks
    the chat model for a structured personality profile, and stores the
    result through the cache manager.
    """

    # Maximum number of characters of source text embedded in one prompt.
    MAX_PROMPT_TEXT_CHARS = 8000

    # Limits used when harvesting example dialogue lines.
    MAX_EXAMPLE_CHUNKS = 5
    MAX_QUOTES_PER_CHUNK = 3
    MAX_EXAMPLE_DIALOGUES = 5

    # Profile fields, split by value type. Order matches the JSON schema that
    # the prompt asks the model to return.
    _LIST_FIELDS = ('core_traits', 'key_quotes')
    _TEXT_FIELDS = (
        'speaking_style',
        'behavior_patterns',
        'values',
        'emotional_style',
        'relationship_style',
        'background',
        'personality_summary',
    )

    # A quoted span of 10-100 characters. Besides ASCII quotes this also
    # accepts the full-width / corner quotation marks used in Chinese prose.
    # Every alternative has exactly one capture group.
    _QUOTED_SPAN_PATTERN = (
        r'["\']([^"\']{10,100})["\']'
        r'|“([^“”]{10,100})”'
        r'|「([^「」]{10,100})」'
        r'|『([^『』]{10,100})』'
    )

    def __init__(self):
        """Obtain a client via ``OpenAIClient.get_client()`` and create a ``CacheManager``."""
        self.client = OpenAIClient.get_client()
        self.cache = CacheManager()

    def select_representative_chunks(self, chunks: List[Dict],
                                     character_chunks: List[int],
                                     max_chunks: int = None) -> List[Dict]:
        """Select a bounded, evenly spread sample of a character's chunks.

        Args:
            chunks: All text chunks; ``character_chunks`` holds indices into it.
            character_chunks: Indices of the chunks that mention the character.
            max_chunks: Upper bound on the sample size. A falsy value (``None``
                or ``0``) falls back to ``Config.MAX_ANALYSIS_CHUNKS``.

        Returns:
            The selected chunk dicts, in the order of ``character_chunks``.
            Indices outside ``chunks`` (including negative ones) are skipped.
        """
        limit = max_chunks or Config.MAX_ANALYSIS_CHUNKS
        total = len(character_chunks)

        if total <= limit:
            selected_ids = character_chunks
        else:
            # Spread the picks over the whole list. A fixed floor-divided step
            # collapses to 1 whenever total < 2 * limit, which would just take
            # the first `limit` ids and never sample the tail.
            selected_ids = [character_chunks[(i * total) // limit]
                            for i in range(limit)]

        # Reject negative ids explicitly; they would otherwise silently index
        # from the end of `chunks`.
        return [chunks[i] for i in selected_ids if 0 <= i < len(chunks)]

    def analyze_character_batch(self, character_name: str,
                                text_chunks: List[Dict]) -> Dict:
        """Analyze a character's personality from a batch of text chunks.

        Args:
            character_name: Name of the character to analyze.
            text_chunks: Chunk dicts; each must provide ``'chunk_id'`` and
                ``'text'``.

        Returns:
            A profile dict. On a cache hit the cached value is returned as is.
            When there is nothing to analyze, the model returns no content, or
            any error occurs during the request, the default profile is
            returned instead (and is not cached).

        Raises:
            KeyError: If a chunk lacks ``'chunk_id'`` or ``'text'``.
        """
        if not text_chunks:
            # No source text: asking the model would only invite invention.
            return self._default_profile(character_name)

        cache_key = self._build_cache_key(character_name, text_chunks)
        cached = self.cache.get(cache_key)
        if cached:
            print(f"从缓存加载 {character_name} 的分析结果")
            return cached

        combined_text = "\n\n---\n\n".join(c['text'] for c in text_chunks)
        analysis_prompt = self._build_analysis_prompt(character_name, combined_text)

        # Best-effort boundary: any failure degrades to the default profile.
        try:
            response = self.client.chat.completions.create(
                model=Config.MODEL_NAME,
                messages=[
                    {"role": "system", "content": "你是一个专业的文学角色分析专家。请基于文本内容进行深入分析。"},
                    {"role": "user", "content": analysis_prompt}
                ]
            )

            # `content` may be None; treat that like an empty answer.
            analysis_text = (response.choices[0].message.content or "").strip()
            if not analysis_text:
                print("分析失败: 模型返回内容为空")
                return self._default_profile(character_name)

            profile = self._extract_profile(analysis_text, character_name)
            profile['raw_analysis'] = analysis_text

            self.cache.set(cache_key, profile)

            return profile

        except Exception as e:
            print(f"分析失败: {e}")
            return self._default_profile(character_name)

    def _build_cache_key(self, character_name: str, text_chunks: List[Dict]) -> str:
        """Return a cache key that is stable across interpreter runs."""
        import hashlib

        chunk_ids = [c['chunk_id'] for c in text_chunks]
        # The built-in hash() of a str is salted per process, so it cannot be
        # used for a key that should be found again by a later run.
        digest = hashlib.sha256(str(chunk_ids).encode('utf-8')).hexdigest()[:16]
        return f"analysis_{character_name}_{digest}"

    def _build_analysis_prompt(self, character_name: str, combined_text: str) -> str:
        """Build the user prompt, embedding at most MAX_PROMPT_TEXT_CHARS of text."""
        excerpt = combined_text[:self.MAX_PROMPT_TEXT_CHARS]
        return f"""
请深度分析小说中"{character_name}"这个角色的性格特征。

基于以下文本片段进行分析:

{excerpt}

请从以下维度分析,并以JSON格式返回:

{{
    "name": "{character_name}",
    "core_traits": ["特质1", "特质2", "特质3"],
    "speaking_style": "说话风格描述",
    "behavior_patterns": "行为模式描述",
    "values": "核心价值观",
    "emotional_style": "情感表达方式",
    "relationship_style": "人际关系风格",
    "background": "背景信息",
    "key_quotes": ["典型语句1", "典型语句2"],
    "personality_summary": "性格总结(100字以内)"
}}

注意:
1. 只基于文本内容分析,不要添加原著之外的信息
2. 提取该角色的典型对话和行为
3. 关注语言风格、用词习惯、口头禅等
"""

    def _extract_profile(self, analysis_text: str, character_name: str) -> Dict:
        """Turn the model's answer into a profile dict.

        Tries the outermost ``{...}`` span as JSON first and falls back to the
        regex-based text parser when there is no such span or it is not valid
        JSON. Fields missing from the JSON are filled with empty values.
        """
        import json
        import re

        json_match = re.search(r'\{.*\}', analysis_text, re.DOTALL)
        if json_match:
            try:
                parsed = json.loads(json_match.group())
            except json.JSONDecodeError:
                # Malformed JSON still tends to contain recoverable fields.
                parsed = None
            if isinstance(parsed, dict):
                return {**self._empty_profile(character_name), **parsed}

        return self._parse_text_analysis(analysis_text, character_name)

    def _empty_profile(self, character_name: str) -> Dict:
        """Return a profile with every field present but empty."""
        return {
            'name': character_name,
            'core_traits': [],
            'speaking_style': '',
            'behavior_patterns': '',
            'values': '',
            'emotional_style': '',
            'relationship_style': '',
            'background': '',
            'key_quotes': [],
            'personality_summary': ''
        }

    def _parse_text_analysis(self, text: str, character_name: str) -> Dict:
        """Parse an analysis that is not valid JSON.

        Looks for ``key: [...]`` (list fields) and ``key: "..."`` (string
        fields) with regular expressions. Fields that are not found keep
        their empty value.
        """
        import re

        profile = self._empty_profile(character_name)
        flags = re.DOTALL | re.IGNORECASE

        for key in self._LIST_FIELDS:
            match = re.search(key + r'["\']?\s*:\s*\[(.*?)\]', text, flags)
            if match:
                profile[key] = re.findall(r'["\']([^"\']+)["\']', match.group(1))

        for key in self._TEXT_FIELDS:
            match = re.search(key + r'["\']?\s*:\s*["\']([^"\']+)["\']', text, flags)
            if match:
                profile[key] = match.group(1)

        return profile

    def _default_profile(self, character_name: str) -> Dict:
        """Return the placeholder profile used when no analysis is available."""
        return {
            'name': character_name,
            'core_traits': ['复杂', '多面'],
            'speaking_style': '根据情境变化',
            'behavior_patterns': '待观察',
            'values': '待分析',
            'emotional_style': '情感丰富',
            'relationship_style': '因人而异',
            'background': '小说角色',
            'key_quotes': [],
            'personality_summary': f'{character_name}是一个复杂的角色',
            'raw_analysis': '使用默认配置'
        }

    def enhance_profile_with_examples(self, profile: Dict, chunks: List[Dict],
                                      character_chunks: List[int]) -> Dict:
        """Add example dialogue lines found in the character's chunks.

        Scans the first ``MAX_EXAMPLE_CHUNKS`` entries of ``character_chunks``
        for quoted spans of 10-100 characters, keeps up to
        ``MAX_QUOTES_PER_CHUNK`` per chunk, and stores the first
        ``MAX_EXAMPLE_DIALOGUES`` under ``profile['example_dialogues']``.

        Args:
            profile: Profile dict; modified in place.
            chunks: All text chunks; each used chunk must provide ``'text'``.
            character_chunks: Indices into ``chunks``. Out-of-range and
                negative indices are skipped.

        Returns:
            The same ``profile`` object. The key is left unset when no quoted
            span was found.
        """
        import re

        dialogues = []
        for chunk_id in character_chunks[:self.MAX_EXAMPLE_CHUNKS]:
            if not 0 <= chunk_id < len(chunks):
                continue
            chunk_text = chunks[chunk_id]['text']
            # lastindex is the one group that took part in the match.
            quotes = [m.group(m.lastindex)
                      for m in re.finditer(self._QUOTED_SPAN_PATTERN, chunk_text)]
            dialogues.extend(quotes[:self.MAX_QUOTES_PER_CHUNK])

        if dialogues:
            profile['example_dialogues'] = dialogues[:self.MAX_EXAMPLE_DIALOGUES]

        return profile