| import json |
| from abc import ABC |
| from typing import List |
| from langchain.docstore.document import Document |
| from langchain.document_loaders.base import BaseLoader |
|
|
|
|
class Person:
    """
    A dialogue participant.

    Stores the two constructor arguments unchanged as the ``name`` and ``age``
    attributes; no validation or conversion is performed.
    """

    def __init__(self, name, age):
        """
        :param name: value stored as ``self.name``
        :param age: value stored as ``self.age``
        """
        self.name, self.age = name, age
|
|
|
|
class Dialogue:
    """
    In-memory model of a dialogue transcript.

    A Dialogue keeps the path of the transcript file and the ordered list of
    turns, filled either by ``parse_dialogue`` or by explicit ``add_turn``
    calls. Every method that reads the turns expects each turn to expose
    ``speaker.name`` and ``message``.
    """

    def __init__(self, file_path: str):
        """
        :param file_path: path of the transcript file read by ``parse_dialogue``
        """
        self.file_path = file_path
        self.turns = []

    def add_turn(self, turn):
        """
        Append one turn to the end of the dialogue.
        :param turn: object exposing ``speaker.name`` and ``message``
        :return: None
        """
        self.turns.append(turn)

    def parse_dialogue(self):
        """
        Read ``self.file_path`` (UTF-8) and append one Turn per speaker/message pair.

        Blank lines are skipped. The remaining lines are consumed in pairs:
        the first line of a pair is the speaker line, whose text before the
        first ':' is the speaker name (anything after the colon is ignored);
        the second line is the message, stored stripped of surrounding
        whitespace. One Person is created per distinct speaker name and shared
        by all of that speaker's turns. A speaker line at the end of the file
        with no message after it produces no turn.

        Expected file layout: <speaker>:\\r\\n<message>\\r\\n\\r\\n

        :raises ValueError: if a line in speaker position contains no ':'
        :return: None
        """
        participants = {}
        speaker_name = None

        # Iterate the file object directly instead of materializing every line.
        with open(self.file_path, encoding='utf-8') as file:
            for line_number, raw_line in enumerate(file, start=1):
                line = raw_line.strip()
                if not line:
                    continue

                if speaker_name is None:
                    name, colon, _ = line.partition(':')
                    if not colon:
                        # Same exception type as the bare tuple-unpacking
                        # failure, but pointing at the offending line.
                        raise ValueError(
                            f"{self.file_path}:{line_number}: expected "
                            f"'<speaker>:' but found {line!r}"
                        )
                    speaker_name = name
                    continue

                # Second line of the pair: the message for the pending speaker.
                if speaker_name not in participants:
                    participants[speaker_name] = Person(speaker_name, None)
                self.add_turn(Turn(participants[speaker_name], line))
                speaker_name = None

    def display(self):
        """Print every turn to stdout as ``<speaker name>: <message>``."""
        for turn in self.turns:
            print(f"{turn.speaker.name}: {turn.message}")

    def export_to_file(self, file_path):
        """
        Write every turn to ``file_path`` (UTF-8) as ``<speaker name>: <message>``,
        one turn per line.

        Note that this single-line layout differs from the two-line layout
        that ``parse_dialogue`` reads.

        :param file_path: destination path; an existing file is overwritten
        :return: None
        """
        with open(file_path, 'w', encoding='utf-8') as file:
            file.writelines(
                f"{turn.speaker.name}: {turn.message}\n" for turn in self.turns
            )

    def to_dict(self):
        """
        :return: ``{"turns": [{"speaker": <name>, "message": <message>}, ...]}``
                 with the turns in dialogue order
        """
        return {
            "turns": [
                {"speaker": turn.speaker.name, "message": turn.message}
                for turn in self.turns
            ]
        }

    def to_json(self):
        """
        :return: the ``to_dict`` structure serialized as indented JSON, with
                 non-ASCII characters written as-is
        """
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=2)

    def participants_to_export(self):
        """
        List the distinct speaker names of the dialogue.

        Names appear in order of first appearance, so the result is the same
        on every run (a set would order them by randomized string hashes).

        :return: the distinct names joined with ``', '``; ``''`` when there
                 are no turns
        """
        distinct_names = dict.fromkeys(turn.speaker.name for turn in self.turns)
        return ', '.join(distinct_names)
|
|
|
|
class Turn:
    """
    One utterance in a dialogue: who spoke and what was said.

    Stores the two constructor arguments unchanged as the ``speaker`` and
    ``message`` attributes.
    """

    def __init__(self, speaker, message):
        """
        :param speaker: value stored as ``self.speaker``
        :param message: value stored as ``self.message``
        """
        self.speaker, self.message = speaker, message
|
|
|
|
class DialogueLoader(BaseLoader, ABC):
    """Loader that turns a dialogue transcript file into one Document per turn."""

    def __init__(self, file_path: str):
        """
        Build a Dialogue for ``file_path`` and parse it immediately.

        The file is read here, in the constructor, so any error raised while
        opening or parsing it propagates from ``DialogueLoader(...)`` itself.

        :param file_path: path of the transcript file
        """
        parsed = Dialogue(file_path=file_path)
        parsed.parse_dialogue()
        self.file_path = file_path
        self.dialogue = parsed

    def load(self) -> List[Document]:
        """
        Convert the parsed dialogue into documents, one per turn, in order.

        Each Document has the turn's message as ``page_content`` and a
        metadata dict with a single ``"source"`` string holding the file path,
        the turn's speaker name and the list of all participants.

        :return: list of Document objects; empty when the dialogue has no turns
        """
        dialogue = self.dialogue
        # The participant list is the same for every turn, so compute it once.
        everyone = dialogue.participants_to_export()

        return [
            Document(
                page_content=turn.message,
                metadata={"source": f"Dialogue File:{dialogue.file_path},"
                                    f"speaker:{turn.speaker.name},"
                                    f"participant:{everyone}"},
            )
            for turn in dialogue.turns
        ]
|
|