Lena Merkli committed on
Commit
1295a89
·
1 Parent(s): 72f02e1

Upload 3 files

Browse files
sentence_splitter/function.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def split_list(array: list[str], separator: str) -> list[str]:
    """
    Split every string in *array* at *separator*, keeping the separator
    attached to the end of the piece that precedes it.

    :param array: List of strings to split further.
    :param separator: Substring to split on (kept as a suffix of each piece).
    :return: Flat list of the resulting pieces, in order.
    """
    # Previous implementation injected the private-use marker U+E000 as a
    # temporary split token; that collided with the abbreviation-protection
    # marker (also U+E000) used by split() in this file and produced spurious
    # splits. Re-attaching the separator manually needs no marker at all.
    result = []
    for s in array:
        parts = s.split(separator)
        # Keep the delimiter: re-attach it to every piece except the last.
        for part in parts[:-1]:
            result.append(part + separator)
        result.append(parts[-1])
    return result
9
+
10
+
11
def split(text: str) -> list[str]:
    """
    Split (German legal) text into sentences.

    Newlines are normalised, known abbreviations (e.g. 'Abs.', 'Art.') are
    protected so their trailing dot is not treated as a sentence end, dots
    that heuristically do not end a sentence are masked, and the text is then
    split on newlines and on '. ' / '? ' boundaries.

    :param text: The text to split.
    :return: List of sentences, stripped of surrounding whitespace.
    """
    # Collapse padded and doubled newlines into a single '\n'.
    for replacement in [' \n', '\n ', '\n\n']:
        while replacement in text:
            text = text.replace(replacement, '\n')
    # Mask the dots of known abbreviations with a private-use marker so the
    # heuristic below never inspects them.
    protections = ['d. h.', 'Abs.', 'Art.', 'Bem.', 'Bst.', ' ff.', ' f.', '(ff.', '(f.', 'insbes.', 'S.', 'V.']
    for protection in protections:
        text = text.replace(protection, protection.replace('.', '\uE000'))
    placeholder = "\uE001"
    # Mask every dot that heuristically does NOT terminate a sentence:
    # a one-letter word before it, no upper-case letter two positions after
    # it, or a digit directly before it (ordinals, section numbers).
    # The placeholder is a single character, so the text length — and the
    # precomputed range — stays valid while we substitute in place.
    for i in range(3, len(text) - 3):
        if text[i] == '.':
            if (
                (text[i - 2] == ' ') or
                (not text[i + 2].isupper()) or
                (text[i - 1].isdigit())
            ):
                text = text[:i] + placeholder + text[i + 1:]
    # Fold the abbreviation markers into the generic placeholder: protected
    # dots must not act as split points either. (Previously '\uE000' was
    # never restored and leaked into split_list, causing spurious splits and
    # markers surviving in the output.)
    text = text.replace('\uE000', placeholder)
    array = [text]
    for value in ['\n', '. ', '? ']:
        array = split_list(array, value)
    final_list = []
    for s in array:
        # Restore the masked dots and trim surrounding whitespace.
        cleaned_s = s.replace(placeholder, '.').strip()
        final_list.append(cleaned_s)
    return final_list
sentence_splitter/sentence_splitter.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from util.llm import LLaMaCPP
2
+ from time import sleep
3
+
4
# Module-level prompt loading is currently disabled; see run_ai(), which
# would otherwise rely on a PROMPT constant defined here.
# with open(Path(__file__).resolve().parent.absolute().__str__() + '/prompt.md', 'r', encoding='utf-8') as _f:
#     PROMPT = _f.read()
# UTF-8 byte sequences for umlauts/accented letters plus the guillemets « ».
# NOTE(review): SPECIAL is not referenced anywhere in this file — presumably
# consumed by other modules; confirm before removing.
SPECIAL = [c.encode('utf-8') for c in 'äöüÄÖÜéèà'] + [b'\xc2\xab', b'\xc2\xbb']  # noqa
7
+
8
+
9
def split(text: str) -> list:
    """
    Splits a text into sentences using the generated function
    :param text: The text to split
    :return: A list of sentences
    """
    # Imported inside the function body (not at module level) so the
    # generated function.py only has to exist once this wrapper is called.
    import function
    return function.split(text)
17
+
18
+
19
def run_ai(llm: LLaMaCPP, error: Exception, string: str, sentences: list, sentences_ai: list) -> None:
    """
    Use an AI language model to fix the sentence splitting function when it fails to correctly process text.

    :param llm: The LLaMaCPP language model instance to use for generating the improved function
    :param error: The exception that was raised during sentence splitting
    :param string: The original text string that caused the error
    :param sentences: The expected correct sentence splitting result (ground truth)
    :param sentences_ai: The incorrect sentence splitting result produced by the current implementation
    :return: None
    :raises ValueError: If the model output contains no fenced code block
    """
    from pathlib import Path
    # Load the prompt template from prompt.md next to this file. The
    # module-level loader is commented out, so without this the function
    # raised NameError on the undefined PROMPT constant.
    prompt_template = (Path(__file__).resolve().parent / 'prompt.md').read_text(encoding='utf-8')
    # Read the current implementation
    with open('function.py', 'r', encoding='utf-8') as f:
        function = f.read()
    string = repr(string)
    sentences_ = [repr(s) for s in sentences]
    sentences_ai_ = [repr(s) for s in sentences_ai]
    # Construct the prompt by filling in the template placeholders
    prompt = prompt_template.replace('{PROGRAM}', function)
    prompt = prompt.replace('{ERROR}', repr(error))
    prompt = prompt.replace('{STRING}', string)
    prompt = prompt.replace('{SENTENCES}', f"[{', '.join(sentences_)}]")
    prompt = prompt.replace('{SENTENCES_AI}', f"[{', '.join(sentences_ai_)}]")
    # Use a simplified conversation template for Qwen3; the empty <think>
    # block pre-fills (and thus skips) the chain-of-thought section.
    conversation = f"<|im_start|>user\n{prompt}\n<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
    print(conversation)
    output = llm.generate(conversation)
    print(output)
    # Extract the last fenced code block: collapse a doubled closing fence
    # first, then take the text between the last pair of ``` markers.
    output = output.replace('\n```\n```', '\n```')
    parts = output.rsplit('```')
    if len(parts) < 2:
        # Previously this was an uninformative IndexError.
        raise ValueError('Model output contains no fenced code block')
    output = parts[-2]
    # Write the candidate implementation to disk
    with open('function.py', 'w', encoding='utf-8') as f:
        f.write(output)
53
+
54
+
55
+ # def train() -> None:
56
+ # """
57
+ # Iteratively improve the sentence splitting function using an AI language model. Use `ctrl+c` to stop the training.
58
+ # :return: None.
59
+ # """
60
+ # llm = LLaMaCPP()
61
+ # llm.set_model('Qwen3-32B-Q4_K_S.gguf')
62
+ # llm.load_model(print_log=True, seed=42, threads=16, kv_cache_type='q8_0', context=16384)
63
+ # while llm.is_loading() or not llm.is_running():
64
+ # sleep(1)
65
+ # for element in DATA:
66
+ # try:
67
+ # sentences = element['sentences']
68
+ # string = element['string']
69
+ # sentences_ai = []
70
+ # try:
71
+ # from function import split
72
+ # sentences_ai = split(text=string)
73
+ # assert sentences == sentences_ai
74
+ # except Exception as e:
75
+ # e.add_note(f"Error with datapoint ```{string}```")
76
+ # run_ai(llm, e, string, sentences, sentences_ai)
77
+ # finally:
78
+ # del split
79
+ # except KeyboardInterrupt:
80
+ # break
81
+ # llm.stop()
82
+ #
83
+ #
84
+ # if __name__ == '__main__':
85
+ # train()
util/llm.py ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from jinja2 import Template
2
+ from json import load
3
+ from os import listdir
4
+ from os.path import getsize
5
+ from requests import request, RequestException
6
+ from subprocess import Popen, PIPE, run
7
+ from threading import Lock
8
+ import typing as t
9
+
10
+
11
# Public API of this module.
__all__ = [
    'LLaMaCPP',
    'LLMS',
]


# Model registry, loaded once at import time. Maps a short model name to its
# metadata: context size, sampling defaults, chat template, layer count,
# thinking flags and system message (all read by LLaMaCPP below).
# NOTE(review): importing this module requires /opt/llms/index.json to exist.
with open('/opt/llms/index.json', 'r') as _f:
    LLMS = load(_f)
19
+
20
+
21
class LLaMaCPP:
    """
    Thin client around a local llama.cpp ``llama-server`` subprocess.

    The server is started by :meth:`load_model` and queried over HTTP on
    127.0.0.1:8432. A readers-writer lock protects process management:
    any number of HTTP calls (readers) may run concurrently, while
    load/stop/kill (writers) require exclusive access.
    """

    def __init__(self):
        self._model_name: t.Optional[str] = None  # gguf file name under /opt/llms/
        self._process: t.Optional[Popen] = None   # running llama-server subprocess
        self._readers: int = 0                    # count of active readers
        self._read_lock = Lock()                  # guards self._readers
        self._write_lock = Lock()                 # held by writers and by the first reader

    def _add_reader(self) -> None:
        # First reader acquires the write lock, blocking writers until all
        # readers have left (classic readers-writer pattern).
        with self._read_lock:
            self._readers += 1
            if self._readers == 1:
                self._write_lock.acquire()

    def _remove_reader(self) -> None:
        # Last reader releases the write lock, letting writers proceed.
        with self._read_lock:
            self._readers -= 1
            if self._readers == 0:
                self._write_lock.release()

    def set_model(self, model_name: str) -> None:
        """
        Select the model (gguf file name) to be loaded by :meth:`load_model`.

        :param model_name: File name of a model present under /opt/llms/
        :raises Exception: If the model file is not found in the registry
        """
        if model_name not in self.list_available_models():
            raise Exception(f"Model {model_name} not found")
        with self._write_lock:
            self._model_name = model_name

    def load_model(self, print_log: bool = False, seed: t.Optional[int] = None, threads: t.Optional[int] = None, kv_cache_type: t.Optional[t.Literal['f16', 'bf16', 'q8_0', 'q5_0', 'q4_0']] = None, context: t.Optional[int] = None, temperature: t.Optional[float] = None, top_p: t.Optional[float] = None, top_k: t.Optional[int] = None, min_p: t.Optional[float] = None) -> None:
        """
        Start ``llama-server`` with the model selected via :meth:`set_model`.

        Unset sampling parameters fall back to the defaults stored in the
        LLMS registry; ``context`` is capped at the model's maximum.

        :param print_log: If True, server output goes to this process's stdout/stderr
        :param seed: RNG seed (-1 = random, the default)
        :param threads: CPU threads for the server (default 16)
        :param kv_cache_type: Quantisation of the KV cache (default 'q8_0')
        :param context: Context size; capped via min_none at the model maximum
        :param temperature: Sampling temperature (default from LLMS)
        :param top_p: Nucleus sampling p (default from LLMS)
        :param top_k: Top-k sampling (default from LLMS)
        :param min_p: Minimum token probability (default from LLMS)
        :raises Exception: If a server is already running or no model is set
        """
        if self.process_is_alive():
            raise Exception("A model is already loaded. Use stop() before loading a new model.")
        if self._model_name is None:
            raise Exception("Model not set")
        short_name = self.short_model_name(self._model_name)
        if short_name is None:
            raise Exception(f"Model {self._model_name} not found")
        if seed is None:
            seed = -1
        if threads is None:
            threads = 16
        if kv_cache_type is None:
            kv_cache_type = 'q8_0'
        context = min_none(context, LLMS[short_name]['context'])
        if temperature is None:
            temperature = LLMS[short_name]['sampling']['temperature']
        if top_p is None:
            top_p = LLMS[short_name]['sampling']['top_p']
        if top_k is None:
            top_k = LLMS[short_name]['sampling']['top_k']
        if min_p is None:
            min_p = LLMS[short_name]['sampling']['min_p']
        with self._write_lock:
            # Fit as many layers on the GPU as current free VRAM allows.
            offload_layers = calculate_offload_layers(self._model_name, short_name)
            print(f"Loading model {self._model_name} with {offload_layers} layers offloaded")
            command = [
                '/opt/llama.cpp/bin/llama-server',
                '--threads', str(threads),
                '--ctx-size', str(context),
                '--flash-attn',
                '--no-escape',
                '--cache-type-k', kv_cache_type,
                '--cache-type-v', kv_cache_type,
                '--batch-size', '32',
                '--ubatch-size', '16',
                '--mlock',
                '--n-gpu-layers', str(offload_layers),
                '--model', f'/opt/llms/{self._model_name}',
                '--seed', str(seed),
                '--temp', str(temperature),
                '--top-k', str(top_k),
                '--top-p', str(top_p),
                '--min-p', str(min_p),
                '--host', '127.0.0.1',
                '--port', '8432',
                '--alias', short_name,
            ]
            if print_log:
                # Inherit this process's stdout/stderr so the log is visible.
                stdout = None
                stderr = None
            else:
                stdout = PIPE
                stderr = PIPE
            self._process = Popen(command, stdout=stdout, stderr=stderr, text=True)
        return None

    def apply_chat_template(self, conversation: t.List[t.Dict[str, str]], enable_thinking: bool = False) -> str:
        """
        Render a conversation through the model's Jinja2 chat template.

        :param conversation: Messages as {'role': ..., 'content': ...} dicts
        :param enable_thinking: Requested thinking mode; only honoured when
            the registry marks the model's thinking as optional
        :return: The rendered prompt string
        """
        short_name = self.short_model_name(self._model_name)
        chat_template: str = LLMS[short_name]['chat_template']
        template = Template(chat_template)
        options: t.Dict[str, t.Any] = {
            'messages': conversation,
            'tools': [],
            'add_generation_prompt': True,
            'enable_thinking': False,
        }
        # Thinking mode: forced on for always-thinking models, caller-chosen
        # when optional, otherwise off.
        if LLMS[short_name]['thinking']:
            if LLMS[short_name]['optional_thinking']:
                options['enable_thinking'] = enable_thinking
            else:
                options['enable_thinking'] = True
        else:
            options['enable_thinking'] = False
        return template.render(**options)

    def generate(self, prompt: t.Union[str, t.List[t.Dict[str, str]]], enable_thinking: bool = False, temperature: t.Optional[float] = None, top_k: t.Optional[int] = None, top_p: t.Optional[float] = None, min_p: t.Optional[float] = None, n_predict: t.Optional[int] = None, grammar: t.Optional[str] = None, seed: t.Optional[int] = None) -> str:  # type: ignore
        """
        Run a completion request against the server.

        :param prompt: Raw prompt string, or a conversation (list of role /
            content dicts) that is first rendered via apply_chat_template
        :param enable_thinking: Forwarded to apply_chat_template for conversations
        :param temperature: Per-request sampling override (server default if None)
        :param top_k: Per-request sampling override
        :param top_p: Per-request sampling override
        :param min_p: Per-request sampling override
        :param n_predict: Maximum number of tokens to generate
        :param grammar: GBNF grammar constraining the output
        :param seed: Per-request RNG seed
        :return: The generated text ('content' field of the server response)
        :raises Exception: If the server answers with a non-200 status
        """
        if isinstance(prompt, list):
            prompt = self.apply_chat_template(prompt, enable_thinking)
        json_data: t.Dict[str, t.Any] = {
            'prompt': prompt,
        }
        # Only send overrides that were explicitly given; the server keeps
        # its launch-time defaults for the rest.
        if temperature is not None:
            json_data['temperature'] = temperature
        if top_k is not None:
            json_data['top_k'] = top_k
        if top_p is not None:
            json_data['top_p'] = top_p
        if min_p is not None:
            json_data['min_p'] = min_p
        if n_predict is not None:
            json_data['n_predict'] = n_predict
        if grammar is not None:
            json_data['grammar'] = grammar
        if seed is not None:
            json_data['seed'] = seed
        # Register as a reader so stop()/kill() cannot run mid-request.
        self._add_reader()
        try:
            req = request('POST', 'http://127.0.0.1:8432/completion', json=json_data)
            if req.status_code != 200:
                raise Exception(req.text)
            json_return = req.json()
            return json_return['content']
        finally:
            self._remove_reader()

    def process_is_alive(self) -> bool:  # type: ignore
        """
        :return: True if the server subprocess exists and has not exited
        """
        self._add_reader()
        try:
            if self._process is None:
                return False
            # poll() returns None while the child is still running.
            return self._process.poll() is None
        finally:
            self._remove_reader()

    def is_loading(self) -> bool:  # type: ignore
        """
        :return: True if the server reports 503 on /health (still loading);
            False if it is ready, errored, or not reachable at all
        """
        self._add_reader()
        try:
            req = request('GET', 'http://127.0.0.1:8432/health')
            return req.status_code == 503
        except RequestException:
            # No reachable server counts as "not loading".
            return False
        finally:
            self._remove_reader()

    def is_running(self) -> bool:  # type: ignore
        """
        :return: True if the server reports 200 on /health (ready to serve)
        """
        self._add_reader()
        try:
            req = request('GET', 'http://127.0.0.1:8432/health')
            return req.status_code == 200
        except RequestException:
            return False
        finally:
            self._remove_reader()

    def has_error(self) -> bool:  # type: ignore
        """
        :return: True if /health returns a status other than 200/503, or the
            server is unreachable
        """
        self._add_reader()
        try:
            req = request('GET', 'http://127.0.0.1:8432/health')
            return req.status_code not in [200, 503]
        except RequestException:
            # Unreachable server is treated as an error condition here.
            return True
        finally:
            self._remove_reader()

    def stop(self) -> None:
        """
        Ask the server subprocess to terminate (SIGTERM); no-op if none runs.
        """
        with self._write_lock:
            if self._process is None:
                return None
            self._process.terminate()
        return None

    def kill(self) -> None:
        """
        Forcibly kill the server subprocess (SIGKILL); no-op if none runs.
        """
        with self._write_lock:
            if self._process is None:
                return None
            self._process.kill()
        return None

    def get_system_message(self) -> t.List[t.Dict[str, str]]:
        """
        :return: The registry's system message for the current model as a
            one-element conversation, or [] if the model defines none
        """
        short_name = self.short_model_name(self._model_name)
        system_message = LLMS[short_name]['system_message']
        if system_message == '':
            return []
        return [{'role': 'system', 'content': system_message}]

    @staticmethod
    def list_available_models() -> t.List[str]:
        """
        :return: gguf files under /opt/llms/ that match a registry entry
        """
        directory_list = listdir('/opt/llms/')
        model_list = []
        for entry in directory_list:
            if entry.endswith('.gguf') and LLaMaCPP.short_model_name(entry) is not None:
                model_list.append(entry)
        return model_list

    @staticmethod
    def short_model_name(model_name: str) -> t.Optional[str]:
        """
        Map a gguf file name to its registry key by longest-prefix match.

        :param model_name: gguf file name
        :return: The matching registry key, or None if no key is a prefix
        """
        # Longest keys first, so the most specific registry entry wins.
        for model in sorted(LLMS.keys(), key=lambda x: len(x), reverse=True):
            if model_name.startswith(model):
                return model
        return None
229
+
230
+
231
def min_none(a: t.Any, b: t.Any) -> t.Any:
    """
    Returns the minimum of two values, or the single value if one of them is None.

    :param a: First value
    :param b: Second value
    :return: The minimum of a and b, or a/b if one of them is None
    """
    # When either side is missing, the other side wins by default.
    if a is None or b is None:
        return a if b is None else b
    return min(a, b)
244
+
245
+
246
def calculate_offload_layers(model_name: str, short_model_name: str) -> int:
    """
    Calculates the number of layers to offload

    :param model_name: The name of the model
    :param short_model_name: The short name of the model
    :return: The number of layers to offload
    """
    available_mb = check_free_vram()
    # File size in MB with a 10% safety margin for runtime overhead.
    size_mb = getsize(f"/opt/llms/{model_name}") / (1024 ** 2) * 1.1
    total_layers = LLMS[short_model_name]['layers']
    per_layer_mb = size_mb / total_layers
    # Offload as many layers as fit, but never more than the model has.
    return min(int(available_mb / per_layer_mb), total_layers)
260
+
261
+
262
def check_free_vram() -> int:
    """
    Checks the amount of free VRAM on the GPU

    :return: The amount of free VRAM in MB (first GPU if several are present)
    :raises Exception: If nvidia-smi exits with a non-zero status
    """
    # stderr must be piped too: without it nvidia_smi.stderr is None and the
    # error path below raised an uninformative Exception(None).
    nvidia_smi = run(
        ['nvidia-smi', '--query-gpu=memory.free', '--format=csv,nounits,noheader'],
        stdout=PIPE, stderr=PIPE, text=True,
    )
    if nvidia_smi.returncode != 0:
        raise Exception(nvidia_smi.stderr)
    # nvidia-smi prints one line per GPU; int() on the raw output crashed on
    # multi-GPU machines. Use the first line (GPU 0).
    return int(nvidia_smi.stdout.strip().splitlines()[0])