| |
# Continuous fine-tuning pipeline: collects user/assistant interactions,
# stores embeddings, and periodically rebuilds an Ollama model from them.
import json
import os
import threading
import time
import requests
from loguru import logger
from database import Database
from sentence_transformers import SentenceTransformer
import config


# Base model the fine-tune is derived FROM, and the name given to the tuned model.
MODEL_BASE = "qwen2.5:1.5b-instruct-q4_0"
MODEL_FINE = "akira-luanda-v25"
# JSONL file holding one {"user": ..., "assistant": ...} object per line.
DATASET_PATH = "/app/dataset.jsonl"
# NOTE(review): unconventional casing — should be MODELFILE_PATH; kept as-is
# because other code in this module references this exact name.
MODelfile_PATH = "/app/Modelfile"
# Loaded once at import time and shared by all threads.
EMBEDDING_MODEL = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")


# Guards _dataset (and writes to DATASET_PATH) across the worker threads below.
_lock = threading.Lock()
# In-memory mirror of DATASET_PATH: list of {"user": str, "assistant": str}.
_dataset = []
|
|
def gerar_embedding(text: str):
    """Encode *text* with the shared sentence-transformer model.

    Returns the embedding as a plain Python list (JSON/DB friendly).
    """
    vector = EMBEDDING_MODEL.encode(text, convert_to_numpy=True)
    return vector.tolist()
|
|
def salvar_dataset():
    """Rewrite DATASET_PATH from the in-memory dataset, one JSON object per line.

    Takes a snapshot of ``_dataset`` under ``_lock`` first: the original code
    iterated the shared list unlocked while ``registrar_interacao`` could be
    appending from another thread.
    """
    with _lock:
        snapshot = list(_dataset)
    with open(DATASET_PATH, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(entry, ensure_ascii=False) + "\n" for entry in snapshot)
|
|
def criar_modelfile():
    """Build the Ollama Modelfile text for the fine-tuned model.

    Emits the base model, the persona as the SYSTEM prompt, sampling
    parameters, and the collected interactions as ``MESSAGE`` directives —
    the Modelfile syntax for example conversation turns. The previous
    bare ``USER:`` / ``ASSISTANT:`` lines are not valid Modelfile directives
    and would make ``ollama create`` fail.
    """
    parts = [
        f"FROM {MODEL_BASE}",
        f'SYSTEM """{config.PERSONA}"""',
        "PARAMETER temperature 0.9",
        "PARAMETER num_ctx 4096",
    ]
    with _lock:
        data = _dataset.copy()
    for d in data:
        # Triple-quote the values so multi-line content stays intact.
        parts.append(f'MESSAGE user """{d["user"]}"""')
        parts.append(f'MESSAGE assistant """{d["assistant"]}"""')
    return "\n".join(parts) + "\n"
|
|
class Treinamento:
    """Collects chat interactions and periodically fine-tunes the Ollama model.

    Persists every interaction to the JSONL dataset and, once at least
    ``min_interactions`` entries exist, rebuilds the model — both on a fixed
    cadence (``interval_hours``) and opportunistically when the threshold is
    crossed.
    """

    def __init__(self, db: Database, min_interactions: int = 25, interval_hours: int = 4):
        self.db = db
        # Minimum number of stored interactions before a fine-tune is attempted.
        self.min_interactions = min_interactions
        # Retrain cadence, converted to seconds for time.sleep().
        self.interval = interval_hours * 3600
        self.thread = None
        # Side effects at construction: load the on-disk dataset and start
        # the background retraining loop.
        self.carregar_dataset()
        self.iniciar_loop()
|
|
| def carregar_dataset(self): |
| global _dataset |
| if os.path.exists(DATASET_PATH): |
| try: |
| with open(DATASET_PATH, "r", encoding="utf-8") as f: |
| _dataset = [json.loads(l) for l in f if l.strip()] |
| logger.info(f"{len(_dataset)} kandandos carregados do dataset!") |
| except Exception as e: |
| logger.error(f"Erro ao carregar dataset: {e}") |
| _dataset = [] |
|
|
| def iniciar_loop(self): |
| if not self.thread or not self.thread.is_alive(): |
| self.thread = threading.Thread(target=self._loop, daemon=True) |
| self.thread.start() |
| logger.info("Loop de fine-tune iniciado!") |
|
|
| def registrar_interacao(self, usuario, mensagem, resposta, numero, is_reply=False, mensagem_original=""): |
| try: |
| |
| self.db.salvar_mensagem(usuario, mensagem, resposta, numero) |
|
|
| |
| texto = f"{mensagem} {resposta}".lower() |
| embedding = gerar_embedding(texto) |
| self.db.salvar_embedding(numero, mensagem, resposta, embedding, texto=texto) |
|
|
| |
| entry = {"user": mensagem.strip(), "assistant": resposta.strip()} |
| with _lock: |
| _dataset.append(entry) |
| with open(DATASET_PATH, "a", encoding="utf-8") as f: |
| json.dump(entry, f, ensure_ascii=False) |
| f.write("\n") |
|
|
| logger.info(f"Kandando salvo: {len(_dataset)} total") |
|
|
| |
| if len(_dataset) >= self.min_interactions: |
| threading.Thread(target=self._treinar, daemon=True).start() |
|
|
| except Exception as e: |
| logger.error(f"Erro ao registrar: {e}") |
|
|
| def _treinar(self): |
| if len(_dataset) < self.min_interactions: |
| return |
| logger.info(f"INICIANDO FINE-TUNE → {MODEL_FINE} com {len(_dataset)} kandandos") |
|
|
| try: |
| salvar_dataset() |
| modelfile = criar_modelfile() |
| with open(MODelfile_PATH, "w", encoding="utf-8") as f: |
| f.write(modelfile) |
|
|
| files = {'modelfile': open(MODelfile_PATH, 'rb')} |
| data = {'name': MODEL_FINE} |
| resp = requests.post("http://localhost:11434/api/create", files=files, data=data, timeout=600) |
|
|
| if resp.status_code == 200: |
| config.OLLAMA_MODEL = MODEL_FINE |
| logger.success(f"MODELO {MODEL_FINE} CRIADO COM SUCESSO!") |
| else: |
| logger.error(f"Erro Ollama: {resp.status_code} {resp.text}") |
|
|
| os.remove(MODelfile_PATH) |
| except Exception as e: |
| logger.error(f"Erro no fine-tune: {e}") |
|
|
| def _loop(self): |
| while True: |
| time.sleep(self.interval) |
| if len(_dataset) >= self.min_interactions: |
| self._treinar() |