import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
from safetensors.torch import load_file
from huggingface_hub import hf_hub_download


# Neural network architecture definition
class CaptchaCNN(nn.Module):
    """Convolutional classifier that predicts every CAPTCHA symbol at once.

    The network stacks three ``conv 3x3 -> max-pool 2x2 -> batch-norm -> ReLU``
    stages followed by two fully connected layers. The final layer emits
    ``output_ndigits * output_vocab_size`` logits, reshaped by ``forward`` to
    ``(batch, output_ndigits, output_vocab_size)``.

    Args:
        input_dim: ``(height, width)`` of the input images.
        output_ndigits: Number of symbols predicted per image.
        output_vocab_size: Number of classes per symbol.
        dropout: Dropout probabilities applied before the first and the
            second fully connected layer, respectively.
        dense_units: Width of the hidden fully connected layer.
        vocab: Optional symbol table; stored on the module, not used by the
            network itself.

    Raises:
        ValueError: If ``input_dim`` is too small to survive the three
            conv/pool stages.
    """

    # Number of conv + pool stages; used to derive the flattened feature size.
    _NUM_CONV_STAGES = 3

    def __init__(self, input_dim, output_ndigits, output_vocab_size,
                 dropout=(0.25, 0.5), dense_units=200, vocab=None):
        super().__init__()
        self.input_dim = input_dim
        self.output_ndigits = output_ndigits
        self.output_vocab_size = output_vocab_size
        self.vocab = vocab

        # Spatial size of the feature map after the convolutional stack.
        conv_h = self._conv_output_size(input_dim[0])
        conv_w = self._conv_output_size(input_dim[1])
        # Validate explicitly: two negative sizes would otherwise multiply
        # into a positive (and meaningless) number of input features.
        if conv_h <= 0 or conv_w <= 0:
            raise ValueError(
                f"input_dim {tuple(input_dim)} is too small for "
                f"{self._NUM_CONV_STAGES} conv/pool stages"
            )

        # NOTE: attribute names double as state_dict keys; keep them stable.
        self.batchnorm0 = nn.BatchNorm2d(3)
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3)
        self.batchnorm1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.batchnorm2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3)
        self.batchnorm3 = nn.BatchNorm2d(64)
        self.dropout1 = nn.Dropout(dropout[0])
        self.dropout2 = nn.Dropout(dropout[1])

        fc1_in_features = conv_h * conv_w * 64
        self.fc1 = nn.Linear(fc1_in_features, dense_units)
        self.batchnorm_dense = nn.BatchNorm1d(dense_units)
        self.fc2 = nn.Linear(dense_units, output_vocab_size * output_ndigits)

    @classmethod
    def _conv_output_size(cls, size):
        """Return one spatial dimension after all conv/pool stages.

        Each stage is an unpadded 3x3 convolution (``size - 2``) followed by
        a 2x2 max-pool with floor rounding (``// 2``).
        """
        for _ in range(cls._NUM_CONV_STAGES):
            size = (size - 2) // 2
        return size

    def forward(self, x) -> torch.Tensor:
        """Compute per-symbol logits for a batch of images.

        Args:
            x: Tensor of shape ``(batch, 3, height, width)`` where
                ``(height, width)`` matches ``input_dim``.

        Returns:
            Logits of shape ``(batch, output_ndigits, output_vocab_size)``.
        """
        x = self.batchnorm0(x)
        # In every stage pooling runs before batch-norm and ReLU.
        x = F.relu(self.batchnorm1(F.max_pool2d(self.conv1(x), 2)))
        x = F.relu(self.batchnorm2(F.max_pool2d(self.conv2(x), 2)))
        x = F.relu(self.batchnorm3(F.max_pool2d(self.conv3(x), 2)))
        x = torch.flatten(x, start_dim=1)
        x = self.dropout1(x)
        x = F.relu(self.batchnorm_dense(self.fc1(x)))
        x = self.dropout2(x)
        x = self.fc2(x)
        # One row of class logits per predicted symbol.
        return x.view(-1, self.output_ndigits, self.output_vocab_size)


# Main class that loads the model and runs predictions
class TJMG:
    """CAPTCHA solver backed by a pretrained ``CaptchaCNN``.

    On construction the weights file is fetched with ``hf_hub_download``,
    loaded into a ``CaptchaCNN`` and the model is switched to eval mode.

    Args:
        repo_id: Hugging Face Hub repository that hosts the weights.
        filename: Name of the safetensors weights file in that repository.
    """

    def __init__(self, repo_id="julio/captcha", filename="captcha_model.safetensors"):
        # Model hyperparameters
        self.input_dim = (40, 110)
        self.output_vocab_size = 10
        self.vocab = [str(digit) for digit in range(10)]
        self.output_ndigits = 5
        self.dropout = (0.25, 0.5)
        self.dense_units = 200

        # Download the model weights from Hugging Face
        model_path = hf_hub_download(repo_id=repo_id, filename=filename)

        # Build the network
        self.model = CaptchaCNN(
            input_dim=self.input_dim,
            output_ndigits=self.output_ndigits,
            output_vocab_size=self.output_vocab_size,
            dropout=self.dropout,
            dense_units=self.dense_units,
            vocab=self.vocab,
        )

        # Load the weights and switch to inference behaviour
        # (dropout disabled, batch-norm uses running statistics).
        state_dict = load_file(model_path)
        self.model.load_state_dict(state_dict)
        self.model.eval()

        # Image preprocessing: resize to the network input size, then to tensor
        self.transform = transforms.Compose([
            transforms.Resize(self.input_dim),
            transforms.ToTensor(),
        ])

    def predict(self, image_path) -> str:
        """Predict the text of a CAPTCHA image.

        Args:
            image_path (str): Path to the CAPTCHA image.

        Returns:
            str: Predicted text for the CAPTCHA.
        """
        # Pillow opens files lazily; the context manager guarantees the file
        # handle is released once the pixels have been converted.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        batch = self.transform(image).unsqueeze(0)  # add the batch axis
        with torch.no_grad():
            logits = self.model(batch)

        # Index the single batch element explicitly; squeeze() would also
        # collapse the symbol axis when only one symbol is predicted.
        preds = torch.argmax(logits, dim=2)[0].tolist()
        return ''.join(self.vocab[i] for i in preds)