jtrecenti committed on
Commit
2758e03
·
1 Parent(s): 5a662a7

Upload do modulo captcha TJMG

Browse files
Files changed (1) hide show
  1. tjmg.py +108 -0
tjmg.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from torchvision import transforms
5
+ from PIL import Image
6
+ from safetensors.torch import load_file
7
+ from huggingface_hub import hf_hub_download
8
+
9
+ # Definição da arquitetura da rede neural
10
class CaptchaCNN(nn.Module):
    """Convolutional classifier that reads a fixed-length CAPTCHA.

    Three ``Conv2d(kernel_size=3) -> max_pool2d(2) -> BatchNorm2d -> ReLU``
    stages feed a two-layer dense head that emits one score vector per
    character position. The first layers are built for 3-channel input.

    Args:
        input_dim: ``(height, width)`` of the input images, in pixels.
        output_ndigits: Number of character positions predicted per image.
        output_vocab_size: Number of classes scored at each position.
        dropout: Two dropout probabilities; the first is applied to the
            flattened convolutional features, the second to the hidden
            dense layer.
        dense_units: Width of the hidden dense layer.
        vocab: Optional class labels. Stored on the instance; the network
            itself never reads it.

    Raises:
        ValueError: If ``input_dim`` is too small to leave at least one
            pixel per axis after the three conv/pool stages.
    """

    def __init__(self, input_dim, output_ndigits, output_vocab_size,
                 dropout=(0.25, 0.5), dense_units=200, vocab=None):
        super().__init__()
        self.input_dim = input_dim
        self.output_ndigits = output_ndigits
        self.output_vocab_size = output_vocab_size
        self.vocab = vocab

        # Attribute names below are the state_dict keys of saved weights;
        # renaming any of them would break loading existing checkpoints.
        self.batchnorm0 = nn.BatchNorm2d(3)
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3)
        self.batchnorm1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.batchnorm2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3)
        self.batchnorm3 = nn.BatchNorm2d(64)
        self.dropout1 = nn.Dropout(dropout[0])
        self.dropout2 = nn.Dropout(dropout[1])

        # Spatial size of the feature map once the convolutional stack is done.
        conv_h = self._conv_output_size(input_dim[0])
        conv_w = self._conv_output_size(input_dim[1])
        if conv_h <= 0 or conv_w <= 0:
            # Fail early with a readable message instead of building a dense
            # layer with a zero or negative number of input features.
            raise ValueError(
                f"input_dim={tuple(input_dim)!r} is too small for three "
                "conv(3x3)/max-pool(2) stages"
            )
        fc1_in_features = conv_h * conv_w * 64

        self.fc1 = nn.Linear(fc1_in_features, dense_units)
        self.batchnorm_dense = nn.BatchNorm1d(dense_units)
        self.fc2 = nn.Linear(dense_units, output_vocab_size * output_ndigits)

    @staticmethod
    def _conv_output_size(size):
        """Return the extent of one spatial axis after the three conv/pool stages."""
        for _ in range(3):
            # An unpadded 3x3 convolution trims 2 pixels; the 2x2 max-pool
            # then halves the result, rounding down.
            size = (size - 2) // 2
        return size

    def forward(self, x):
        """Score every character position of a batch of images.

        Args:
            x: Image batch laid out as ``(batch, 3, height, width)``, with
                ``(height, width)`` equal to ``input_dim``.

        Returns:
            Tensor of shape ``(batch, output_ndigits, output_vocab_size)``
            holding unnormalised class scores (logits).
        """
        x = self.batchnorm0(x)
        x = F.relu(self.batchnorm1(F.max_pool2d(self.conv1(x), 2)))
        x = F.relu(self.batchnorm2(F.max_pool2d(self.conv2(x), 2)))
        x = F.relu(self.batchnorm3(F.max_pool2d(self.conv3(x), 2)))

        x = torch.flatten(x, start_dim=1)
        x = self.dropout1(x)
        x = F.relu(self.batchnorm_dense(self.fc1(x)))
        x = self.dropout2(x)
        x = self.fc2(x)
        # Split the flat output into one score vector per character position.
        x = x.view(-1, self.output_ndigits, self.output_vocab_size)
        return x
55
+
56
+
57
+ # Classe principal para carregar o modelo e fazer previsões
58
class TJMG:
    """Load the pretrained TJMG CAPTCHA model and decode CAPTCHA images.

    Args:
        repo_id: Hugging Face Hub repository that hosts the weights.
        filename: Name of the safetensors weights file in that repository.
    """

    def __init__(self, repo_id="julio/captcha", filename="captcha_model.safetensors"):
        # Model hyperparameters. The layer shapes derived from them have to
        # agree with the downloaded weights for load_state_dict to succeed.
        self.input_dim = (40, 110)
        self.output_vocab_size = 10
        self.vocab = [str(x) for x in range(10)]
        self.output_ndigits = 5
        self.dropout = (0.25, 0.5)
        self.dense_units = 200

        # Download the weights file from the Hugging Face Hub.
        model_path = hf_hub_download(repo_id=repo_id, filename=filename)

        # Build the network.
        self.model = CaptchaCNN(
            input_dim=self.input_dim,
            output_ndigits=self.output_ndigits,
            output_vocab_size=self.output_vocab_size,
            dropout=self.dropout,
            dense_units=self.dense_units,
            vocab=self.vocab,
        )

        # Load the weights and switch to inference mode (disables dropout and
        # makes batch-norm use its stored running statistics).
        state_dict = load_file(model_path)
        self.model.load_state_dict(state_dict)
        self.model.eval()

        # Image preprocessing: resize to the network input size, then convert
        # to a tensor.
        self.transform = transforms.Compose([
            transforms.Resize(self.input_dim),
            transforms.ToTensor(),
        ])

    def predict(self, image_path):
        """Predict the text of a CAPTCHA.

        Args:
            image_path: Path of the CAPTCHA image (anything ``Image.open``
                accepts), or an already-loaded image object exposing a
                PIL-style ``convert`` method.

        Returns:
            str: Predicted text, one vocabulary symbol per position.
        """
        if hasattr(image_path, "convert"):
            # Already an image object: no file to open.
            image = image_path.convert('RGB')
        else:
            # The context manager releases the file handle right after
            # convert() has produced an independent in-memory image.
            with Image.open(image_path) as source:
                image = source.convert('RGB')
        batch = self.transform(image).unsqueeze(0)

        with torch.no_grad():
            logits = self.model(batch)

        # Select the single batch element explicitly. A bare squeeze() would
        # also drop the position axis when there is only one position, making
        # tolist() return a plain int that cannot be iterated.
        preds = torch.argmax(logits, dim=2)[0].tolist()
        predicted_label = ''.join(self.vocab[i] for i in preds)
        return predicted_label