| import torch |
| import torch.nn as nn |
| import torch.nn.functional as F |
| import math |
|
|
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a tensor, then apply dropout.

    The encoding table is stored in the buffer ``pe`` with shape
    ``(max_len, 1, d_model)``. ``forward`` slices that table by the size of
    the input's first dimension, so the input is expected to be
    sequence-first: ``(seq_len, batch, d_model)``.

    Args:
        d_model: Size of the feature dimension. Odd values are supported.
        max_len: Number of positions to precompute.
        dropout: Dropout probability applied after the addition.
    """

    def __init__(self, d_model, max_len=512, dropout=0.1):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model))

        pe = torch.zeros(max_len, 1, d_model)
        # Even feature indices carry the sine component.
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        # Odd feature indices carry the cosine component. When d_model is odd
        # there is one fewer odd index than even index, so drop the last
        # frequency to keep the shapes aligned.
        pe[:, 0, 1::2] = torch.cos(position * div_term[: d_model // 2])

        # A buffer follows the module across devices and is saved in the
        # state dict, but is not a trainable parameter.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``dropout(x + pe[:x.size(0)])``.

        Args:
            x: Tensor whose first dimension indexes positions.

        Raises:
            ValueError: If ``x.size(0)`` exceeds the number of precomputed
                positions.
        """
        seq_len = x.size(0)
        table_len = self.pe.size(0)
        if seq_len > table_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds the positional encoding "
                f"table size {table_len}"
            )
        x = x + self.pe[:seq_len]
        return self.dropout(x)
|
|
class GEM(nn.Module):
    """Token-level model: embedding, positional encoding, Transformer encoder, vocabulary projection.

    Args:
        vocab_size: Number of token ids; also the size of the output logits.
        d_model: Embedding / hidden size.
        n_heads: Number of attention heads per encoder layer.
        d_ff: Feed-forward width inside each encoder layer.
        n_layers: Number of stacked encoder layers.
        dropout: Dropout probability for the positional encoding and the
            encoder layers.
        max_len: Number of positions the positional encoding precomputes.
    """

    def __init__(self, vocab_size, d_model, n_heads, d_ff, n_layers, dropout=0.1, max_len=512):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model, max_len=max_len, dropout=dropout)
        encoder_layers = nn.TransformerEncoderLayer(d_model, n_heads, d_ff, dropout, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, n_layers)
        self.fc_out = nn.Linear(d_model, vocab_size)
        self.d_model = d_model

    def forward(self, input_ids, attention_mask=None):
        """Compute per-position vocabulary logits.

        Args:
            input_ids: Token ids of shape ``(batch, seq_len)``; the encoder
                is configured with ``batch_first=True``.
            attention_mask: Optional mask of shape ``(batch, seq_len)``. It
                is cast to bool and forwarded as ``src_key_padding_mask``,
                where True marks positions the encoder ignores.
                NOTE(review): this is the opposite of the common
                "1 = real token" attention-mask convention; confirm what
                callers pass.

        Returns:
            Logits of shape ``(batch, seq_len, vocab_size)``.

        NOTE(review): no causal mask is passed to the encoder, so every
        position attends to the whole sequence; confirm this matches the
        training objective.
        """
        x = self.embedding(input_ids) * math.sqrt(self.d_model)

        # The positional encoding slices its table by dimension 0, i.e. it is
        # sequence-first, while the rest of this model is batch-first. Swap
        # the two leading axes around the call so positions are indexed by
        # sequence position rather than by batch index.
        x = self.positional_encoding(x.transpose(0, 1)).transpose(0, 1)

        padding_mask = attention_mask.bool() if attention_mask is not None else None
        x = self.transformer_encoder(x, src_key_padding_mask=padding_mask)

        return self.fc_out(x)

    def generate(self, input_ids, max_length, temperature=1.0):
        """Extend ``input_ids`` one token at a time up to ``max_length`` tokens.

        Runs in eval mode without gradients and restores the module's
        previous train/eval mode before returning.

        Args:
            input_ids: Prompt token ids of shape ``(batch, seq_len)``.
            max_length: Target total length. If the prompt is already at
                least this long it is returned unchanged.
            temperature: Divisor applied to the logits before sampling.
                ``0`` selects the highest-scoring token (greedy decoding).

        Returns:
            Token ids of shape ``(batch, max(seq_len, max_length))``.

        Raises:
            ValueError: If ``temperature`` is negative.
        """
        if temperature < 0:
            raise ValueError(f"temperature must be non-negative, got {temperature}")

        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                for _ in range(max_length - input_ids.size(1)):
                    last_logits = self(input_ids)[:, -1, :]
                    if temperature == 0:
                        # Dividing by zero would yield inf/nan logits; the
                        # zero-temperature limit is the argmax.
                        next_token = last_logits.argmax(dim=-1, keepdim=True)
                    else:
                        probs = F.softmax(last_logits / temperature, dim=-1)
                        next_token = torch.multinomial(probs, num_samples=1)
                    input_ids = torch.cat([input_ids, next_token], dim=-1)
        finally:
            # Do not leave a model that was training stuck in eval mode.
            self.train(was_training)
        return input_ids
|
|