# positional.py
import torch
import torch.nn as nn
from math import log


class PositionalEncoding(nn.Module):
    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 500):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len, dtype=torch.float).unsqueeze(1)  # (max_len, 1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2, dtype=torch.float) * (-log(10000.0) / d_model)
        )  # (d_model/2,)
        pe = torch.zeros(max_len, d_model, dtype=torch.float)  # (max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)  # even columns: (max_len, d_model/2)
        pe[:, 1::2] = torch.cos(position * div_term)  # odd columns
        pe = pe.unsqueeze(0)  # (1, max_len, d_model)
        self.register_buffer("pe", pe, persistent=False)  # buffer, not a parameter

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (B, S, D)
        s = x.size(1)
        x = x + self.pe[:, :s, :]  # (1, S, D) broadcasts to (B, S, D)
        return self.dropout(x)
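

# Minimal usage sketch (illustrative, not part of the module above): the shapes
# and hyperparameters below are assumptions chosen only for demonstration.
if __name__ == "__main__":
    d_model = 16                                  # assumed even embedding size
    pos_enc = PositionalEncoding(d_model, dropout=0.0, max_len=500)
    x = torch.zeros(2, 10, d_model)               # dummy embeddings, (B=2, S=10, D=16)
    out = pos_enc(x)                              # same shape: (2, 10, 16)
    print(out.shape)
    # With zero input and dropout disabled, the output equals the encoding table itself.
    assert torch.allclose(out[0], pos_enc.pe[0, :10, :])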