# positional.py
import torch
import torch.nn as nn
from math import log


class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding (Vaswani et al., "Attention Is All You Need").

    Adds sin/cos position signals to the input embeddings, then applies dropout.
    """

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 500):
        super().__init__()
        # The sin/cos interleaving below assumes an even model dimension.
        assert d_model % 2 == 0, "d_model must be even"
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len, dtype=torch.float).unsqueeze(1)  # (max_len, 1)
        # Inverse frequencies 10000^(-2i/d_model), computed in log space for stability.
        div_term = torch.exp(
            torch.arange(0, d_model, 2, dtype=torch.float) * (-log(10000.0) / d_model)
        )  # (d_model/2,)

        pe = torch.zeros(max_len, d_model, dtype=torch.float)  # (max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)  # even dims: sin, (max_len, d_model/2)
        pe[:, 1::2] = torch.cos(position * div_term)  # odd dims: cos
        pe = pe.unsqueeze(0)  # (1, max_len, d_model) for batch broadcasting
        # Registered as a buffer, not a parameter: it moves with .to()/device but is
        # never trained; persistent=False keeps it out of the state_dict.
        self.register_buffer("pe", pe, persistent=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (B, S, D)
        s = x.size(1)
        assert s <= self.pe.size(1), f"sequence length {s} exceeds max_len {self.pe.size(1)}"
        x = x + self.pe[:, :s, :]  # (1, S, D) broadcasts over the batch -> (B, S, D)
        return self.dropout(x)
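
# --- Usage sketch (illustrative addition, not part of the original file) -----
# A quick shape/sanity check, assuming batch-first inputs of shape (B, S, D).
# With dropout=0 and a zero input, the output is exactly the pe table slice.
if __name__ == "__main__":
    enc = PositionalEncoding(d_model=16, dropout=0.0, max_len=500)
    x = torch.zeros(2, 10, 16)  # (B=2, S=10, D=16)
    out = enc(x)
    assert out.shape == (2, 10, 16)
    assert torch.allclose(out[0], enc.pe[0, :10])
    print("ok:", tuple(out.shape))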