| """Visibility-restricted encoder attention (CoRe-ECG reconstruction encoder).""" | |
| from __future__ import annotations | |
| import math | |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| from mae.mlp import MLP | |


def build_encoder_attn_bias(v: torch.Tensor) -> torch.Tensor:
    """
    Additive attention bias (B, L, L): visible queries attend to visible keys;
    non-visible queries fall back to an identity self-attention row.
    """
    B, L = v.shape
    dtype = torch.float32
    # A (query, key) pair is allowed only when both tokens are visible.
    pair_ok = v.unsqueeze(2) & v.unsqueeze(1)
    bias = torch.zeros(B, L, L, device=v.device, dtype=dtype)
    bias.masked_fill_(~pair_ok, -1e4)
    # Rows for non-visible queries collapse to identity self-attention so the
    # softmax stays well defined; their outputs are zeroed downstream anyway.
    not_q = ~v
    eye = torch.eye(L, device=v.device, dtype=dtype).unsqueeze(0)
    off_diag = torch.full((1, L, L), -1e4, device=v.device, dtype=dtype)
    identity_row = torch.where(eye > 0.5, torch.zeros_like(off_diag), off_diag)
    identity_row = identity_row.expand(B, -1, -1)
    bias = torch.where(not_q.unsqueeze(-1), identity_row, bias)
    return bias
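
# A minimal usage sketch of the bias (assumption for this example only: `v` is a
# boolean (B, L) visibility mask; the toy input and the values shown are
# illustrative, not part of the original module):
#
#     v = torch.tensor([[True, True, False, False]])
#     bias = build_encoder_attn_bias(v)
#     # bias[0] is then
#     #   [[     0.,      0., -10000., -10000.],
#     #    [     0.,      0., -10000., -10000.],
#     #    [-10000., -10000.,      0., -10000.],
#     #    [-10000., -10000., -10000.,      0.]]
#
# Rows 0-1 (visible queries) attend only to columns 0-1 (visible keys);
# rows 2-3 (non-visible queries) keep only their diagonal entry.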


class EncoderAttentionBlock(nn.Module):
    """Pre-norm transformer block whose self-attention respects a visibility mask."""

    def __init__(self, dim: int, n_heads: int, mlp_ratio: float, dropout: float):
        super().__init__()
        if dim % n_heads != 0:
            raise ValueError("dim must be divisible by n_heads")
        self.n_heads = n_heads
        self.head_dim = dim // n_heads
        self.scale = self.head_dim**-0.5
        self.norm1 = nn.LayerNorm(dim)
        self.norm2 = nn.LayerNorm(dim)
        self.qkv = nn.Linear(dim, 3 * dim)
        self.proj = nn.Linear(dim, dim)
        self.attn_drop = nn.Dropout(dropout)
        self.mlp = MLP(dim, int(dim * mlp_ratio), dropout)

    def forward(self, x: torch.Tensor, v: torch.Tensor, attn_bias: torch.Tensor) -> torch.Tensor:
        # Attention sub-layer with a pre-norm residual connection.
        x_norm = self.norm1(x)
        out = self._visibility_attn(x_norm, v, attn_bias)
        x = x + out
        # MLP sub-layer with a pre-norm residual connection.
        x = x + self.mlp(self.norm2(x))
        # Zero out non-visible positions so they carry no signal to later blocks.
        x = x * v.unsqueeze(-1).to(x.dtype)
        return x

    def _visibility_attn(
        self, x: torch.Tensor, v: torch.Tensor, attn_bias: torch.Tensor
    ) -> torch.Tensor:
        B, L, D = x.shape
        H, Dh = self.n_heads, self.head_dim
        # Project to Q, K, V and split heads: q, k, val are each (B, H, L, Dh).
        qkv = self.qkv(x).reshape(B, L, 3, H, Dh).permute(2, 0, 3, 1, 4)
        q, k, val = qkv[0], qkv[1], qkv[2]
        scores = torch.matmul(q, k.transpose(-2, -1)) * self.scale
        # Broadcast the (B, L, L) additive bias across heads.
        scores = scores + attn_bias.unsqueeze(1)
        attn = F.softmax(scores, dim=-1)
        # Zero attention rows belonging to non-visible queries, then renormalize
        # the surviving (visible-query) rows.
        attn = attn * v.unsqueeze(1).unsqueeze(-1)
        attn_sum = attn.sum(dim=-1, keepdim=True)
        attn = torch.where(attn_sum > 0, attn / attn_sum.clamp_min(1e-8), attn)
        attn = self.attn_drop(attn)
        out = torch.matmul(attn, val).transpose(1, 2).reshape(B, L, D)
        return self.proj(out)
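

# Minimal smoke-test sketch (not part of the original module). It only uses the
# two definitions above; the shapes and hyperparameters below are illustrative.
if __name__ == "__main__":
    torch.manual_seed(0)
    B, L, D = 2, 8, 32
    v = torch.rand(B, L) > 0.5  # boolean (B, L) visibility mask
    x = torch.randn(B, L, D)
    bias = build_encoder_attn_bias(v)
    block = EncoderAttentionBlock(dim=D, n_heads=4, mlp_ratio=4.0, dropout=0.0)
    y = block(x, v, bias)
    print(y.shape)  # expected: torch.Size([2, 8, 32])
    # Non-visible positions are zeroed by the block's output mask.
    assert torch.all(y[~v] == 0)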