# NOTE: stray extraction artifacts ("Spaces:" / "Running") removed from the top of this file.
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| import numbers | |
| from dataclasses import dataclass, asdict | |
| from einops import rearrange | |
class UpSample(nn.Module):
    """Double the spatial resolution via a 1x1 conv followed by PixelShuffle.

    The conv expands channels to 2C, then PixelShuffle(2) trades a 4x
    channel factor for a 2x2 spatial upscale, so the overall mapping is
    (B, C, H, W) -> (B, C/2, 2H, 2W).
    """

    def __init__(self, filters=64):
        super().__init__()
        self.conv = nn.Conv2d(filters, filters * 2, kernel_size=1, stride=1, padding=0, bias=True)
        self.pixel_shuffle = nn.PixelShuffle(upscale_factor=2)

    def forward(self, x):
        # Channel expansion first, then depth-to-space rearrangement.
        return self.pixel_shuffle(self.conv(x))
## DownSampling block
class DownSample(nn.Module):
    """DownSampling block using PixelUnshuffle.

    The 1x1 conv halves the channels to C/2, then PixelUnshuffle(2)
    multiplies channels by 4 while halving each spatial dimension, so the
    overall mapping is (B, C, H, W) -> (B, 2C, H/2, W/2).
    """

    def __init__(self, filters=64):
        super().__init__()
        self.conv = nn.Conv2d(filters, filters // 2, kernel_size=1, stride=1, padding=0, bias=True)
        self.pixel_unshuffle = nn.PixelUnshuffle(downscale_factor=2)

    def forward(self, x):
        # Fix: the old docstring claimed (B, C/4, H/2, W/2); the actual
        # composition yields (filters // 2) * 4 == 2 * filters channels.
        x = self.conv(x)
        x = self.pixel_unshuffle(x)
        return x
# Custom LayerNormalization
class BiasFree_LayerNorm(nn.Module):
    """Bias-free layer normalization.

    Rescales by the last-dimension standard deviation only — no mean
    subtraction and no additive bias — with a learnable per-channel gain.
    """

    def __init__(self, normalized_shape):
        super().__init__()
        # Accept a bare int or a 1-element shape.
        if isinstance(normalized_shape, numbers.Integral):
            normalized_shape = (normalized_shape,)
        normalized_shape = torch.Size(normalized_shape)
        assert len(normalized_shape) == 1
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.normalized_shape = normalized_shape

    def forward(self, x):
        x = x.contiguous()
        # Population variance (unbiased=False); eps keeps the sqrt stable.
        denom = torch.sqrt(x.var(-1, keepdim=True, unbiased=False) + 1e-5)
        return x / denom * self.weight
class WithBias_LayerNorm(nn.Module):
    """Layer normalization over the last dimension with a learnable affine
    transform (gain initialised to ones, bias to zeros)."""

    def __init__(self, normalized_shape):
        super().__init__()
        # Accept a bare int or a 1-element shape.
        if isinstance(normalized_shape, numbers.Integral):
            normalized_shape = (normalized_shape,)
        normalized_shape = torch.Size(normalized_shape)
        assert len(normalized_shape) == 1
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.normalized_shape = normalized_shape

    def forward(self, x):
        x = x.contiguous()
        mu = x.mean(-1, keepdim=True)
        # Population variance (unbiased=False); eps keeps the sqrt stable.
        denom = torch.sqrt(x.var(-1, keepdim=True, unbiased=False) + 1e-5)
        return (x - mu) / denom * self.weight + self.bias
class LayerNorm(nn.Module):
    """Layer normalization for (B, C, H, W) feature maps.

    Flattens spatial positions to tokens, applies a BiasFree or WithBias
    1-D layer norm over the channel axis, and (when ``out_4d``) restores
    the 4-D layout.
    """

    def __init__(self, dim, LayerNorm_type, out_4d=True):
        super().__init__()
        # Any type string other than 'BiasFree' selects the with-bias norm.
        self.body = BiasFree_LayerNorm(dim) if LayerNorm_type == 'BiasFree' else WithBias_LayerNorm(dim)
        self.out_4d = out_4d

    def to_3d(self, x):
        # (B, C, H, W) -> (B, H*W, C); 3-D input passes through untouched.
        if x.dim() == 3:
            return x
        if x.dim() == 4:
            return rearrange(x, 'b c h w -> b (h w) c')
        raise ValueError("Input must be a 3D or 4D tensor")

    def to_4d(self, x, h, w):
        # (B, H*W, C) -> (B, C, H, W); 4-D input passes through untouched.
        if x.dim() == 4:
            return x
        if x.dim() == 3:
            return rearrange(x, 'b (h w) c -> b c h w', h=h, w=w)
        raise ValueError("Input must be a 3D or 4D tensor")

    def forward(self, x):
        if not self.out_4d:
            return self.body(x)
        # NOTE(review): assumes a 4-D input here — for a 3-D input,
        # shape[-2:] would not be (h, w). Confirm callers only pass maps.
        h, w = x.shape[-2:]
        return self.to_4d(self.body(self.to_3d(x)), h, w)
class RepConv3(nn.Module):
    """Re-parameterizable 3x3 convolution (RepVGG / DBB style).

    In training mode the forward pass sums five parallel branches
    (3x3, 1x1, 1x3, 3x1, and a bias-free 1x1 -> 3x3 sequence);
    ``fuse()`` folds all of them into the single ``reparam`` 3x3 conv so
    that deployment costs exactly one convolution.
    """

    def __init__(self, in_channels, out_channels, groups, deploy=False):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.groups = groups
        self.deploy = deploy
        # Target conv that receives the fused weights/bias in fuse().
        self.reparam = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, groups=groups)
        if not deploy:
            self.conv_3x3 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, groups=groups)
            self.conv_1x1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, groups=groups)
            self.conv_1x3 = nn.Conv2d(in_channels, out_channels, kernel_size=(1, 3), padding=(0, 1), groups=groups)
            self.conv_3x1 = nn.Conv2d(in_channels, out_channels, kernel_size=(3, 1), padding=(1, 0), groups=groups)
            # Sequential branch is bias-free, so fusion needs no bias from it.
            self.conv_1x1_branch = nn.Conv2d(in_channels, in_channels, kernel_size=1, groups=groups, bias=False)
            self.conv_3x3_branch = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, groups=groups, bias=False)
        else:
            # No branches were created in deploy mode; hasattr-guarded no-op.
            self._delete_branches()

    def _delete_branches(self):
        # Remove the training-time branches (if present) after fusing.
        for name in ['conv_3x3', 'conv_1x1', 'conv_1x3', 'conv_3x1', 'conv_1x1_branch', 'conv_3x3_branch']:
            if hasattr(self, name):
                delattr(self, name)

    def fuse(self, delete_branches=True):
        """Fold every branch into ``self.reparam`` and enter deploy mode.

        Idempotent: returns immediately when already deployed.
        """
        if self.deploy:
            return
        # Extract weights and biases
        conv_3x3_w, conv_3x3_b = self.conv_3x3.weight, self.conv_3x3.bias
        conv_1x1_w, conv_1x1_b = self.conv_1x1.weight, self.conv_1x1.bias
        conv_1x3_w, conv_1x3_b = self.conv_1x3.weight, self.conv_1x3.bias
        conv_3x1_w, conv_3x1_b = self.conv_3x1.weight, self.conv_3x1.bias
        conv_1x1_branch_w, conv_3x3_branch_w = self.conv_1x1_branch.weight, self.conv_3x3_branch.weight
        # Pad the smaller kernels to 3x3 so all branches can be summed
        # element-wise (F.pad order is [left, right, top, bottom]).
        conv_1x1_w_pad = F.pad(conv_1x1_w, [1, 1, 1, 1])
        conv_1x3_w_pad = F.pad(conv_1x3_w, [0, 0, 1, 1])
        conv_3x1_w_pad = F.pad(conv_3x1_w, [1, 1, 0, 0])
        if self.groups == 1:
            # Compose the 1x1 -> 3x3 sequence into one 3x3 kernel: convolve
            # the 3x3 weight with the transposed 1x1 weight.
            conv_1x1_3x3_w_pad = F.conv2d(conv_3x3_branch_w, conv_1x1_branch_w.permute(1, 0, 2, 3))
        else:
            # Grouped case: compose the branch pair group by group, then
            # concatenate along the output-channel axis.
            w_slices = []
            conv_1x1_branch_w_T = conv_1x1_branch_w.permute(1, 0, 2, 3)
            in_channels_per_group = self.in_channels // self.groups
            out_channels_per_group = self.out_channels // self.groups
            for g in range(self.groups):
                # Slice the transposed 1x1 weights for this group's channels
                conv_1x1_branch_w_T_slice = conv_1x1_branch_w_T[:, g * in_channels_per_group:(g + 1) * in_channels_per_group, :, :]
                # Slice the 3x3 weights for this group's output channels
                conv_3x3_branch_w_slice = conv_3x3_branch_w[g * out_channels_per_group:(g + 1) * out_channels_per_group, :, :, :]
                w_slices.append(F.conv2d(conv_3x3_branch_w_slice, conv_1x1_branch_w_T_slice))
            conv_1x1_3x3_w_pad = torch.cat(w_slices, dim=0)
        # Fuse weights and biases
        conv_w = conv_3x3_w + conv_1x1_w_pad + conv_1x3_w_pad + conv_3x1_w_pad + conv_1x1_3x3_w_pad
        if conv_3x3_b is None:
            conv_3x3_b = torch.zeros(self.out_channels, device=conv_w.device)
        # Only the four direct branches carry biases (the sequential branch
        # was constructed bias-free).
        conv_b = conv_3x3_b + conv_1x1_b + conv_1x3_b + conv_3x1_b
        self.reparam.weight.data.copy_(conv_w)
        self.reparam.bias.data.copy_(conv_b)
        # Delete the original branches
        if delete_branches:
            self._delete_branches()
        # Set deploy flag
        self.deploy = True

    def forward(self, x):
        # Deploy: single fused conv. Training: sum of the five branches.
        if self.deploy:
            return self.reparam(x)
        else:
            return self.conv_3x3(x) + self.conv_1x1(x) + self.conv_1x3(x) + self.conv_3x1(x) + self.conv_3x3_branch(self.conv_1x1_branch(x))
| from monarch_attn import MonarchAttention | |
@dataclass
class RepAttnConfig:
    """Configuration for RepAttn; expanded with ``asdict`` at construction.

    Fix: the ``@dataclass`` decorator was missing, so these were plain
    class-level annotations with no generated ``__init__`` —
    ``RepAttnConfig(dim=...)`` and ``asdict(cfg)`` raised at runtime.
    """
    dim: int                 # channel dimension of the attention block
    num_heads: int = 8       # number of attention heads
    block_size: int = 16     # MonarchAttention block size
    num_steps: int = 2       # MonarchAttention iteration steps
    pad_type: str = "pre"    # padding strategy passed to MonarchAttention
    impl: str = "torch"      # backend implementation selector
    deploy: bool = False     # True -> RepAttn uses the fused Monarch path
class RepAttn(nn.Module):
    """Re-parameterizable attention block.

    Trains with plain softmax attention (``common_attn``) and can be
    switched via ``fuse()`` to MonarchAttention for deployment. Q/K/V are
    produced by one 1x1 conv and laid out as (b, head, c, h*w), so the
    attention matrix is formed over the channel axis.
    """

    def __init__(self, dim, num_heads=8, block_size=14, num_steps=1, pad_type="pre", impl="torch", deploy=False):
        super().__init__()
        self.num_heads = num_heads
        self.qkv = nn.Conv2d(dim, dim * 3, kernel_size=1)
        self.monarch_attn = MonarchAttention(
            block_size=block_size,
            num_steps=num_steps,
            pad_type=pad_type,
            impl=impl,
        )
        # Deploy mode routes straight to the Monarch kernel.
        self.attn_fn = self.monarch_attn if deploy else self.common_attn
        self.proj = nn.Conv2d(dim, dim, kernel_size=1)
        self.deploy = deploy

    def common_attn(self, q, k, v):
        """softmax(q kᵀ / sqrt(d)) v, where d is the last dim of q."""
        scores = torch.matmul(q, k.transpose(-2, -1)) * q.shape[-1] ** -0.5
        return torch.matmul(scores.softmax(dim=-1), v)

    def fuse(self):
        # Swap in the Monarch path; idempotent once deployed.
        if not self.deploy:
            self.attn_fn = self.monarch_attn
            self.deploy = True

    def forward(self, x):
        B, C, H, W = x.shape
        q, k, v = torch.chunk(self.qkv(x), 3, dim=1)
        heads = self.num_heads
        q = rearrange(q, 'b (head c) h w -> b head c (h w)', head=heads)
        k = rearrange(k, 'b (head c) h w -> b head c (h w)', head=heads)
        v = rearrange(v, 'b (head c) h w -> b head c (h w)', head=heads)
        attended = self.attn_fn(q, k, v)
        attended = rearrange(attended, 'b head c (h w) -> b (head c) h w', head=heads, h=H, w=W)
        return self.proj(attended)
@dataclass
class FFNConfig:
    """Configuration for RepFFN; expanded with ``asdict`` at construction.

    Fix: the ``@dataclass`` decorator was missing, so ``FFNConfig(dim=...)``
    and ``asdict(cfg)`` raised at runtime.
    """
    dim: int                     # input/output channel dimension
    expansion_factor: int = 1    # hidden-width multiplier
    deploy: bool = False         # build the RepConv3 layers already fused
class RepFFN(nn.Module):
    """Gated feed-forward block built on re-parameterizable convolutions.

    ``project_in`` lifts channels to the hidden width, a depthwise RepConv3
    produces two halves combined as GELU(gate) * value, and a 1x1 conv
    projects back to ``dim``.
    """

    def __init__(self, dim, expansion_factor=1, deploy=False):
        super().__init__()
        hidden_features = int(dim * expansion_factor)
        self.project_in = RepConv3(dim, hidden_features, groups=1, deploy=deploy)
        # Depthwise (groups == channels) conv emitting 2x channels for gating.
        self.dwconv = RepConv3(hidden_features, hidden_features * 2, groups=hidden_features, deploy=deploy)
        self.project_out = nn.Conv2d(hidden_features, dim, kernel_size=1)

    def fuse(self):
        # Collapse both RepConv3 modules into their single-conv form.
        for conv in (self.project_in, self.dwconv):
            conv.fuse()

    def forward(self, x):
        hidden = self.project_in(x)
        gate, value = self.dwconv(hidden).chunk(2, dim=1)
        return self.project_out(F.gelu(gate) * value)
class SkipConnection(nn.Module):
    """Merge a decoder feature with its encoder skip: concat + 1x1 conv."""

    def __init__(self, dim):
        super().__init__()
        # The 2*dim concatenated channels are projected back down to dim.
        self.conv = nn.Conv2d(dim * 2, dim, kernel_size=1)

    def forward(self, x1, x2):
        return self.conv(torch.cat([x1, x2], dim=1))
class RepTransformerBlock(nn.Module):
    """Pre-norm transformer block: x + Attn(LN(x)), then x + FFN(LN(x))."""

    def __init__(self, rep_attn_cfg: RepAttnConfig, ffn_cfg: FFNConfig, norm_type='WithBias'):
        super().__init__()
        # Configs are flattened into keyword arguments for the sub-modules.
        self.rep_attn = RepAttn(**asdict(rep_attn_cfg))
        self.rep_ffn = RepFFN(**asdict(ffn_cfg))
        self.norm1 = LayerNorm(rep_attn_cfg.dim, norm_type)
        self.norm2 = LayerNorm(rep_attn_cfg.dim, norm_type)

    def fuse(self):
        # Re-parameterize both sub-modules for deployment.
        self.rep_attn.fuse()
        self.rep_ffn.fuse()

    def forward(self, x):
        x = x + self.rep_attn(self.norm1(x))
        return x + self.rep_ffn(self.norm2(x))
class Block(nn.Module):
    """A stage of ``num_block`` RepTransformerBlocks applied sequentially."""

    def __init__(self, num_block, rep_attn_cfg: RepAttnConfig, ffn_cfg: FFNConfig, norm_type='WithBias'):
        super().__init__()
        self.num_block = num_block
        self.blocks = nn.ModuleList(
            RepTransformerBlock(rep_attn_cfg, ffn_cfg, norm_type) for _ in range(num_block)
        )

    def fuse(self):
        # Fuse every transformer block in this stage.
        for blk in self.blocks:
            blk.fuse()

    def forward(self, x):
        for blk in self.blocks:
            x = blk(x)
        return x
class ColorComicNet(nn.Module):
    """U-Net-style model built from re-parameterizable transformer blocks.

    Pipeline: strided 7x7 stem (4x spatial reduction) -> encoder stages
    separated by DownSample -> bottleneck stage -> decoder stages with
    UpSample + SkipConnection -> bilinear 4x upsample -> conv head whose
    output is added to the original input as a global residual.
    """

    def __init__(self, input_shape=(3, 1024, 1024), output_channels=3, deploy=False,
                 dims=(48, 96, 192, 384), num_blocks=(4, 6, 6, 8), num_heads=(1, 2, 2, 4),
                 bias=True, last_act=None):
        # Fix: dims/num_blocks/num_heads defaults were mutable lists; they are
        # only read (len() and indexing), so tuples are backward-compatible
        # and avoid the shared-mutable-default pitfall.
        super().__init__()
        assert len(dims) == len(num_blocks) == len(num_heads), "Length of dims, num_blocks and num_heads must be the same"
        self.input_shape = input_shape
        self.output_channels = output_channels
        self.deploy = deploy
        self.dims = dims
        self.num_blocks = num_blocks
        self.bias = bias
        self.num_heads = num_heads
        # Extractor: stride-4 stem, so all stages work at 1/4 resolution.
        self.stem = nn.Conv2d(input_shape[0], dims[0], kernel_size=7, stride=4, padding=3, bias=bias)
        # Encoder
        layers = []
        down_convs = []
        for idx in range(len(dims)):
            attn_cfg, ffn_cfg = self.build_cfg(dims[idx], num_heads[idx])
            block = Block(num_blocks[idx], attn_cfg, ffn_cfg, norm_type='WithBias')
            if idx < len(dims) - 1:
                down_convs.append(DownSample(dims[idx]))
            layers.append(block)
        self.bottleneck = layers[-1]  # Last encoder stage doubles as the bottleneck
        self.encoder = nn.ModuleList(layers[:-1])
        self.downsample = nn.ModuleList(down_convs)
        # Decoder (built deepest-first so it pairs with feats.pop() in forward)
        layers = []
        up_convs = []
        skip_connections = []
        for idx in range(len(dims) - 2, -1, -1):
            attn_cfg, ffn_cfg = self.build_cfg(dims[idx], num_heads[idx])
            up_conv = UpSample(dims[idx + 1])
            block = Block(num_blocks[idx], attn_cfg, ffn_cfg, norm_type='WithBias')
            layers.append(block)
            up_convs.append(up_conv)
            skip_connections.append(SkipConnection(dims[idx]))
        self.decoder = nn.ModuleList(layers)
        self.up_sample = nn.ModuleList(up_convs)
        self.skip = nn.ModuleList(skip_connections)
        # Head
        self.head = nn.Sequential(
            RepConv3(dims[0], dims[0] // 2, 1, deploy=deploy),
            nn.GELU(),
            nn.Conv2d(dims[0] // 2, output_channels, kernel_size=1, bias=bias),
        )
        self.last_act = last_act if last_act is not None else nn.Identity()

    def fuse(self):
        """Re-parameterize every fusable sub-module for deployment."""
        for block in self.encoder:
            block.fuse()
        self.bottleneck.fuse()
        for block in self.decoder:
            block.fuse()
        for conv in self.head:
            if isinstance(conv, RepConv3):
                conv.fuse()

    def build_cfg(self, dim, head):
        """Build the (attention, FFN) config pair for one stage."""
        # RepAttn config
        attn_cfg = RepAttnConfig(
            dim=dim,
            num_heads=head,
            block_size=12,
            num_steps=2,
            pad_type="pre",
            impl="torch",
            deploy=self.deploy
        )
        ## FFN config
        ffn_cfg = FFNConfig(
            dim=dim,
            expansion_factor=1,
            # Fix: deploy was previously left at its default (False), unlike
            # the attention config and the head's RepConv3 — a deploy-mode
            # model then built unfused FFN branches whose parameters would
            # not match a fused checkpoint.
            deploy=self.deploy,
        )
        return attn_cfg, ffn_cfg

    def forward(self, x):
        """
        x: (B, C, H, W)
        """
        # Keep the input for the global residual; NOTE(review): this requires
        # output_channels == input_shape[0] for the addition below — confirm.
        res = x
        x = self.stem(x)
        feats = []
        for blk, down in zip(self.encoder, self.downsample):
            x = blk(x)
            feats.append(x)  # stash pre-downsample feature for the skip path
            x = down(x)
        x = self.bottleneck(x)
        for blk, up, skip in zip(self.decoder, self.up_sample, self.skip):
            x = up(x)
            cur_feat = feats.pop()  # deepest remaining encoder feature
            x = skip(x, cur_feat)
            x = blk(x)
        # Undo the stem's 4x stride before the head.
        x = F.interpolate(x, scale_factor=4, mode='bilinear')
        x = self.head(x) + res
        x = self.last_act(x)
        return x
# Example model configuration (keyword arguments for ColorComicNet(**MODEL_CFG))
MODEL_CFG = {
    'input_shape': (3, 512, 512),   # (C, H, W) of the expected input
    'dims': [24, 48, 96, 192],      # channel width per stage
    'num_blocks': [1, 2, 2, 4],     # transformer blocks per stage
    'num_heads': [1, 2, 4, 8],      # attention heads per stage
    'bias': True,                   # bias in stem/head convolutions
    'last_act': nn.Tanh(),          # final activation -> output in [-1, 1]
    'deploy': False                 # keep re-parameterizable training branches
}