import torch
import torch.nn.functional as F
from torch import nn, einsum

from einops import rearrange

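# helpers

def default(val, d):
    # added so the MLP class below is runnable: fall back to d when val is None
    return val if val is not None else d
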
class Residual(nn.Module):
    # wraps a module and adds its input back to its output (skip connection)
    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, x, **kwargs):
        return self.fn(x, **kwargs) + x

class PreNorm(nn.Module):
    # applies LayerNorm to the input before the wrapped module
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)

# attention

class GEGLU(nn.Module):
    # gated GELU: split the last dimension in half and gate one half by GELU of the other
    def forward(self, x):
        x, gates = x.chunk(2, dim = -1)
        return x * F.gelu(gates)

class FeedForward(nn.Module):
    def __init__(self, dim, mult = 4, dropout = 0.):
        super().__init__()
        # first linear projects to twice the hidden width because GEGLU halves it again
        self.net = nn.Sequential(
            nn.Linear(dim, dim * mult * 2),
            GEGLU(),
            nn.Dropout(dropout),
            nn.Linear(dim * mult, dim)
        )

    def forward(self, x, **kwargs):
        return self.net(x)

class Attention(nn.Module):
    # standard multi-head self-attention
    def __init__(
        self,
        dim,
        heads = 8,
        dim_head = 16,
        dropout = 0.
    ):
        super().__init__()
        inner_dim = dim_head * heads
        self.heads = heads
        self.scale = dim_head ** -0.5

        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)
        self.to_out = nn.Linear(inner_dim, dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        h = self.heads
        # project to queries, keys and values, then split out the heads
        q, k, v = self.to_qkv(x).chunk(3, dim = -1)
        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), (q, k, v))

        # scaled dot-product similarity between all pairs of positions
        sim = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale

        attn = sim.softmax(dim = -1)
        attn = self.dropout(attn)

        # attention-weighted sum of values, then merge the heads back together
        out = einsum('b h i j, b h j d -> b h i d', attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)', h = h)
        return self.to_out(out)

# transformer

class Transformer(nn.Module):
    def __init__(self, num_tokens, dim, depth, heads, dim_head, attn_dropout, ff_dropout):
        super().__init__()
        self.embeds = nn.Embedding(num_tokens, dim)
        self.layers = nn.ModuleList([])

        # each layer is a pre-norm attention block followed by a pre-norm feedforward block,
        # both wrapped in residual connections
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                Residual(PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = attn_dropout))),
                Residual(PreNorm(dim, FeedForward(dim, dropout = ff_dropout))),
            ]))

    def forward(self, x):
        x = self.embeds(x)

        for attn, ff in self.layers:
            x = attn(x)
            x = ff(x)

        return x

# mlp

class MLP(nn.Module):
    def __init__(self, dims, act = None):
        super().__init__()
        dims_pairs = list(zip(dims[:-1], dims[1:]))
        layers = []
        for ind, (dim_in, dim_out) in enumerate(dims_pairs):
            # skip the activation after the final linear layer
            is_last = ind >= (len(dims_pairs) - 1)
            linear = nn.Linear(dim_in, dim_out)
            layers.append(linear)

            if is_last:
                continue

            act = default(act, nn.ReLU())
            layers.append(act)

        self.mlp = nn.Sequential(*layers)

    def forward(self, x):
        return self.mlp(x)

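# minimal smoke test (illustrative only: the shapes and hyperparameters below are arbitrary choices, not values from the original file)

if __name__ == '__main__':
    model = Transformer(num_tokens = 10, dim = 32, depth = 2, heads = 4, dim_head = 16, attn_dropout = 0.1, ff_dropout = 0.1)
    tokens = torch.randint(0, 10, (1, 5))     # (batch, seq) of categorical token ids
    contextual = model(tokens)                # -> (1, 5, 32) contextual embeddings

    mlp = MLP([32, 16, 1])                    # Linear(32, 16) -> ReLU -> Linear(16, 1)
    out = mlp(contextual.mean(dim = 1))       # pool over the sequence, then project to a scalar
    print(contextual.shape, out.shape)        # torch.Size([1, 5, 32]) torch.Size([1, 1])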