0.20.7

Merge pull request #151 from developer0hye/patch-1
Cleanup Attention Class & matmul based implementation for TensorRT conversion
2025-12-30 16:12:29 +00:00 · 2021-08-30 08:14:43 -07:00 · 2021-08-30 08:14:11 -07:00 · 2021-08-30 18:25:03 +09:00 · 2021-08-30 18:05:16 +09:00 · 2021-08-21 09:03:54 -07:00
6 changed files with 11 additions and 12 deletions
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
 setup(
  name = 'vit-pytorch',
  packages = find_packages(exclude=['examples']),
-  version = '0.20.2',
+  version = '0.20.7',
  license='MIT',
  description = 'Vision Transformer (ViT) - Pytorch',
  author = 'Phil Wang',
--- a/vit_pytorch/distill.py
+++ b/vit_pytorch/distill.py
@@ -148,6 +148,6 @@ class DistillWrapper(nn.Module):

        else:
            teacher_labels = teacher_logits.argmax(dim = -1)
-            distill_loss = F.cross_entropy(student_logits, teacher_labels)
+            distill_loss = F.cross_entropy(distill_logits, teacher_labels)

        return loss * (1 - alpha) + distill_loss * alpha
--- a/vit_pytorch/levit.py
+++ b/vit_pytorch/levit.py
@@ -29,7 +29,7 @@ class FeedForward(nn.Module):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(dim, dim * mult, 1),
-            nn.GELU(),
+            nn.Hardswish(),
            nn.Dropout(dropout),
            nn.Conv2d(dim * mult, dim, 1),
            nn.Dropout(dropout)
--- a/vit_pytorch/pit.py
+++ b/vit_pytorch/pit.py
@@ -175,7 +175,7 @@ class PiT(nn.Module):

        cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = b)
        x = torch.cat((cls_tokens, x), dim=1)
-        x += self.pos_embedding
+        x += self.pos_embedding[:, :n+1]
        x = self.dropout(x)

        x = self.layers(x)
--- a/vit_pytorch/t2t.py
+++ b/vit_pytorch/t2t.py
@@ -35,13 +35,14 @@ class T2TViT(nn.Module):
        for i, (kernel_size, stride) in enumerate(t2t_layers):
            layer_dim *= kernel_size ** 2
            is_first = i == 0
+            is_last = i == (len(t2t_layers) - 1)
            output_image_size = conv_output_size(output_image_size, kernel_size, stride, stride // 2)

            layers.extend([
                RearrangeImage() if not is_first else nn.Identity(),
                nn.Unfold(kernel_size = kernel_size, stride = stride, padding = stride // 2),
                Rearrange('b c n -> b n c'),
-                Transformer(dim = layer_dim, heads = 1, depth = 1, dim_head = layer_dim, mlp_dim = layer_dim, dropout = dropout),
+                Transformer(dim = layer_dim, heads = 1, depth = 1, dim_head = layer_dim, mlp_dim = layer_dim, dropout = dropout) if not is_last else nn.Identity(),
            ])

        layers.append(nn.Linear(layer_dim, dim))
@@ -71,7 +72,7 @@ class T2TViT(nn.Module):

        cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = b)
        x = torch.cat((cls_tokens, x), dim=1)
-        x += self.pos_embedding
+        x += self.pos_embedding[:, :n+1]
        x = self.dropout(x)

        x = self.transformer(x)
--- a/vit_pytorch/vit.py
+++ b/vit_pytorch/vit.py
@@ -1,6 +1,5 @@
 import torch
-from torch import nn, einsum
-import torch.nn.functional as F
+from torch import nn

 from einops import rearrange, repeat
 from einops.layers.torch import Rearrange
@@ -51,15 +50,14 @@ class Attention(nn.Module):
        ) if project_out else nn.Identity()

    def forward(self, x):
-        b, n, _, h = *x.shape, self.heads
        qkv = self.to_qkv(x).chunk(3, dim = -1)
-        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), qkv)
+        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = self.heads), qkv)

-        dots = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale
+        dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale

        attn = self.attend(dots)

-        out = einsum('b h i j, b h j d -> b h i d', attn, v)
+        out = torch.matmul(attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)')
        return self.to_out(out)
Author	SHA1	Message	Date
Phil Wang	d2d6de01d3	0.20.7	2021-08-30 08:14:43 -07:00
Phil Wang	b9eadaef60	Merge pull request #151 from developer0hye/patch-1 Cleanup Attention Class & matmul based implementation for TensorRT conversion	2021-08-30 08:14:11 -07:00
Yonghye Kwon	24ac8350bf	remove unused package	2021-08-30 18:25:03 +09:00
Yonghye Kwon	ca3cef9de0	Cleanup Attention Class	2021-08-30 18:05:16 +09:00
Phil Wang	6e1be11517	0.20.6	2021-08-21 09:03:54 -07:00
Phil Wang	73ed562ce4	Merge pull request #147 from developer0hye/patch-4 Make T2T process any scale image	2021-08-21 09:03:42 -07:00
Phil Wang	ff863175a6	Merge pull request #146 from developer0hye/patch-1 Make Pit process image with width and height less than the image_size	2021-08-21 09:03:31 -07:00
Yonghye Kwon	ca0bdca192	Make model process any scale image Related to #145	2021-08-21 22:35:26 +09:00
Yonghye Kwon	1c70271778	Support image with width and height less than the image_size Related to #145	2021-08-21 22:25:46 +09:00
Phil Wang	d7d3febfe3	Merge pull request #144 from developer0hye/patch-1 Remove unused package	2021-08-20 10:14:02 -07:00
Yonghye Kwon	946815164a	Remove unused package	2021-08-20 13:44:57 +09:00
Phil Wang	aeed3381c1	use hardswish for levit	2021-08-19 08:22:55 -07:00
Phil Wang	3f754956fb	remove last transformer layer in t2t	2021-08-14 08:06:23 -07:00
Phil Wang	918869571c	fix hard distillation, thanks to @CiaoHe	2021-08-12 08:40:57 -07:00