Compare commits

...

7 Commits

Author SHA1 Message Date
Phil Wang
f196d1ec5b move freqs in RvT to linspace 2021-10-05 09:23:44 -07:00
Phil Wang
529044c9b3 Merge pull request #153 from developer0hye/fix-example
fix transforms for val and test process in example code
2021-09-02 06:57:16 -07:00
yhkwon-DT01
c30655f3bc fix transforms for val and test process 2021-09-02 17:30:18 +09:00
Phil Wang
d2d6de01d3 0.20.7 2021-08-30 08:14:43 -07:00
Phil Wang
b9eadaef60 Merge pull request #151 from developer0hye/patch-1
Cleanup Attention Class & matmul based implementation for TensorRT conversion
2021-08-30 08:14:11 -07:00
Yonghye Kwon
24ac8350bf remove unused package 2021-08-30 18:25:03 +09:00
Yonghye Kwon
ca3cef9de0 Cleanup Attention Class 2021-08-30 18:05:16 +09:00
4 changed files with 14 additions and 17 deletions

View File

@@ -364,9 +364,8 @@
"\n",
"val_transforms = transforms.Compose(\n",
" [\n",
" transforms.Resize((224, 224)),\n",
" transforms.RandomResizedCrop(224),\n",
" transforms.RandomHorizontalFlip(),\n",
" transforms.Resize(256),\n",
" transforms.CenterCrop(224),\n",
" transforms.ToTensor(),\n",
" ]\n",
")\n",
@@ -374,9 +373,8 @@
"\n",
"test_transforms = transforms.Compose(\n",
" [\n",
" transforms.Resize((224, 224)),\n",
" transforms.RandomResizedCrop(224),\n",
" transforms.RandomHorizontalFlip(),\n",
" transforms.Resize(256),\n",
" transforms.CenterCrop(224),\n",
" transforms.ToTensor(),\n",
" ]\n",
")\n"
@@ -6250,4 +6248,4 @@
},
"nbformat": 4,
"nbformat_minor": 1
}
}

View File

@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
setup(
name = 'vit-pytorch',
packages = find_packages(exclude=['examples']),
version = '0.20.6',
version = '0.20.8',
license='MIT',
description = 'Vision Transformer (ViT) - Pytorch',
author = 'Phil Wang',

View File

@@ -19,7 +19,7 @@ class AxialRotaryEmbedding(nn.Module):
def __init__(self, dim, max_freq = 10):
super().__init__()
self.dim = dim
scales = torch.logspace(0., log(max_freq / 2) / log(2), self.dim // 4, base = 2)
scales = torch.linspace(1., max_freq / 2, self.dim // 4)
self.register_buffer('scales', scales)
def forward(self, x):
@@ -154,10 +154,10 @@ class Attention(nn.Module):
return self.to_out(out)
class Transformer(nn.Module):
def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0., use_rotary = True, use_ds_conv = True, use_glu = True):
def __init__(self, dim, depth, heads, dim_head, mlp_dim, image_size, dropout = 0., use_rotary = True, use_ds_conv = True, use_glu = True):
super().__init__()
self.layers = nn.ModuleList([])
self.pos_emb = AxialRotaryEmbedding(dim_head)
self.pos_emb = AxialRotaryEmbedding(dim_head, max_freq = image_size)
for _ in range(depth):
self.layers.append(nn.ModuleList([
PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout, use_rotary = use_rotary, use_ds_conv = use_ds_conv)),
@@ -187,7 +187,7 @@ class RvT(nn.Module):
)
self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout, use_rotary, use_ds_conv, use_glu)
self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, image_size, dropout, use_rotary, use_ds_conv, use_glu)
self.mlp_head = nn.Sequential(
nn.LayerNorm(dim),

View File

@@ -1,5 +1,5 @@
import torch
from torch import nn, einsum
from torch import nn
from einops import rearrange, repeat
from einops.layers.torch import Rearrange
@@ -50,15 +50,14 @@ class Attention(nn.Module):
) if project_out else nn.Identity()
def forward(self, x):
b, n, _, h = *x.shape, self.heads
qkv = self.to_qkv(x).chunk(3, dim = -1)
q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), qkv)
q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = self.heads), qkv)
dots = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale
dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
attn = self.attend(dots)
out = einsum('b h i j, b h j d -> b h i d', attn, v)
out = torch.matmul(attn, v)
out = rearrange(out, 'b h n d -> b n (h d)')
return self.to_out(out)