From 7807f24509e226d8894ec1f198e7a7a172aab56c Mon Sep 17 00:00:00 2001
From: Phil Wang
Date: Thu, 29 Apr 2021 15:39:41 -0700
Subject: [PATCH] fix small bug

---
 README.md                | 2 +-
 setup.py                 | 2 +-
 vit_pytorch/twins_svt.py | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 2113023..2784326 100644
--- a/README.md
+++ b/README.md
@@ -338,7 +338,7 @@ pred = v(img) # (1, 1000)
 
-This paper mixes local and global attention, along with position encoding generator (proposed in CPVT) and global average pooling, to achieve the same results as Swin, without the extra complexity of shifted windows, etc.
+This paper proposes mixing local and global attention, along with position encoding generator (proposed in CPVT) and global average pooling, to achieve the same results as Swin, without the extra complexity of shifted windows, CLS tokens, nor positional embeddings.
 
 ```python
 import torch

diff --git a/setup.py b/setup.py
index e159a8d..603faaa 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
 setup(
   name = 'vit-pytorch',
   packages = find_packages(exclude=['examples']),
-  version = '0.17.0',
+  version = '0.17.1',
   license='MIT',
   description = 'Vision Transformer (ViT) - Pytorch',
   author = 'Phil Wang',

diff --git a/vit_pytorch/twins_svt.py b/vit_pytorch/twins_svt.py
index caf5d77..76eafe5 100644
--- a/vit_pytorch/twins_svt.py
+++ b/vit_pytorch/twins_svt.py
@@ -162,11 +162,11 @@ class Transformer(nn.Module):
                 Residual(PreNorm(dim, FeedForward(dim, mlp_mult, dropout = dropout)))
             ]))
     def forward(self, x):
-        for local_attn, ff, global_attn, ff in self.layers:
+        for local_attn, ff1, global_attn, ff2 in self.layers:
             x = local_attn(x)
-            x = ff(x)
+            x = ff1(x)
             x = global_attn(x)
-            x = ff(x)
+            x = ff2(x)
         return x
 
 class TwinsSVT(nn.Module):
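
Note for reviewers: the core of the fix is Python name binding, not model logic. When a tuple is unpacked into a repeated target name, the name is rebound left to right and ends up pointing at the last value, so in the old loop both `ff(x)` calls dispatched to the second `FeedForward` and the feed-forward block following local attention was never applied. The sketch below is a minimal illustration of that behavior using toy `nn.Linear`/`nn.Identity` stand-ins (hypothetical, not the library's `Residual`/`PreNorm` wrappers):

```python
import torch
from torch import nn

# Toy stand-ins: two distinct feed-forward modules so we can tell which one runs.
ff_after_local  = nn.Linear(4, 4)
ff_after_global = nn.Linear(4, 4)
layers = [(nn.Identity(), ff_after_local, nn.Identity(), ff_after_global)]

# Old loop: the repeated name `ff` is rebound left-to-right, so it ends up
# bound to the *second* feed-forward; the first one is silently skipped.
for local_attn, ff, global_attn, ff in layers:
    assert ff is ff_after_global
    assert ff is not ff_after_local

# Fixed loop (mirrors the patch): distinct names keep both modules in play.
x = torch.randn(1, 4)
for local_attn, ff1, global_attn, ff2 in layers:
    x = local_attn(x)
    x = ff1(x)   # feed-forward after local attention is now applied
    x = global_attn(x)
    x = ff2(x)   # feed-forward after global attention
```

With the rename, each layer applies local attention, feed-forward, global attention, feed-forward in sequence, matching the Twins-SVT block structure described in the README.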