From 7807f24509e226d8894ec1f198e7a7a172aab56c Mon Sep 17 00:00:00 2001
From: Phil Wang
Date: Thu, 29 Apr 2021 15:39:41 -0700
Subject: [PATCH] fix small bug

---
 README.md                | 2 +-
 setup.py                 | 2 +-
 vit_pytorch/twins_svt.py | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 2113023..2784326 100644
--- a/README.md
+++ b/README.md
@@ -338,7 +338,7 @@ pred = v(img) # (1, 1000)
 
-This paper mixes local and global attention, along with position encoding generator (proposed in CPVT) and global average pooling, to achieve the same results as Swin, without the extra complexity of shifted windows, etc.
+This paper proposes mixing local and global attention, along with position encoding generator (proposed in CPVT) and global average pooling, to achieve the same results as Swin, without the extra complexity of shifted windows, CLS tokens, nor positional embeddings.
 
 ```python
 import torch

diff --git a/setup.py b/setup.py
index e159a8d..603faaa 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
 setup(
   name = 'vit-pytorch',
   packages = find_packages(exclude=['examples']),
-  version = '0.17.0',
+  version = '0.17.1',
   license='MIT',
   description = 'Vision Transformer (ViT) - Pytorch',
   author = 'Phil Wang',

diff --git a/vit_pytorch/twins_svt.py b/vit_pytorch/twins_svt.py
index caf5d77..76eafe5 100644
--- a/vit_pytorch/twins_svt.py
+++ b/vit_pytorch/twins_svt.py
@@ -162,11 +162,11 @@ class Transformer(nn.Module):
                 Residual(PreNorm(dim, FeedForward(dim, mlp_mult, dropout = dropout)))
             ]))
     def forward(self, x):
-        for local_attn, ff, global_attn, ff in self.layers:
+        for local_attn, ff1, global_attn, ff2 in self.layers:
             x = local_attn(x)
-            x = ff(x)
+            x = ff1(x)
             x = global_attn(x)
-            x = ff(x)
+            x = ff2(x)
         return x
 
 class TwinsSVT(nn.Module):
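
Note for reviewers: the core of the fix is Python name binding, not model logic. When a tuple is unpacked into a repeated target name, the name is rebound left to right and ends up pointing at the last value, so in the old loop both `ff(x)` calls dispatched to the second `FeedForward` and the feed-forward block following local attention was never applied. The sketch below is a minimal illustration of that behavior using toy `nn.Linear`/`nn.Identity` stand-ins (hypothetical, not the library's `Residual`/`PreNorm` wrappers):

```python
import torch
from torch import nn

# Toy stand-ins: two distinct feed-forward modules so we can tell which one runs.
ff_after_local  = nn.Linear(4, 4)
ff_after_global = nn.Linear(4, 4)
layers = [(nn.Identity(), ff_after_local, nn.Identity(), ff_after_global)]

# Old loop: the repeated name `ff` is rebound left-to-right, so it ends up
# bound to the *second* feed-forward; the first one is silently skipped.
for local_attn, ff, global_attn, ff in layers:
    assert ff is ff_after_global
    assert ff is not ff_after_local

# Fixed loop (mirrors the patch): distinct names keep both modules in play.
x = torch.randn(1, 4)
for local_attn, ff1, global_attn, ff2 in layers:
    x = local_attn(x)
    x = ff1(x)   # feed-forward after local attention is now applied
    x = global_attn(x)
    x = ff2(x)   # feed-forward after global attention
```

With the rename, each layer applies local attention, feed-forward, global attention, feed-forward in sequence, matching the Twins-SVT block structure described in the README.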