address https://github.com/lucidrains/vit-pytorch/issues/304

Update setup.py (#303)
2026-05-19 07:37:47 +00:00 · 2024-04-17 09:40:03 -07:00 · 2024-04-17 08:21:30 -07:00
2 changed files with 12 additions and 6 deletions
--- a/setup.py
+++ b/setup.py
@@ -1,11 +1,15 @@
 from setuptools import setup, find_packages

+with open('README.md') as f:
+    long_description = f.read()
+
 setup(
  name = 'vit-pytorch',
  packages = find_packages(exclude=['examples']),
-  version = '1.6.5',
+  version = '1.6.6',
  license='MIT',
  description = 'Vision Transformer (ViT) - Pytorch',
+  long_description=long_description,
  long_description_content_type = 'text/markdown',
  author = 'Phil Wang',
  author_email = 'lucidrains@gmail.com',
--- a/vit_pytorch/cross_vit.py
+++ b/vit_pytorch/cross_vit.py
@@ -170,12 +170,13 @@ class ImageEmbedder(nn.Module):
        dim,
        image_size,
        patch_size,
-        dropout = 0.
+        dropout = 0.,
+        channels = 3
    ):
        super().__init__()
        assert image_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'
        num_patches = (image_size // patch_size) ** 2
-        patch_dim = 3 * patch_size ** 2
+        patch_dim = channels * patch_size ** 2

        self.to_patch_embedding = nn.Sequential(
            Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1 = patch_size, p2 = patch_size),
@@ -223,11 +224,12 @@ class CrossViT(nn.Module):
        cross_attn_dim_head = 64,
        depth = 3,
        dropout = 0.1,
-        emb_dropout = 0.1
+        emb_dropout = 0.1,
+        channels = 3
    ):
        super().__init__()
-        self.sm_image_embedder = ImageEmbedder(dim = sm_dim, image_size = image_size, patch_size = sm_patch_size, dropout = emb_dropout)
-        self.lg_image_embedder = ImageEmbedder(dim = lg_dim, image_size = image_size, patch_size = lg_patch_size, dropout = emb_dropout)
+        self.sm_image_embedder = ImageEmbedder(dim = sm_dim, channels= channels, image_size = image_size, patch_size = sm_patch_size, dropout = emb_dropout)
+        self.lg_image_embedder = ImageEmbedder(dim = lg_dim, channels = channels, image_size = image_size, patch_size = lg_patch_size, dropout = emb_dropout)

        self.multi_scale_encoder = MultiScaleEncoder(
            depth = depth,
Author	SHA1	Message	Date
Phil Wang	12249dcc5f	address https://github.com/lucidrains/vit-pytorch/issues/304	2024-04-17 09:40:03 -07:00
SOUMYADIP MAL	8b8da8dede	Update setup.py (#303)	2024-04-17 08:21:30 -07:00