diff --git a/README.md b/README.md
index 6814f48..484e79f 100644
--- a/README.md
+++ b/README.md
@@ -664,7 +664,7 @@ preds = v(img) # (2, 1000)
 
-This paper decided to process the image in hierarchical stages, with attention only within tokens of local blocks, which aggregate as it moves up the heirarchy. The aggregation is done in the image plane, and contains a convolution and subsequent maxpool to allow it to pass information across the boundary.
+This paper decided to process the image in hierarchical stages, with attention only within tokens of local blocks, which aggregate as it moves up the hierarchy. The aggregation is done in the image plane, and contains a convolution and subsequent maxpool to allow it to pass information across the boundary.
 
 You can use it with the following code (ex. NesT-T)
 
@@ -678,7 +678,7 @@ nest = NesT(
     dim = 96,
     heads = 3,
     num_hierarchies = 3,        # number of hierarchies
-    block_repeats = (2, 2, 8),  # the number of transformer blocks at each heirarchy, starting from the bottom
+    block_repeats = (2, 2, 8),  # the number of transformer blocks at each hierarchy, starting from the bottom
     num_classes = 1000
 )
 
diff --git a/examples/cats_and_dogs.ipynb b/examples/cats_and_dogs.ipynb
index 54febb7..74f40ec 100644
--- a/examples/cats_and_dogs.ipynb
+++ b/examples/cats_and_dogs.ipynb
@@ -16,7 +16,7 @@
     "\n",
     "* Dogs vs. Cats Redux: Kernels Edition - https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition\n",
     "* Base Code - https://www.kaggle.com/reukki/pytorch-cnn-tutorial-with-cats-and-dogs/\n",
-    "* Effecient Attention Implementation - https://github.com/lucidrains/vit-pytorch#efficient-attention"
+    "* Efficient Attention Implementation - https://github.com/lucidrains/vit-pytorch#efficient-attention"
    ]
   },
   {
@@ -342,7 +342,7 @@
     "id": "ZhYDJXk2SRDu"
    },
    "source": [
-    "## Image Augumentation"
+    "## Image Augmentation"
    ]
   },
   {
@@ -497,7 +497,7 @@
     "id": "TF9yMaRrSvmv"
    },
    "source": [
-    "## Effecient Attention"
+    "## Efficient Attention"
    ]
   },
   {
@@ -1307,7 +1307,7 @@
  "celltoolbar": "Edit Metadata",
  "colab": {
   "collapsed_sections": [],
-  "name": "Effecient Attention | Cats & Dogs",
+  "name": "Efficient Attention | Cats & Dogs",
   "provenance": [],
   "toc_visible": true
  },
diff --git a/vit_pytorch/mobile_vit.py b/vit_pytorch/mobile_vit.py
index c1a951f..e0b7b8b 100644
--- a/vit_pytorch/mobile_vit.py
+++ b/vit_pytorch/mobile_vit.py
@@ -13,9 +13,9 @@ def conv_1x1_bn(inp, oup):
         nn.SiLU()
     )
 
-def conv_nxn_bn(inp, oup, kernal_size=3, stride=1):
+def conv_nxn_bn(inp, oup, kernel_size=3, stride=1):
     return nn.Sequential(
-        nn.Conv2d(inp, oup, kernal_size, stride, 1, bias=False),
+        nn.Conv2d(inp, oup, kernel_size, stride, 1, bias=False),
         nn.BatchNorm2d(oup),
         nn.SiLU()
     )
diff --git a/vit_pytorch/nest.py b/vit_pytorch/nest.py
index b36da48..246efe8 100644
--- a/vit_pytorch/nest.py
+++ b/vit_pytorch/nest.py
@@ -131,7 +131,7 @@ class NesT(nn.Module):
         fmap_size = image_size // patch_size
         blocks = 2 ** (num_hierarchies - 1)
 
-        seq_len = (fmap_size // blocks) ** 2   # sequence length is held constant across heirarchy
+        seq_len = (fmap_size // blocks) ** 2   # sequence length is held constant across hierarchy
 
         hierarchies = list(reversed(range(num_hierarchies)))
         mults = [2 ** i for i in reversed(hierarchies)]
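Context for reviewers on the first README hunk: the `block_repeats` line sits in the middle of the README's NesT-T usage snippet. After this patch, that snippet reads roughly as below. This is a minimal sketch; `image_size = 224` and `patch_size = 4` are assumed from the README's NesT-T example, since the hunk itself does not show the start of the constructor call.

```python
import torch
from vit_pytorch.nest import NesT

nest = NesT(
    image_size = 224,           # assumed from the README's NesT-T example; not part of this hunk
    patch_size = 4,             # assumed likewise
    dim = 96,
    heads = 3,
    num_hierarchies = 3,        # number of hierarchies
    block_repeats = (2, 2, 8),  # the number of transformer blocks at each hierarchy, starting from the bottom
    num_classes = 1000
)

img = torch.randn(1, 3, 224, 224)
preds = nest(img)  # (1, 1000)
```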
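The `mobile_vit.py` hunk is a pure rename of the misspelled `kernal_size` keyword; behavior is unchanged. For reference, the patched helper in full, assuming the file's existing `torch.nn` import as `nn`:

```python
import torch.nn as nn

def conv_nxn_bn(inp, oup, kernel_size=3, stride=1):
    # n x n convolution -> batch norm -> SiLU; padding is hardcoded to 1,
    # so spatial size is preserved only for the default kernel_size=3 at stride 1
    return nn.Sequential(
        nn.Conv2d(inp, oup, kernel_size, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.SiLU()
    )
```

Note that any caller passing the old keyword `kernal_size=` would break with this rename; if the helper is only ever called positionally, as the clean rename implies, nothing else needs to change.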
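On the `nest.py` hunk: the comment being fixed states the invariant that the token count per local block stays constant across hierarchy levels, which follows from the blocks per side and the feature-map side both being halved at each aggregation step (the convolution plus maxpool described in the README text above). A quick sanity check of that arithmetic; the values are hypothetical, and only the `fmap_size` / `blocks` / `seq_len` formulas come from the patched file:

```python
image_size, patch_size, num_hierarchies = 224, 4, 3  # hypothetical NesT-T-like settings

fmap_size = image_size // patch_size   # 56 patches per side
blocks = 2 ** (num_hierarchies - 1)    # 4 local blocks per side at the bottom level
seq_len = (fmap_size // blocks) ** 2   # (56 // 4) ** 2 = 196 tokens per block

# each level up halves both the feature-map side and the blocks per side,
# so the tokens per block stays at 196 across the hierarchy
for level in range(num_hierarchies):
    side = fmap_size // (2 ** level)
    blocks_per_side = blocks // (2 ** level)
    assert (side // blocks_per_side) ** 2 == seq_len
```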