diff --git a/Lab2/code/1.1.py b/Lab2/code/1.1.py
index 6686983..977df95 100644
--- a/Lab2/code/1.1.py
+++ b/Lab2/code/1.1.py
@@ -1,7 +1,7 @@
 import time
 import numpy as np
 import torch
-from torch.nn.functional import one_hot, softmax
+from torch.nn.functional import *
 from torch.utils.data import Dataset, DataLoader
 from torch import nn
 from torchvision import datasets, transforms
diff --git a/Lab2/code/1.2.py b/Lab2/code/1.2.py
index 6b15a86..9bb2167 100644
--- a/Lab2/code/1.2.py
+++ b/Lab2/code/1.2.py
@@ -1,7 +1,7 @@
 import time
 import numpy as np
 import torch
-from torch.nn.functional import one_hot, softmax
+from torch.nn.functional import *
 from torch.utils.data import Dataset, DataLoader
 from torch import nn
 from torchvision import datasets, transforms
diff --git a/Lab2/code/1.3.py b/Lab2/code/1.3.py
index f6786a1..83dc9dd 100644
--- a/Lab2/code/1.3.py
+++ b/Lab2/code/1.3.py
@@ -1,7 +1,7 @@
 import time
 import numpy as np
 import torch
-from torch.nn.functional import one_hot, softmax
+from torch.nn.functional import *
 from torch.utils.data import Dataset, DataLoader
 from torch import nn
 from torchvision import datasets, transforms
diff --git a/Lab2/code/2.1.py b/Lab2/code/2.1.py
index be4271b..01fd66d 100644
--- a/Lab2/code/2.1.py
+++ b/Lab2/code/2.1.py
@@ -1,7 +1,7 @@
 import time
 import numpy as np
 import torch
-from torch.nn.functional import one_hot, softmax
+from torch.nn.functional import *
 from torch.utils.data import Dataset, DataLoader
 from torch import nn
 from torchvision import datasets, transforms
diff --git a/Lab2/code/2.2.py b/Lab2/code/2.2.py
index 77046c3..dbc424a 100644
--- a/Lab2/code/2.2.py
+++ b/Lab2/code/2.2.py
@@ -1,7 +1,7 @@
 import time
 import numpy as np
 import torch
-from torch.nn.functional import one_hot, softmax
+from torch.nn.functional import *
 from torch.utils.data import Dataset, DataLoader
 from torch import nn
 from torchvision import datasets, transforms
diff --git a/Lab2/code/2.3.py b/Lab2/code/2.3.py
index bcbe727..1132e7c 100644
--- a/Lab2/code/2.3.py
+++ b/Lab2/code/2.3.py
@@ -1,7 +1,7 @@
 import time
 import numpy as np
 import torch
-from torch.nn.functional import one_hot, softmax
+from torch.nn.functional import *
 from torch.utils.data import Dataset, DataLoader
 from torch import nn
 from torchvision import datasets, transforms
diff --git a/Lab2/code/3.py b/Lab2/code/3.py
new file mode 100644
index 0000000..b7f4ce8
--- /dev/null
+++ b/Lab2/code/3.py
@@ -0,0 +1,82 @@
+import time
+import numpy as np
+import torch
+from torch.nn.functional import *
+from torch.utils.data import Dataset, DataLoader
+from torch import nn
+from torchvision import datasets, transforms
+from tqdm import tqdm
+from utils import *
+
+import ipdb
+
+class Model_3_1(nn.Module):
+    def __init__(self, num_classes):
+        super().__init__()
+        self.flatten = nn.Flatten()
+        self.fc1 = nn.Linear(in_features=28 * 28, out_features=1024)
+        self.fc2 = nn.Linear(in_features=1024, out_features=256)
+        self.fc3 = nn.Linear(in_features=256, out_features=num_classes)
+        self.activate_fn = relu
+
+    def forward(self, x: torch.Tensor):
+        x = self.flatten(x)
+        x = self.fc1(x)
+        x = self.activate_fn(x)
+
+        x = self.fc2(x)
+        x = self.activate_fn(x)
+
+        x = self.fc3(x)
+        x = self.activate_fn(x)
+        return x
+
+
+class Model_3_2(nn.Module):
+    def __init__(self, num_classes):
+        super().__init__()
+        self.flatten = nn.Flatten()
+        self.fc1 = nn.Linear(in_features=28 * 28, out_features=1024)
+        self.fc2 = nn.Linear(in_features=1024, out_features=256)
+        self.fc3 = nn.Linear(in_features=256, out_features=num_classes)
+        self.activate_fn = sigmoid
+
+    def forward(self, x: torch.Tensor):
+        x = self.flatten(x)
+        x = self.fc1(x)
+        x = self.activate_fn(x)
+
+        x = self.fc2(x)
+        x = self.activate_fn(x)
+
+        x = self.fc3(x)
+        x = self.activate_fn(x)
+        return x
+
+
+class Model_3_3(nn.Module):
+    def __init__(self, num_classes):
+        super().__init__()
+        self.flatten = nn.Flatten()
+        self.fc1 = nn.Linear(in_features=28 * 28, out_features=1024)
+        self.fc2 = nn.Linear(in_features=1024, out_features=256)
+        self.fc3 = nn.Linear(in_features=256, out_features=num_classes)
+        self.activate_fn = tanh
+
+    def forward(self, x: torch.Tensor):
+        x = self.flatten(x)
+        x = self.fc1(x)
+        x = self.activate_fn(x)
+
+        x = self.fc2(x)
+        x = self.activate_fn(x)
+
+        x = self.fc3(x)
+        x = self.activate_fn(x)
+        return x
+
+
+if __name__ == "__main__":
+    train_MNIST_CLS(Model=Model_3_1)
+    train_MNIST_CLS(Model=Model_3_2)
+    train_MNIST_CLS(Model=Model_3_3)
diff --git a/Lab2/code/utils.py b/Lab2/code/utils.py
index c0a37b9..10031b9 100644
--- a/Lab2/code/utils.py
+++ b/Lab2/code/utils.py
@@ -1,7 +1,7 @@
 import time
 import numpy as np
 import torch
-from torch.nn.functional import one_hot, softmax
+from torch.nn.functional import *
 from torch.utils.data import Dataset, DataLoader
 from torch import nn
 from torchvision import datasets, transforms
@@ -102,4 +102,88 @@ class My_optimizer:
     def zero_grad(self):
         for param in self.params:
             if param.grad is not None:
-                param.grad.data = torch.zeros_like(param.grad.data)
\ No newline at end of file
+                param.grad.data = torch.zeros_like(param.grad.data)
+
+
+def train_MNIST_CLS(Model: nn.Module):
+    learning_rate = 5e-2
+    num_epochs = 10
+    batch_size = 512
+    num_classes = 10
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+
+    transform = transforms.Compose(
+        [
+            transforms.ToTensor(),
+            transforms.Normalize((0.5,), (0.5,)),
+        ]
+    )
+    train_mnist_dataset = datasets.MNIST(root="../dataset", train=True, transform=transform, download=True)
+    test_mnist_dataset = datasets.MNIST(root="../dataset", train=False, transform=transform, download=True)
+    train_loader = DataLoader(
+        dataset=train_mnist_dataset,
+        batch_size=batch_size,
+        shuffle=True,
+        num_workers=14,
+        pin_memory=True,
+    )
+    test_loader = DataLoader(
+        dataset=test_mnist_dataset,
+        batch_size=batch_size,
+        shuffle=True,
+        num_workers=14,
+        pin_memory=True,
+    )
+
+    model = Model(num_classes).to(device)
+    criterion = nn.CrossEntropyLoss()
+    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
+
+    for epoch in range(num_epochs):
+        model.train()
+        total_epoch_loss = 0
+        start_time = time.time()
+        for index, (images, targets) in tqdm(
+            enumerate(train_loader), total=len(train_loader)
+        ):
+            optimizer.zero_grad()
+
+            images = images.to(device)
+            targets = targets.to(device)
+            one_hot_targets = one_hot(targets, num_classes=num_classes).to(dtype=torch.float)
+
+            outputs = model(images)
+            loss = criterion(outputs, one_hot_targets)
+            total_epoch_loss += loss.item()
+
+            loss.backward()
+            optimizer.step()
+
+        end_time = time.time()
+        train_time = end_time - start_time
+
+        model.eval()
+        with torch.no_grad():
+            total_epoch_acc = 0
+            start_time = time.time()
+            for index, (image, targets) in tqdm(
+                enumerate(test_loader), total=len(test_loader)
+            ):
+                image = image.to(device)
+                targets = targets.to(device)
+
+                outputs = model(image)
+                pred = softmax(outputs, dim=1)
+                total_epoch_acc += (pred.argmax(1) == targets).sum().item()
+
+            end_time = time.time()
+            test_time = end_time - start_time
+
+            avg_epoch_acc = total_epoch_acc / len(test_mnist_dataset)
+            print(
+                f"Epoch [{epoch + 1}/{num_epochs}],",
+                f"Train Loss: {total_epoch_loss},",
+                f"Used Time: {train_time * 1000:.3f}ms,",
+                f"Test Acc: {avg_epoch_acc * 100:.3f}%,",
+                f"Used Time: {test_time * 1000:.3f}ms",
+            )
\ No newline at end of file
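
Note on the import change: the edit repeated across 1.1.py-2.3.py and utils.py
replaces the explicit one_hot/softmax import with a star import. That is what
lets 3.py refer to relu, sigmoid, and tanh unqualified, but it also pulls every
public name of torch.nn.functional into each module's namespace, which PEP 8
discourages. An explicit equivalent, if preferred, would be:

    from torch.nn.functional import one_hot, softmax, relu, sigmoid, tanh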
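
Note on the Model_3_* classes: the three models in 3.py are identical except
for the function bound to self.activate_fn (relu, sigmoid, tanh), and each one
also applies that activation after the final fc3 layer, so the values handed to
CrossEntropyLoss are range-restricted (non-negative for relu, (0, 1) for
sigmoid, (-1, 1) for tanh). A minimal sketch of a single parameterized class
that reproduces the same structure (hypothetical name Model_3, not part of this
commit):

    import torch
    from torch import nn
    from torch.nn.functional import relu, sigmoid, tanh

    class Model_3(nn.Module):
        def __init__(self, num_classes, activate_fn):
            super().__init__()
            self.flatten = nn.Flatten()
            self.fc1 = nn.Linear(28 * 28, 1024)
            self.fc2 = nn.Linear(1024, 256)
            self.fc3 = nn.Linear(256, num_classes)
            self.activate_fn = activate_fn  # relu, sigmoid, or tanh

        def forward(self, x: torch.Tensor):
            # Same layout as Model_3_1/2/3: the activation is applied after
            # every layer, including the output layer fc3.
            x = self.activate_fn(self.fc1(self.flatten(x)))
            x = self.activate_fn(self.fc2(x))
            return self.activate_fn(self.fc3(x))

The three runs in 3.py's __main__ block could then be driven from one loop,
e.g. with functools.partial so the class still matches the Model(num_classes)
call inside train_MNIST_CLS:

    from functools import partial
    from utils import train_MNIST_CLS

    for fn in (relu, sigmoid, tanh):
        train_MNIST_CLS(Model=partial(Model_3, activate_fn=fn))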
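
Note on the loss and evaluation in train_MNIST_CLS: nn.CrossEntropyLoss applies
log-softmax to the model outputs internally, and (in PyTorch 1.10 and newer)
accepts either integer class indices or floating-point class probabilities as
targets. For hard labels the two forms are equivalent, so the one_hot
conversion is optional; a small self-contained check, with shapes and values
chosen here only for illustration:

    import torch
    from torch import nn
    from torch.nn.functional import one_hot

    criterion = nn.CrossEntropyLoss()
    logits = torch.randn(4, 10)           # (N, C) raw model outputs
    targets = torch.tensor([3, 1, 0, 7])  # (N,) int64 class indices

    loss_indices = criterion(logits, targets)               # index targets
    loss_onehot = criterion(logits, one_hot(targets, 10).float())
    assert torch.allclose(loss_indices, loss_onehot)

Similarly, the softmax before argmax in the test loop does not change the
predicted class, since softmax is monotonic within each row; outputs.argmax(1)
would yield the same accuracy without it.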