Complete Lab 3

Jingfan Ke 2023-11-20 23:11:01 +08:00
parent d358281472
commit 7fbb893223
11 changed files with 1276 additions and 215 deletions

Modified file:

@@ -1,14 +1,7 @@
-import time
-import numpy as np
 import torch
-from torch.nn.functional import *
-from torch.utils.data import Dataset, DataLoader
 from torch import nn
-from torchvision import datasets, transforms
-from tqdm import tqdm
 from utils import *
-import ipdb
 class My_Dropout(nn.Module):
     def __init__(self, p, **kwargs):
@@ -16,7 +9,7 @@ class My_Dropout(nn.Module):
         self.p = p
         self.mask = None
-    def forward(self, x:torch.Tensor):
+    def forward(self, x: torch.Tensor):
         if self.training:
             self.mask = (torch.rand(x.shape) > self.p).to(dtype=torch.float32, device=x.device)
             return x * self.mask / (1 - self.p)
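Note (not part of the commit): the forward pass above is inverted dropout, i.e. it zeroes each activation with probability p and rescales the survivors by 1/(1 - p), so the expected output equals the input. A minimal sanity check, assuming My_Dropout from this file is importable:

import torch

torch.manual_seed(0)
drop = My_Dropout(p=0.5)   # nn.Module, so training mode is on by default
x = torch.ones(100_000)
out = drop(x)
print(out.mean())          # expected to be close to 1.0 thanks to the 1/(1 - p) rescaling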

Modified file:

@@ -1,34 +1,11 @@
-import time
 import numpy as np
 import torch
-from torch.nn.functional import *
-from torch.utils.data import Dataset, DataLoader
-from torch import nn
-from torchvision import datasets, transforms
-from tqdm import tqdm
 from utils import *
-import ipdb
-class MNIST_CLS_Model(nn.Module):
-    def __init__(self, num_classes, dropout_rate=0.5):
-        super().__init__()
-        self.flatten = nn.Flatten()
-        self.fc1 = nn.Linear(in_features=28 * 28, out_features=1024)
-        self.fc2 = nn.Linear(in_features=1024, out_features=num_classes)
-        self.dropout = nn.Dropout(p=dropout_rate)
-    def forward(self, x: torch.Tensor):
-        x = self.flatten(x)
-        x = torch.relu(self.fc1(x))
-        x = self.dropout(x)
-        x = self.fc2(x)
-        return x
 if __name__ == "__main__":
     learning_rate = 8e-2
-    num_epochs = 10
+    num_epochs = 101
     for i in np.arange(3):
         dropout_rate = 0.1 + 0.4 * i
         model = MNIST_CLS_Model(num_classes=10, dropout_rate=dropout_rate)
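Note (not part of the commit): with np.arange(3) the loop sweeps dropout_rate over 0.1, 0.5 and 0.9 for the same 1024-unit MLP:

print([round(0.1 + 0.4 * i, 1) for i in range(3)])  # [0.1, 0.5, 0.9]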

Lab3/code/2.1.py (new file, +49)

@@ -0,0 +1,49 @@
import torch
from utils import *
class My_SGD:
    def __init__(self, params: list[torch.Tensor], lr: float, weight_decay=0.0):
        self.params = params
        self.lr = lr
        self.weight_decay = weight_decay
    def step(self):
        with torch.no_grad():
            for param in self.params:
                if param.grad is not None:
                    if len(param.data.shape) > 1:
                        param.data = param.data - self.lr * (param.grad + self.weight_decay * param.data)
                    else:
                        param.data = param.data - self.lr * param.grad
    def zero_grad(self):
        for param in self.params:
            if param.grad is not None:
                param.grad.data = torch.zeros_like(param.grad.data)
if __name__ == "__main__":
    params1 = torch.tensor([[1.0, 2.0]], requires_grad=True)
    params2 = torch.tensor([[1.0, 2.0]], requires_grad=True)
    my_sgd = My_SGD(params=[params1], lr=0.5, weight_decay=0.1)
    optim_sgd = torch.optim.SGD(params=[params2], lr=0.5, weight_decay=0.1)
    my_sgd.zero_grad()
    optim_sgd.zero_grad()
    loss1 = 2 * params1.sum()
    loss2 = 2 * params2.sum()
    # the gradient of the loss w.r.t. every entry is 2
    loss1.backward()
    loss2.backward()
    print("Gradient of params1:\n", params1.grad.data)
    print("Gradient of params2:\n", params2.grad.data)
    my_sgd.step()
    optim_sgd.step()
    # result is w - lr * grad - lr * weight_decay * w
    # w[0] = 1 - 0.5 * 2 - 0.5 * 0.1 * 1 = -0.0500
    # w[1] = 2 - 0.5 * 2 - 0.5 * 0.1 * 2 = 0.9000
    print("My_SGD result after one step with L2 regularization:\n", params1.data)
    print("torch.optim.SGD result after one step with L2 regularization:\n", params2.data)

Lab3/code/2.2.py (new file, +15)

@@ -0,0 +1,15 @@
import numpy as np
import torch
from utils import *
if __name__ == "__main__":
    learning_rate = 8e-2
    num_epochs = 101
    color = ["blue", "green", "orange", "purple"]
    for i in np.arange(4):
        weight_decay_rate = i / 4 * 0.01
        model = MNIST_CLS_Model(num_classes=10, dropout_rate=0)
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay_rate)
        print(f"weight_decay_rate={weight_decay_rate}")
        train_loss, test_acc = train_MNIST_CLS(model, optimizer, num_epochs=num_epochs)
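Note (not part of the commit): the loop re-trains the same dropout-free model with weight_decay_rate swept over 0.0, 0.0025, 0.005 and 0.0075:

print([i / 4 * 0.01 for i in range(4)])  # [0.0, 0.0025, 0.005, 0.0075]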

Lab3/code/3.1.py (new file, +64)

@@ -0,0 +1,64 @@
import torch
from utils import *
# Manual implementation of torch.optim.SGD with momentum
class My_SGD:
    def __init__(self, params: list[torch.Tensor], lr: float, weight_decay=0.0, momentum=0.0):
        self.params = params
        self.lr = lr
        self.weight_decay = weight_decay
        self.momentum = momentum
        self.velocities = [torch.zeros_like(param.data) for param in params]
    def step(self):
        with torch.no_grad():
            for index, param in enumerate(self.params):
                if param.grad is not None:
                    if self.weight_decay > 0:
                        if len(param.data.shape) > 1:
                            param.grad.data = (param.grad.data + self.weight_decay * param.data)
                    self.velocities[index] = (self.momentum * self.velocities[index] - self.lr * param.grad)
                    param.data = param.data + self.velocities[index]
    def zero_grad(self):
        for param in self.params:
            if param.grad is not None:
                param.grad.data = torch.zeros_like(param.grad.data)
if __name__ == "__main__":
    params1 = torch.tensor([[1.0, 2.0]], requires_grad=True)
    params2 = torch.tensor([[1.0, 2.0]], requires_grad=True)
    my_sgd = My_SGD(params=[params1], lr=0.5, momentum=1)
    optim_sgd = torch.optim.SGD(params=[params2], lr=0.5, momentum=1)
    my_sgd.zero_grad()
    optim_sgd.zero_grad()
    loss1 = 2 * params1.sum()
    loss2 = 2 * params2.sum()
    # the gradient of the loss w.r.t. every entry is 2
    loss1.backward()
    loss2.backward()
    my_sgd.step()
    optim_sgd.step()
    # result is w - lr * grad + momentum * velocity
    # w[0] = 1 - 0.5 * 2 + 1 * 0 = 0
    # w[1] = 2 - 0.5 * 2 + 1 * 0 = 1
    print("My_SGD result after step 1:\n", params1.data)
    print("torch.optim.SGD result after step 1:\n", params2.data)
    my_sgd.zero_grad()
    optim_sgd.zero_grad()
    loss1 = -3 * params1.sum()
    loss2 = -3 * params2.sum()
    loss1.backward()
    loss2.backward()
    my_sgd.step()
    optim_sgd.step()
    # result is w - lr * grad + momentum * velocity
    # w[0] = 0 - 0.5 * (-3) + 1 * (-0.5 * 2) = 0.5
    # w[1] = 1 - 0.5 * (-3) + 1 * (-0.5 * 2) = 1.5
    print("My_SGD result after step 2:\n", params1.data)
    print("torch.optim.SGD result after step 2:\n", params2.data)

Lab3/code/3.2.py (new file, +63)

@@ -0,0 +1,63 @@
import torch
from utils import *
class My_RMSprop:
    def __init__(self, params: list[torch.Tensor], lr=1e-2, alpha=0.99, eps=1e-8):
        self.params = params
        self.lr = lr
        self.alpha = alpha
        self.eps = eps
        self.square_avg = [torch.zeros_like(param.data) for param in params]
    def step(self):
        with torch.no_grad():
            for index, param in enumerate(self.params):
                if param.grad is not None:
                    self.square_avg[index] = self.alpha * self.square_avg[index] + (1 - self.alpha) * param.grad ** 2
                    param.data = param.data - self.lr * param.grad / torch.sqrt(self.square_avg[index] + self.eps)
    def zero_grad(self):
        for param in self.params:
            if param.grad is not None:
                param.grad.data = torch.zeros_like(param.grad.data)
if __name__ == "__main__":
    params1 = torch.tensor([[1.0, 2.0]], requires_grad=True)
    params2 = torch.tensor([[1.0, 2.0]], requires_grad=True)
    my_sgd = My_RMSprop(params=[params1], lr=1, alpha=0.5, eps=1e-8)
    optim_sgd = torch.optim.RMSprop(params=[params2], lr=1, alpha=0.5, eps=1e-8)
    my_sgd.zero_grad()
    optim_sgd.zero_grad()
    loss1 = 2 * params1.sum()
    loss2 = 2 * params2.sum()
    # the gradient of the loss w.r.t. every entry is 2
    loss1.backward()
    loss2.backward()
    my_sgd.step()
    optim_sgd.step()
    # s = alpha * s + (1 - alpha) * grad^2 = 0.5 * 0 + (1 - 0.5) * 2^2 = 2
    # w = w - lr * grad / (s + eps)^0.5
    # w[0] = 1 - 1 * 2 / (2 + 1e-8)^0.5 ~= -0.41
    # w[1] = 2 - 1 * 2 / (2 + 1e-8)^0.5 ~= 0.59
    print("My_RMSprop result after step 1:\n", params1.data)
    print("torch.optim.RMSprop result after step 1:\n", params2.data)
    my_sgd.zero_grad()
    optim_sgd.zero_grad()
    loss1 = -3 * params1.sum()
    loss2 = -3 * params2.sum()
    loss1.backward()
    loss2.backward()
    my_sgd.step()
    optim_sgd.step()
    # s = alpha * s + (1 - alpha) * grad^2 = 0.5 * 2 + (1 - 0.5) * (-3)^2 = 5.5
    # w = w - lr * grad / (s + eps)^0.5
    # w[0] = -0.41 - 1 * (-3) / (5.5 + 1e-8)^0.5 ~= 0.87
    # w[1] = 0.59 - 1 * (-3) / (5.5 + 1e-8)^0.5 ~= 1.86
    print("My_RMSprop result after step 2:\n", params1.data)
    print("torch.optim.RMSprop result after step 2:\n", params2.data)

Lab3/code/3.3.py (new file, +64)

@@ -0,0 +1,64 @@
import torch
from utils import *
class My_Adam:
    def __init__(self, params: list[torch.Tensor], lr=1e-3, betas=(0.9, 0.999), eps=1e-8):
        self.params = params
        self.lr = lr
        self.beta1 = betas[0]
        self.beta2 = betas[1]
        self.eps = eps
        self.t = 0
        self.momentums = [torch.zeros_like(param.data) for param in params]
        self.velocities = [torch.zeros_like(param.data) for param in params]
    def step(self):
        self.t += 1
        with torch.no_grad():
            for index, param in enumerate(self.params):
                if param.grad is not None:
                    self.momentums[index] = (self.beta1 * self.momentums[index] + (1 - self.beta1) * param.grad)
                    self.velocities[index] = (self.beta2 * self.velocities[index] + (1 - self.beta2) * param.grad ** 2)
                    momentums_hat = self.momentums[index] / (1 - self.beta1 ** self.t)
                    velocities_hat = self.velocities[index] / (1 - self.beta2 ** self.t)
                    param.data = param.data - self.lr * momentums_hat / (torch.sqrt(velocities_hat) + self.eps)
    def zero_grad(self):
        for param in self.params:
            if param.grad is not None:
                param.grad.data = torch.zeros_like(param.grad.data)
if __name__ == "__main__":
    params1 = torch.tensor([[1.0, 2.0]], requires_grad=True)
    params2 = torch.tensor([[1.0, 2.0]], requires_grad=True)
    my_sgd = My_Adam(params=[params1], lr=1, betas=(0.5, 0.5), eps=1e-8)
    optim_sgd = torch.optim.Adam(params=[params2], lr=1, betas=(0.5, 0.5), eps=1e-8)
    my_sgd.zero_grad()
    optim_sgd.zero_grad()
    loss1 = 2 * params1.sum()
    loss2 = 2 * params2.sum()
    # the gradient of the loss w.r.t. every entry is 2
    loss1.backward()
    loss2.backward()
    my_sgd.step()
    optim_sgd.step()
    print("My_Adam result after step 1:\n", params1.data)
    print("torch.optim.Adam result after step 1:\n", params2.data)
    my_sgd.zero_grad()
    optim_sgd.zero_grad()
    loss1 = -3 * params1.sum()
    loss2 = -3 * params2.sum()
    # the gradient of the loss w.r.t. every entry is -3
    loss1.backward()
    loss2.backward()
    my_sgd.step()
    optim_sgd.step()
    print("My_Adam result after step 2:\n", params1.data)
    print("torch.optim.Adam result after step 2:\n", params2.data)

Lab3/code/3.4.py (new file, +23)

@@ -0,0 +1,23 @@
import torch
from utils import *
if __name__ == "__main__":
    learning_rate = 5e-2
    num_epochs = 161
    color = ["blue", "green", "orange"]
    optim_names = ["SGD", "RMSprop", "Adam"]
    model = MNIST_CLS_Model(num_classes=10, dropout_rate=0)
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
    print("optimizer: SGD")
    train_loss, test_acc = train_MNIST_CLS(model, optimizer, num_epochs=num_epochs)
    model = MNIST_CLS_Model(num_classes=10, dropout_rate=0)
    optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate, alpha=0.99, eps=1e-8)
    print("optimizer: RMSprop")
    train_loss, test_acc = train_MNIST_CLS(model, optimizer, num_epochs=num_epochs)
    model = MNIST_CLS_Model(num_classes=10, dropout_rate=0)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-8)
    print("optimizer: Adam")
    train_loss, test_acc = train_MNIST_CLS(model, optimizer, num_epochs=num_epochs)

Lab3/code/4.py (new file, +101)

@@ -0,0 +1,101 @@
import torch
from torch import nn
from utils import *
from torch.utils.data import random_split
learning_rate = 1e-3
num_epochs = 161
batch_size = 8192
num_classes = 10
device = "cuda:0" if torch.cuda.is_available() else "cpu"
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)
train_mnist_dataset = datasets.MNIST(root="../dataset", train=True, transform=transform, download=True)
test_mnist_dataset = datasets.MNIST(root="../dataset", train=False, transform=transform, download=True)
train_dataset_length = int(0.8 * len(train_mnist_dataset))
val_dataset_length = len(train_mnist_dataset) - train_dataset_length
train_mnist_dataset, val_mnist_dataset = random_split(
    train_mnist_dataset,
    [train_dataset_length, val_dataset_length],
    generator=torch.Generator().manual_seed(42),
)
train_loader = DataLoader(dataset=train_mnist_dataset, batch_size=batch_size, shuffle=True, num_workers=14, pin_memory=True)
val_loader = DataLoader(dataset=val_mnist_dataset, batch_size=batch_size, shuffle=True, num_workers=14, pin_memory=True)
test_loader = DataLoader(dataset=test_mnist_dataset, batch_size=batch_size, shuffle=True, num_workers=14, pin_memory=True)
model = MNIST_CLS_Model(num_classes=10, dropout_rate=0.2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-8, weight_decay=0)
early_stopping_patience = 5
best_val_loss = float("inf")
current_patience = 0
train_loss = list()
test_acc = list()
val_loss = list()
for epoch in range(num_epochs):
    model.train()
    total_epoch_loss = 0
    for index, (images, targets) in tqdm(enumerate(train_loader), total=len(train_loader)):
        optimizer.zero_grad()
        images = images.to(device)
        targets = targets.to(device)
        one_hot_targets = one_hot(targets, num_classes=num_classes).to(dtype=torch.float)
        outputs = model(images)
        loss = criterion(outputs, one_hot_targets)
        total_epoch_loss += loss.item()
        loss.backward()
        optimizer.step()
    model.eval()
    with torch.no_grad():
        total_epoch_acc = 0
        for index, (image, targets) in tqdm(enumerate(test_loader), total=len(test_loader)):
            image = image.to(device)
            targets = targets.to(device)
            outputs = model(image)
            pred = softmax(outputs, dim=1)
            total_epoch_acc += (pred.argmax(1) == targets).sum().item()
        avg_epoch_acc = total_epoch_acc / len(test_mnist_dataset)
        val_total_epoch_loss = 0
        for index, (image, targets) in tqdm(enumerate(val_loader), total=len(val_loader)):
            image = image.to(device)
            targets = targets.to(device)
            one_hot_targets = one_hot(targets, num_classes=num_classes).to(dtype=torch.float)
            outputs = model(image)
            loss = criterion(outputs, one_hot_targets)
            val_total_epoch_loss += loss.item()
    print(
        f"Epoch [{epoch + 1}/{num_epochs}],",
        f"Train Loss: {total_epoch_loss:.10f},",
        f"Test Acc: {avg_epoch_acc * 100:.3f}%,",
        f"Val Loss: {val_total_epoch_loss:.10f}",
    )
    train_loss.append(total_epoch_loss)
    test_acc.append(avg_epoch_acc * 100)
    val_loss.append(val_total_epoch_loss)
    if val_total_epoch_loss < best_val_loss:
        best_val_loss = val_total_epoch_loss
        current_patience = 0
    else:
        current_patience += 1
        if current_patience >= early_stopping_patience:
            print(f"Early stopping after {epoch + 1} epochs.")
            break
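Note (not part of the commit): the script holds out 20% of the training set as a validation split (random_split with a fixed seed of 42) and stops once the validation loss has failed to improve for 5 consecutive epochs. A standalone illustration of that patience rule, with hypothetical loss values:

def stops_after(val_losses, patience=5):
    # returns the 1-based epoch at which the loop above would break
    best, bad = float("inf"), 0
    for epoch, v in enumerate(val_losses, start=1):
        if v < best:
            best, bad = v, 0
        else:
            bad += 1
            if bad >= patience:
                return epoch
    return len(val_losses)

print(stops_after([1.0, 0.8, 0.85, 0.84, 0.83, 0.82, 0.81]))  # 7: five epochs in a row without beating 0.8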

Modified file:

@@ -1,8 +1,6 @@
-import time
-import numpy as np
 import torch
 from torch.nn.functional import *
-from torch.utils.data import Dataset, DataLoader
+from torch.utils.data import DataLoader
 from torch import nn
 from torchvision import datasets, transforms
 from tqdm import tqdm
@@ -10,120 +8,24 @@ from tqdm import tqdm
 import ipdb
-# Manual implementation of torch.nn.functional.one_hot
-def my_one_hot(indices: torch.Tensor, num_classes: int):
-    one_hot_tensor = torch.zeros(len(indices), num_classes, dtype=torch.long).to(indices.device)
-    one_hot_tensor.scatter_(1, indices.view(-1, 1), 1)
-    return one_hot_tensor
-# Manual implementation of torch.nn.functional.softmax
-def my_softmax(predictions: torch.Tensor, dim: int):
-    max_values = torch.max(predictions, dim=dim, keepdim=True).values
-    exp_values = torch.exp(predictions - max_values)
-    softmax_output = exp_values / torch.sum(exp_values, dim=dim, keepdim=True)
-    return softmax_output
-# Manual implementation of torch.nn.Linear
-class My_Linear:
-    def __init__(self, in_features: int, out_features: int):
-        self.weight = torch.normal(mean=0.001, std=0.5, size=(out_features, in_features), requires_grad=True, dtype=torch.float32)
-        self.bias = torch.normal(mean=0.001, std=0.5, size=(1,), requires_grad=True, dtype=torch.float32)
-        self.params = [self.weight, self.bias]
-    def __call__(self, x):
-        return self.forward(x)
-    def forward(self, x):
-        x = torch.matmul(x, self.weight.T) + self.bias
+class MNIST_CLS_Model(nn.Module):
+    def __init__(self, num_classes, dropout_rate=0.5):
+        super().__init__()
+        self.flatten = nn.Flatten()
+        self.fc1 = nn.Linear(in_features=28 * 28, out_features=1024)
+        self.fc2 = nn.Linear(in_features=1024, out_features=num_classes)
+        self.dropout = nn.Dropout(p=dropout_rate)
+    def forward(self, x: torch.Tensor):
+        x = self.flatten(x)
+        x = torch.relu(self.fc1(x))
+        x = self.dropout(x)
+        x = self.fc2(x)
         return x
-    def to(self, device: str):
-        for param in self.params:
-            param.data = param.data.to(device=device)
-        return self
-    def parameters(self):
-        return self.params
-# Manual implementation of torch.nn.Flatten
-class My_Flatten:
-    def __call__(self, x: torch.Tensor):
-        x = x.view(x.shape[0], -1)
-        return x
-# Manual implementation of torch.nn.ReLU
-class My_ReLU():
-    def __call__(self, x: torch.Tensor):
-        x = torch.max(x, torch.tensor(0.0, device=x.device))
-        return x
-# Manual implementation of torch.nn.Sigmoid
-class My_Sigmoid():
-    def __call__(self, x: torch.Tensor):
-        x = 1. / (1. + torch.exp(-x))
-        return x
-# Manual implementation of torch.nn.BCELoss
-class My_BCELoss:
-    def __call__(self, prediction: torch.Tensor, target: torch.Tensor):
-        loss = -torch.mean(target * torch.log(prediction) + (1 - target) * torch.log(1 - prediction))
-        return loss
-# Manual implementation of torch.nn.CrossEntropyLoss
-class My_CrossEntropyLoss:
-    def __call__(self, predictions: torch.Tensor, targets: torch.Tensor):
-        max_values = torch.max(predictions, dim=1, keepdim=True).values
-        exp_values = torch.exp(predictions - max_values)
-        softmax_output = exp_values / torch.sum(exp_values, dim=1, keepdim=True)
-        log_probs = torch.log(softmax_output)
-        nll_loss = -torch.sum(targets * log_probs, dim=1)
-        average_loss = torch.mean(nll_loss)
-        return average_loss
-# Manual implementation of a plain gradient-descent optimizer
-class My_optimizer:
-    def __init__(self, params: list[torch.Tensor], lr: float):
-        self.params = params
-        self.lr = lr
-    def step(self):
-        with torch.no_grad():
-            for param in self.params:
-                param.data = param.data - self.lr * param.grad.data
-    def zero_grad(self):
-        for param in self.params:
-            if param.grad is not None:
-                param.grad.data = torch.zeros_like(param.grad.data)
-# Manual implementation of torch.optim.SGD
-class My_SGD:
-    def __init__(self, params: list[torch.Tensor], lr: float, weight_decay=0):
-        self.params = params
-        self.lr = lr
-        self.weight_decay = weight_decay
-    def step(self):
-        with torch.no_grad():
-            for param in self.params:
-                param.data = param.data - self.lr * param.grad.data
-    def zero_grad(self):
-        for param in self.params:
-            if param.grad is not None:
-                param.grad.data = torch.zeros_like(param.grad.data)
 def train_MNIST_CLS(model, optimizer, num_epochs):
-    batch_size = 512
+    batch_size = 8192
     num_classes = 10
     device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -133,8 +35,12 @@ def train_MNIST_CLS(model, optimizer, num_epochs):
             transforms.Normalize((0.5,), (0.5,)),
         ]
     )
-    train_mnist_dataset = datasets.MNIST(root="../dataset", train=True, transform=transform, download=True)
-    test_mnist_dataset = datasets.MNIST(root="../dataset", train=False, transform=transform, download=True)
+    train_mnist_dataset = datasets.MNIST(
+        root="../dataset", train=True, transform=transform, download=True
+    )
+    test_mnist_dataset = datasets.MNIST(
+        root="../dataset", train=False, transform=transform, download=True
+    )
     train_loader = DataLoader(
         dataset=train_mnist_dataset,
         batch_size=batch_size,
@@ -158,7 +64,6 @@ def train_MNIST_CLS(model, optimizer, num_epochs):
     for epoch in range(num_epochs):
         model.train()
         total_epoch_loss = 0
-        start_time = time.time()
         for index, (images, targets) in tqdm(
             enumerate(train_loader), total=len(train_loader)
         ):
@@ -166,7 +71,9 @@ def train_MNIST_CLS(model, optimizer, num_epochs):
             images = images.to(device)
             targets = targets.to(device)
-            one_hot_targets = one_hot(targets, num_classes=num_classes).to(dtype=torch.float)
+            one_hot_targets = one_hot(targets, num_classes=num_classes).to(
+                dtype=torch.float
+            )
             outputs = model(images)
             loss = criterion(outputs, one_hot_targets)
@@ -175,13 +82,9 @@ def train_MNIST_CLS(model, optimizer, num_epochs):
             loss.backward()
             optimizer.step()
-        end_time = time.time()
-        train_time = end_time - start_time
         model.eval()
         with torch.no_grad():
            total_epoch_acc = 0
-            start_time = time.time()
            for index, (image, targets) in tqdm(
                enumerate(test_loader), total=len(test_loader)
            ):
@@ -192,16 +95,11 @@ def train_MNIST_CLS(model, optimizer, num_epochs):
                pred = softmax(outputs, dim=1)
                total_epoch_acc += (pred.argmax(1) == targets).sum().item()
-            end_time = time.time()
-            test_time = end_time - start_time
            avg_epoch_acc = total_epoch_acc / len(test_mnist_dataset)
            print(
                f"Epoch [{epoch + 1}/{num_epochs}],",
                f"Train Loss: {total_epoch_loss:.10f},",
-                f"Used Time: {train_time * 1000:.3f}ms,",
                f"Test Acc: {avg_epoch_acc * 100:.3f}%,",
-                f"Used Time: {test_time * 1000:.3f}ms",
            )
            train_loss.append(total_epoch_loss)
            test_acc.append(avg_epoch_acc * 100)
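Note (not part of the commit): the one_hot and softmax calls inside train_MNIST_CLS come from the wildcard `from torch.nn.functional import *` at the top of this file, and the scripts above pick them up, along with MNIST_CLS_Model and train_MNIST_CLS, through their own `from utils import *`. The equivalent explicit import would be:

from torch.nn.functional import one_hot, softmax  # what the wildcard import provides here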

File diff suppressed because one or more lines are too long