commit 175bf7b7da010f2d4e900135bfce85fef91dc323
Author: kejingfan
Date:   Tue Oct 10 09:42:31 2023 +0800

    first commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..93db21b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+dataset/
"59555b67-1650-4e1a-a98e-7906878bf3d0", + "metadata": {}, + "source": [ + "与手动实现的softmax回归相比较,nn.CrossEntropyLoss比手动实现的My_CrossEntropyLoss更加稳定,没有出现梯度爆炸的情况。" + ] + }, + { + "cell_type": "markdown", + "id": "f40431f2-e77b-4ead-81a3-ff6451a8e452", + "metadata": {}, + "source": [ + "**实验心得体会**\n", + "\n", + "通过完成本次Pytorch基本操作实验,让我对Pytorch框架有了更加深入的理解。我接触深度学习主要是在大语言模型领域,比较熟悉微调大模型,但是涉及到底层的深度学习知识,我还有很多短板和不足。这次实验对我这方面的锻炼让我收获良多。\n", + "\n", + "首先是数据集的设置。如果数据没有进行归一化,很容易出现梯度爆炸。这是在我以前直接使用图片数据集的经历中没有遇到过的问题。\n", + "\n", + "在实现logistic回归模型时,通过手动实现各个组件如优化器、线性层等,让我对这些模块的工作原理有了更清晰的认识。尤其是在实现广播机制时,需要充分理解张量操作的维度变换规律。而使用Pytorch内置模块进行实现时,通过继承nn.Module可以自动获得許多功能,使代码更加简洁。\n", + "\n", + "在实现softmax回归时,则遇到了更大的困难。手动实现的模型很容易出现梯度爆炸的问题,而使用Pytorch内置的损失函数和优化器则可以稳定训练。这让我意识到了选择合适的优化方法的重要性。另外,Pytorch强大的自动微分机制也是构建深度神经网络的重要基础。\n", + "\n", + "通过这个实验,让我对Pytorch框架有了更加直观的感受,也让我看到了仅靠基础模块搭建复杂模型的难点所在。这些经验对我后续使用Pytorch构建数据集模型会很有帮助。" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Lab1/.vscode/settings.json b/Lab1/.vscode/settings.json new file mode 100644 index 0000000..d99f2f3 --- /dev/null +++ b/Lab1/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter" + }, + "python.formatting.provider": "none" +} \ No newline at end of file diff --git a/Lab1/Pytorch基本操作实验报告.ipynb b/Lab1/Pytorch基本操作实验报告.ipynb new file mode 100644 index 0000000..c0645c6 --- /dev/null +++ b/Lab1/Pytorch基本操作实验报告.ipynb @@ -0,0 +1,1262 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3b57686b-7ac8-4897-bf76-3d982b1ff8da", + "metadata": {}, + "source": [ + "

本科生《深度学习》课程
实验报告

\n", + "
\n", + "
课程名称:深度学习
\n", + "
实验题目:Pytorch基本操作
\n", + "
学号:21281280
\n", + "
姓名:柯劲帆
\n", + "
班级:物联网2101班
\n", + "
指导老师:张淳杰
\n", + "
报告日期:2023年10月9日
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "e24aa17e-faf9-4d69-9eae-43159116b56f", + "metadata": {}, + "source": [ + "实验环境:\n", + "- OS:Ubuntu 22.04 内核版本 6.2.0-34-generic\n", + "- CPU:12th Gen Intel(R) Core(TM) i7-12700H\n", + "- GPU:NVIDIA GeForce RTX 3070 Ti Laptop\n", + "- conda: miniconda 23.9.0\n", + "- python:3.10.13\n", + "- pytorch:2.1.0" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a4e12268-bad4-44c4-92d5-883624d93e25", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import torch\n", + "from torch.autograd import Variable\n", + "from torch.utils.data import Dataset, DataLoader\n", + "from torch import nn\n", + "from torchvision import datasets, transforms" + ] + }, + { + "cell_type": "markdown", + "id": "cc7f0ce5-d613-425b-807c-78115632cd80", + "metadata": {}, + "source": [ + "引用相关库。" + ] + }, + { + "cell_type": "markdown", + "id": "59a43d35-56ac-4ade-995d-1c6fcbcd1262", + "metadata": {}, + "source": [ + "# 一、Pytorch基本操作考察\n", + "## 题目2\n", + "**使用 𝐓𝐞𝐧𝐬𝐨𝐫 初始化一个 𝟏×𝟑 的矩阵 𝑴 和一个 𝟐×𝟏 的矩阵 𝑵,对两矩阵进行减法操作(要求实现三种不同的形式),给出结果并分析三种方式的不同(如果出现报错,分析报错的原因),同时需要指出在计算过程中发生了什么。**" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "79ea46db-cf49-436c-9b5b-c6562d0da9e2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "方法1的结果:\n", + "tensor([[-3, -2, -1],\n", + " [-4, -3, -2]])\n", + "方法2的结果:\n", + "tensor([[-3, -2, -1],\n", + " [-4, -3, -2]])\n", + "方法3的结果:\n", + "tensor([[-3, -2, -1],\n", + " [-4, -3, -2]])\n" + ] + } + ], + "source": [ + "A = torch.tensor([[1, 2, 3]])\n", + "\n", + "B = torch.tensor([[4],\n", + " [5]])\n", + "\n", + "# 方法1: 使用PyTorch的减法操作符\n", + "result1 = A - B\n", + "\n", + "# 方法2: 使用PyTorch的sub函数\n", + "result2 = torch.sub(A, B)\n", + "\n", + "# 方法3: 手动实现广播机制并作差\n", + "def mysub(a:torch.Tensor, b:torch.Tensor):\n", + " if not (\n", + " (a.size(0) == 1 and b.size(1) == 1) \n", + " or \n", + " (a.size(1) == 1 and b.size(0) == 1)\n", + " ):\n", + " raise ValueError(\"输入的张量大小无法满足广播机制的条件。\")\n", + " else:\n", + " target_shape = torch.Size([max(A.size(0), B.size(0)), max(A.size(1), B.size(1))])\n", + " A_broadcasted = A.expand(target_shape)\n", + " B_broadcasted = B.expand(target_shape)\n", + " result = torch.zeros(target_shape, dtype=torch.int64).to(device=A_broadcasted.device)\n", + " for i in range(target_shape[0]):\n", + " for j in range(target_shape[1]):\n", + " result[i, j] = A_broadcasted[i, j] - B_broadcasted[i, j]\n", + " return result\n", + "\n", + "result3 = mysub(A, B)\n", + "\n", + "print(\"方法1的结果:\")\n", + "print(result1)\n", + "print(\"方法2的结果:\")\n", + "print(result2)\n", + "print(\"方法3的结果:\")\n", + "print(result3)" + ] + }, + { + "cell_type": "markdown", + "id": "2489a3ad-f6ff-4561-bb26-e02654090b98", + "metadata": {}, + "source": [ + "## 题目2\n", + "1. **利用Tensor创建两个大小分别3*2和4*2的随机数矩阵P和Q,要求服从均值为0,标准差0.01为的正态分布;**\n", + "2. **对第二步得到的矩阵Q进行形状变换得到Q的转置Q^T;**\n", + "3. 
+  {
+   "cell_type": "markdown",
+   "id": "2489a3ad-f6ff-4561-bb26-e02654090b98",
+   "metadata": {},
+   "source": [
+    "## Problem 2\n",
+    "1. **Use Tensor to create two random matrices P and Q of sizes 3×2 and 4×2 respectively, drawn from a normal distribution with mean 0 and standard deviation 0.01;**\n",
+    "2. **Transform the shape of the matrix Q obtained in step 1 to get its transpose Q^T;**\n",
+    "3. **Compute the matrix product of P and Q^T.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "41e4ee02-1d05-4101-b3f0-477bac0277fb",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Matrix P:\n",
+      "tensor([[ 0.0053,  0.0013],\n",
+      "        [-0.0086,  0.0136],\n",
+      "        [-0.0013,  0.0176]])\n",
+      "Matrix Q:\n",
+      "tensor([[ 0.0044,  0.0014],\n",
+      "        [ 0.0147,  0.0078],\n",
+      "        [-0.0002, -0.0023],\n",
+      "        [ 0.0001, -0.0011]])\n",
+      "Matrix QT:\n",
+      "tensor([[ 0.0044,  0.0147, -0.0002,  0.0001],\n",
+      "        [ 0.0014,  0.0078, -0.0023, -0.0011]])\n",
+      "Result of the matrix product:\n",
+      "tensor([[ 2.4953e-05,  8.7463e-05, -3.8665e-06, -8.9576e-07],\n",
+      "        [-1.9514e-05, -2.0557e-05, -2.9649e-05, -1.5913e-05],\n",
+      "        [ 1.8189e-05,  1.1834e-04, -4.0097e-05, -1.9608e-05]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "mean = 0\n",
+    "stddev = 0.01\n",
+    "\n",
+    "P = torch.normal(mean=mean, std=stddev, size=(3, 2))\n",
+    "Q = torch.normal(mean=mean, std=stddev, size=(4, 2))\n",
+    "\n",
+    "print(\"Matrix P:\")\n",
+    "print(P)\n",
+    "print(\"Matrix Q:\")\n",
+    "print(Q)\n",
+    "\n",
+    "# transpose Q to obtain Q^T\n",
+    "QT = Q.T\n",
+    "print(\"Matrix QT:\")\n",
+    "print(QT)\n",
+    "\n",
+    "# matrix product of P (3x2) and Q^T (2x4) -> (3x4)\n",
+    "result = torch.matmul(P, QT)\n",
+    "print(\"Result of the matrix product:\")\n",
+    "print(result)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cea9cb6d-adde-4e08-b9f2-8c417abf4231",
+   "metadata": {},
+   "source": [
+    "## Problem 3\n",
+    "**Given $y_3 = y_1 + y_2 = x^2 + x^3$ with $x = 1$, use what you have learned about Tensors to find the gradient of $y_3$ with respect to $x$, i.e. $\\frac{dy_3}{dx}$.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "951512cd-d915-4d04-959f-eb99d1971e2d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Gradient dy_3/dx:  2.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "x = torch.tensor(1.0, requires_grad=True)\n",
+    "y_1 = x ** 2\n",
+    "with torch.no_grad():\n",
+    "    y_2 = x ** 3\n",
+    "\n",
+    "y_3 = y_1 + y_2\n",
+    "\n",
+    "y_3.backward()\n",
+    "\n",
+    "print(\"Gradient dy_3/dx: \", x.grad.item())"
+   ]
+  },
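+  {
+   "cell_type": "markdown",
+   "id": "added-nograd-note",
+   "metadata": {},
+   "source": [
+    "The printed gradient is 2, not 5, because `y_2` is computed inside `torch.no_grad()`: it enters the sum as a constant detached from the autograd graph, so only $\\frac{dy_1}{dx} = 2x = 2$ flows back. Analytically, $\\frac{d}{dx}(x^2 + x^3) = 2x + 3x^2$, which is 5 at $x = 1$. A sketch with both terms kept in the graph (added for comparison; not executed as part of the original run):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "added-nograd-code",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x = torch.tensor(1.0, requires_grad=True)\n",
+    "y_3 = x ** 2 + x ** 3  # both terms stay in the autograd graph\n",
+    "y_3.backward()\n",
+    "print(x.grad.item())  # expected: 5.0"
+   ]
+  },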
+  {
+   "cell_type": "markdown",
+   "id": "3269dbf6-889a-49eb-8094-1e588e1a6c30",
+   "metadata": {},
+   "source": [
+    "# Part II. Implementing Logistic Regression by Hand\n",
+    "## Problem 1\n",
+    "**Implement logistic regression from scratch (using only Tensor- and NumPy-level operations), train and test it on a hand-built dataset, and analyze the results from several angles such as the loss and the accuracy on the training set (nn.BCELoss or nn.BCEWithLogitsLoss may be used as the loss function; implementing binary cross-entropy from scratch is optional).**"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bcd12aa9-f187-4d88-8c59-af6d16107edb",
+   "metadata": {},
+   "source": [
+    "Given a predicted probability $\\hat{y}$ and a target label $y$ (usually 0 or 1), BCELoss is computed as\n",
+    "$$\n",
+    "\\text{BCELoss}(\\hat{y}, y) = -\\frac{1}{N} \\sum_{i=1}^{N} \\left(y_i \\cdot \\log(\\hat{y}_i) + (1 - y_i) \\cdot \\log(1 - \\hat{y}_i)\\right)\n",
+    "$$\n",
+    "where $N$ is the number of samples, $\\hat{y}_i$ is the $i$-th entry of the model's predicted probabilities, and $y_i$ is the $i$-th target label, usually 0 or 1 in binary classification. The formula sums the per-sample binary cross-entropy and takes the mean.\n",
+    "\n",
+    "The manual implementation of BCELoss follows."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "e31b86ec-4114-48dd-8d73-fe4e0686419a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Input:\n",
+      "tensor([0.6900])\n",
+      "Target:\n",
+      "tensor([1.])\n",
+      "My_BCELoss loss: 0.37110066413879395\n",
+      "nn.BCELoss loss: 0.37110066413879395\n"
+     ]
+    }
+   ],
+   "source": [
+    "class My_BCELoss:\n",
+    "    def __call__(self, prediction: torch.Tensor, target: torch.Tensor):\n",
+    "        # prediction is assumed to lie strictly in (0, 1); log(0) would give -inf\n",
+    "        loss = -torch.mean(target * torch.log(prediction) + (1 - target) * torch.log(1 - prediction))\n",
+    "        return loss\n",
+    "\n",
+    "\n",
+    "# Test\n",
+    "prediction = torch.sigmoid(torch.tensor([0.8]))\n",
+    "target = torch.tensor([1.0])\n",
+    "print(f\"Input:\\n{prediction}\")\n",
+    "print(f\"Target:\\n{target}\")\n",
+    "\n",
+    "my_bce_loss = My_BCELoss()\n",
+    "my_loss = my_bce_loss(prediction, target)\n",
+    "print(\"My_BCELoss loss:\", my_loss.item())\n",
+    "\n",
+    "nn_bce_loss = nn.BCELoss()\n",
+    "nn_loss = nn_bce_loss(prediction, target)\n",
+    "print(\"nn.BCELoss loss:\", nn_loss.item())"
+   ]
+  },
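+  {
+   "cell_type": "markdown",
+   "id": "added-bcelogits-note",
+   "metadata": {},
+   "source": [
+    "The assignment also mentions nn.BCEWithLogitsLoss, which fuses the sigmoid into the loss and is the numerically safer choice when the model outputs raw scores. A quick equivalence check (added sketch; 0.8 is the raw score passed through the sigmoid above):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "added-bcelogits-code",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "logits = torch.tensor([0.8])\n",
+    "target = torch.tensor([1.0])\n",
+    "# BCEWithLogitsLoss(logits) equals BCELoss(sigmoid(logits)); both should print ~0.3711\n",
+    "print(nn.BCEWithLogitsLoss()(logits, target).item())\n",
+    "print(nn.BCELoss()(torch.sigmoid(logits), target).item())"
+   ]
+  },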
+  {
+   "cell_type": "markdown",
+   "id": "345b0300-8808-4c43-9bf9-05a7e6e1f5af",
+   "metadata": {},
+   "source": [
+    "The optimizer is straightforward. It implements:\n",
+    "- receiving the parameters: `__init__()`\n",
+    "- updating the parameters with plain gradient descent, $\\theta \\leftarrow \\theta - lr \\cdot \\nabla_\\theta L$: `step()`\n",
+    "- clearing the gradients stored on the parameters: `zero_grad()`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "0297066c-9fc1-448d-bdcb-29a6f1519117",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "x.grad after y.backward():  2.0\n",
+      "x after optimizer_test.step():  0.800000011920929\n",
+      "x.grad after optimizer_test.zero_grad():  0.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "class My_optimizer:\n",
+    "    def __init__(self, params: list[torch.Tensor], lr: float):\n",
+    "        self.params = params\n",
+    "        self.lr = lr\n",
+    "\n",
+    "    def step(self):\n",
+    "        for param in self.params:\n",
+    "            param.data = param.data - self.lr * param.grad.data\n",
+    "\n",
+    "    def zero_grad(self):\n",
+    "        for param in self.params:\n",
+    "            if param.grad is not None:\n",
+    "                param.grad.data.zero_()\n",
+    "\n",
+    "\n",
+    "# Test\n",
+    "x = torch.tensor(1.0, requires_grad=True)\n",
+    "y = x ** 2\n",
+    "optimizer_test = My_optimizer([x], lr=0.1)\n",
+    "\n",
+    "y.backward()\n",
+    "print(\"x.grad after y.backward(): \", x.grad.item())\n",
+    "\n",
+    "optimizer_test.step()\n",
+    "print(\"x after optimizer_test.step(): \", x.item())\n",
+    "\n",
+    "optimizer_test.zero_grad()\n",
+    "print(\"x.grad after optimizer_test.zero_grad(): \", x.grad.item())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6ab83528-a88b-4d66-b0c9-b1315cf75c22",
+   "metadata": {},
+   "source": [
+    "A linear layer has a weight and a bias. It computes\n",
+    "$$\n",
+    "x := x \\times weight^T + bias\n",
+    "$$\n",
+    "The implementation follows:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "8e18695a-d8c5-4f77-8b5c-de40d9240fb9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Input:\n",
+      "tensor([[1.],\n",
+      "        [2.]], requires_grad=True)\n",
+      "Weight:\n",
+      "tensor([[-1.0980],\n",
+      "        [-0.5413],\n",
+      "        [ 1.5884]], requires_grad=True)\n",
+      "Bias:\n",
+      "tensor([-1.1733], requires_grad=True)\n",
+      "Output:\n",
+      "tensor([[-2.2713, -1.7146,  0.4151],\n",
+      "        [-3.3692, -2.2559,  2.0036]], grad_fn=<AddBackward0>)\n"
+     ]
+    }
+   ],
+   "source": [
+    "class My_Linear:\n",
+    "    def __init__(self, input_feature: int, output_feature: int):\n",
+    "        self.weight = torch.randn((output_feature, input_feature), requires_grad=True, dtype=torch.float32)\n",
+    "        # simplification: a single shared scalar bias, whereas nn.Linear keeps one bias per output feature\n",
+    "        self.bias = torch.randn(1, requires_grad=True, dtype=torch.float32)\n",
+    "        self.params = [self.weight, self.bias]\n",
+    "\n",
+    "    def __call__(self, x: torch.Tensor):\n",
+    "        return self.forward(x)\n",
+    "\n",
+    "    def forward(self, x: torch.Tensor):\n",
+    "        x = torch.matmul(x, self.weight.T) + self.bias\n",
+    "        return x\n",
+    "\n",
+    "    def to(self, device: str):\n",
+    "        for param in self.params:\n",
+    "            param.data = param.data.to(device=device)\n",
+    "        return self\n",
+    "\n",
+    "    def parameters(self):\n",
+    "        return self.params\n",
+    "\n",
+    "\n",
+    "# Test\n",
+    "linear_test = My_Linear(1, 3)\n",
+    "x = torch.tensor([[1.], [2.]], requires_grad=True)\n",
+    "print(f\"Input:\\n{x}\")\n",
+    "print(f\"Weight:\\n{linear_test.weight}\")\n",
+    "print(f\"Bias:\\n{linear_test.bias}\")\n",
+    "y = linear_test(x)\n",
+    "print(f\"Output:\\n{y}\")"
+   ]
+  },
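+  {
+   "cell_type": "markdown",
+   "id": "added-linear-check-note",
+   "metadata": {},
+   "source": [
+    "As a sanity check (cell added; not part of the original run), `My_Linear` should agree with `nn.Linear` once the same parameters are copied in. The shared scalar bias is broadcast to the per-output-feature bias that `nn.Linear` expects:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "added-linear-check-code",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ref = nn.Linear(1, 3)\n",
+    "with torch.no_grad():\n",
+    "    ref.weight.copy_(linear_test.weight)\n",
+    "    ref.bias.copy_(linear_test.bias.expand(3))  # broadcast the scalar bias to all 3 outputs\n",
+    "print(torch.allclose(linear_test(x), ref(x)))  # expected: True"
+   ]
+  },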
+  {
+   "cell_type": "markdown",
+   "id": "5ff813cc-c1f0-4c73-a3e8-d6796ef5d366",
+   "metadata": {},
+   "source": [
+    "Implement the logistic regression model by hand.\n",
+    "\n",
+    "The model is simple: a linear layer followed by a sigmoid.\n",
+    "\n",
+    "The sigmoid (logistic) function $\\sigma(z) = \\frac{1}{1 + e^{-z}}$ is a common activation for output or hidden layers; it squashes a real-valued input into the range (0, 1)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "e7de7e4b-a084-4793-812e-46e8550ecd8d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Model_2_1:\n",
+    "    def __init__(self):\n",
+    "        self.linear = My_Linear(1, 1)\n",
+    "        self.params = self.linear.params\n",
+    "\n",
+    "    def __call__(self, x):\n",
+    "        return self.forward(x)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x = self.linear(x)\n",
+    "        x = torch.sigmoid(x)\n",
+    "        return x\n",
+    "\n",
+    "    def to(self, device: str):\n",
+    "        for param in self.params:\n",
+    "            param.data = param.data.to(device=device)\n",
+    "        return self\n",
+    "\n",
+    "    def parameters(self):\n",
+    "        return self.params"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e14acea9-e5ef-4c24-aea9-329647224ce1",
+   "metadata": {},
+   "source": [
+    "Build an artificial random dataset.\n",
+    "\n",
+    "This is where I hit the biggest problem: a badly constructed dataset makes the later training blow up with exploding gradients.\n",
+    "\n",
+    "I generate random data and then min-max normalize it,\n",
+    "$$\n",
+    "\\hat{x} = \\frac{x - \\text{min}_x}{\\text{max}_x - \\text{min}_x}\n",
+    "$$\n",
+    "to keep the values in a suitable range.\n",
+    "\n",
+    "The target is $y = 4 - 3 \\times x + noise$, where noise is random.\n",
+    "\n",
+    "After generating x and y, both are normalized, and `__getitem__()` is implemented so that a DataLoader can index the dataset."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "c39fbafb-62e4-4b8c-9d65-6718d25f2970",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Test dataset size: 100\n",
+      "Sample 0 of the test dataset:\n",
+      "x_0 = 0.5531462811708403\n",
+      "y_0 = 0.42036701080526284\n"
+     ]
+    }
+   ],
+   "source": [
+    "class My_Dataset(Dataset):\n",
+    "    def __init__(self, data_size=1000000):\n",
+    "        np.random.seed(0)\n",
+    "        x = 2 * np.random.rand(data_size, 1)\n",
+    "        noise = 0.2 * np.random.randn(data_size, 1)\n",
+    "        y = 4 - 3 * x + noise\n",
+    "        self.min_x, self.max_x = np.min(x), np.max(x)\n",
+    "        min_y, max_y = np.min(y), np.max(y)\n",
+    "        x = (x - self.min_x) / (self.max_x - self.min_x)\n",
+    "        y = (y - min_y) / (max_y - min_y)\n",
+    "        self.data = [[x[i][0], y[i][0]] for i in range(x.shape[0])]\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return len(self.data)\n",
+    "\n",
+    "    def __getitem__(self, index):\n",
+    "        x, y = self.data[index]\n",
+    "        return x, y\n",
+    "\n",
+    "\n",
+    "# Test\n",
+    "dataset_test = My_Dataset(data_size=100)\n",
+    "dataset_size = len(dataset_test)\n",
+    "print(f\"Test dataset size: {dataset_size}\")\n",
+    "x0, y0 = dataset_test[0]\n",
+    "print(\"Sample 0 of the test dataset:\")\n",
+    "print(f\"x_0 = {x0}\")\n",
+    "print(f\"y_0 = {y0}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "957a76a2-b306-47a8-912e-8fbf00cdfd42",
+   "metadata": {},
+   "source": [
+    "Train the logistic regression model, in the following steps:\n",
+    "1. Set the hyperparameters\n",
+    "2. Build the dataset\n",
+    "3. Initialize the model\n",
+    "4. Define the loss function and the optimizer\n",
+    "5. Train:\n",
+    "    1. Fetch a batch from the training dataloader\n",
+    "    2. Feed it to the model\n",
+    "    3. Compute the loss against the ground truth\n",
+    "    4. Backpropagate and update with the optimizer\n",
+    "    5. Repeat the steps above\n",
+    "6. Test:\n",
+    "    1. Prepare the test data\n",
+    "    2. Feed it to the model\n",
+    "    3. Read off the prediction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "5612661e-2809-4d46-96c2-33ee9f44116d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/10, Loss: 680.9198314547539, Acc: 0.9677169744703272\n",
+      "Epoch 2/10, Loss: 677.2582936882973, Acc: 0.9985965700887113\n",
+      "Epoch 3/10, Loss: 677.1911396384239, Acc: 0.9993738265049104\n",
+      "Epoch 4/10, Loss: 677.1777537465096, Acc: 0.9995470920810262\n",
+      "Epoch 5/10, Loss: 677.1745615005493, Acc: 0.9998228389835642\n",
+      "Epoch 6/10, Loss: 677.1743944883347, Acc: 0.9999690339979311\n",
+      "Epoch 7/10, Loss: 677.1735371947289, Acc: 0.9998205132243208\n",
+      "Epoch 8/10, Loss: 677.1737813353539, Acc: 0.999798559017381\n",
+      "Epoch 9/10, Loss: 677.1740361452103, Acc: 0.9998672931901137\n",
+      "Epoch 10/10, Loss: 677.1736125349998, Acc: 0.9997257713704987\n",
+      "Model weights: -0.0006128809181973338, bias: 0.023128816857933998\n",
+      "Prediction for test data: 0.505628764629364\n"
+     ]
+    }
+   ],
+   "source": [
+    "learning_rate = 5e-2\n",
+    "num_epochs = 10\n",
+    "batch_size = 1024\n",
+    "device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n",
+    "\n",
+    "dataset = My_Dataset()\n",
+    "dataloader = DataLoader(\n",
+    "    dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=5, pin_memory=True\n",
+    ")\n",
+    "\n",
+    "model = Model_2_1().to(device)\n",
+    "criterion = My_BCELoss()\n",
+    "optimizer = My_optimizer(model.parameters(), lr=learning_rate)\n",
+    "\n",
+    "for epoch in range(num_epochs):\n",
+    "    total_epoch_loss = 0\n",
+    "    total_epoch_pred = 0\n",
+    "    total_epoch_target = 0\n",
+    "    for x, targets in dataloader:\n",
+    "        optimizer.zero_grad()\n",
+    "\n",
+    "        x = x.to(device).to(dtype=torch.float32)\n",
+    "        targets = targets.to(device).to(dtype=torch.float32)\n",
+    "\n",
+    "        x = x.unsqueeze(1)\n",
+    "        targets = targets.unsqueeze(1)  # match y_pred's (B, 1) shape; otherwise the loss broadcasts to (B, B)\n",
+    "        y_pred = model(x)\n",
+    "        loss = criterion(y_pred, targets)\n",
+    "        total_epoch_loss += loss.item()\n",
+    "        total_epoch_target += targets.sum().item()\n",
+    "        total_epoch_pred += y_pred.sum().item()\n",
+    "\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "\n",
+    "    # \"Acc\" compares the summed predictions with the summed targets; it is not classification accuracy\n",
+    "    print(\n",
+    "        f\"Epoch {epoch + 1}/{num_epochs}, Loss: {total_epoch_loss}, Acc: {1 - abs(total_epoch_pred - total_epoch_target) / total_epoch_target}\"\n",
+    "    )\n",
+    "\n",
+    "with torch.no_grad():\n",
+    "    test_data = (np.array([[2]]) - dataset.min_x) / (dataset.max_x - dataset.min_x)\n",
+    "    test_data = torch.tensor(test_data, dtype=torch.float32).to(device)\n",
+    "    predicted = model(test_data).to(\"cpu\")\n",
+    "    print(\n",
+    "        f\"Model weights: {model.linear.weight.item()}, bias: {model.linear.bias.item()}\"\n",
+    "    )\n",
+    "    print(f\"Prediction for test data: {predicted.item()}\")"
+   ]
+  },
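+  {
+   "cell_type": "markdown",
+   "id": "added-run-analysis",
+   "metadata": {},
+   "source": [
+    "An added observation on the log above: the learned weight is essentially zero, so the model is a constant predictor — $\\sigma(-0.0006 \\cdot x + 0.0231) \\approx 0.506$ for every normalized $x \\in [0, 1]$, which matches the test prediction 0.5056. The high \"Acc\" value only says that the summed predictions match the summed targets, which a constant model can satisfy; it does not show that the slope of the data was learned."
+   ]
+  },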
+  {
+   "cell_type": "markdown",
+   "id": "9e416582-a30d-4084-acc6-6e05f80a6aff",
+   "metadata": {},
+   "source": [
+    "## Problem 2\n",
+    "**Implement logistic regression with torch.nn, train and test it on the artificial dataset, and analyze the results from several angles such as the loss and the accuracy on the training set.**"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0460d125-7d03-44fe-845c-c4d13792e241",
+   "metadata": {},
+   "source": [
+    "Implement the model with torch.nn: replace the hand-written pieces of Model_2_1 with the torch.nn built-ins, and inherit from nn.Module to pick up its model-template machinery."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "fa121afd-a1af-4193-9b54-68041e0ed068",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Model_2_2(nn.Module):\n",
+    "    def __init__(self):\n",
+    "        super(Model_2_2, self).__init__()\n",
+    "        # float64, because the DataLoader delivers the numpy-generated samples as double\n",
+    "        self.linear = nn.Linear(1, 1, dtype=torch.float64)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x = self.linear(x)\n",
+    "        x = torch.sigmoid(x)\n",
+    "        return x"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "176eee7e-4e3d-470e-8af2-8761bca039f8",
+   "metadata": {},
+   "source": [
+    "Training and testing are almost identical to the hand-written version. Only a few dtype-related lines change, to match what the torch.nn built-ins expect."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "93b0fdb6-be8b-4663-b59e-05ed19a9ea09",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/10, Loss: 600.8090852049173, Acc: 0.9945839732715815\n",
+      "Epoch 2/10, Loss: 565.9542879898308, Acc: 0.9999073566261442\n",
+      "Epoch 3/10, Loss: 565.9275637627202, Acc: 0.9999969933728429\n",
+      "Epoch 4/10, Loss: 565.927609191542, Acc: 0.9999961959888584\n",
+      "Epoch 5/10, Loss: 565.928202885308, Acc: 0.9999953721249991\n",
+      "Epoch 6/10, Loss: 565.9323843971484, Acc: 0.9999969051674709\n",
+      "Epoch 7/10, Loss: 565.9298919086365, Acc: 0.9999935973983517\n",
+      "Epoch 8/10, Loss: 565.9299267993255, Acc: 0.9999985970973472\n",
+      "Epoch 9/10, Loss: 565.9306044380719, Acc: 0.9999947955797296\n",
+      "Epoch 10/10, Loss: 565.9329843268798, Acc: 0.9999973784035556\n",
+      "Model weights: -3.7066140776793373, bias: 1.8709382558479912\n",
+      "Prediction for test data: 0.13756338580653613\n"
+     ]
+    }
+   ],
+   "source": [
+    "learning_rate = 1e-2\n",
+    "num_epochs = 10\n",
+    "batch_size = 1024\n",
+    "device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n",
+    "\n",
+    "dataset = My_Dataset()\n",
+    "dataloader = DataLoader(\n",
+    "    dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=5, pin_memory=True\n",
+    ")\n",
+    "\n",
+    "model = Model_2_2().to(device)\n",
+    "criterion = nn.BCELoss()\n",
+    "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n",
+    "\n",
+    "for epoch in range(num_epochs):\n",
+    "    total_epoch_loss = 0\n",
+    "    total_epoch_pred = 0\n",
+    "    total_epoch_target = 0\n",
+    "    for x, targets in dataloader:\n",
+    "        optimizer.zero_grad()\n",
+    "\n",
+    "        x = x.to(device)\n",
+    "        targets = targets.to(device)\n",
+    "\n",
+    "        x = x.unsqueeze(1)\n",
+    "        targets = targets.unsqueeze(1)\n",
+    "        y_pred = model(x)\n",
+    "        loss = criterion(y_pred, targets)\n",
+    "        total_epoch_loss += loss.item()\n",
+    "        total_epoch_target += targets.sum().item()\n",
+    "        total_epoch_pred += y_pred.sum().item()\n",
+    "\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "\n",
+    "    print(\n",
+    "        f\"Epoch {epoch + 1}/{num_epochs}, Loss: {total_epoch_loss}, Acc: {1 - abs(total_epoch_pred - total_epoch_target) / total_epoch_target}\"\n",
+    "    )\n",
+    "\n",
+    "with torch.no_grad():\n",
+    "    test_data = (np.array([[2]]) - dataset.min_x) / (dataset.max_x - dataset.min_x)\n",
+    "    test_data = torch.tensor(test_data, dtype=torch.float64).to(device)\n",
+    "    predicted = model(test_data).to(\"cpu\")\n",
+    "    print(\n",
+    "        f\"Model weights: {model.linear.weight.item()}, bias: {model.linear.bias.item()}\"\n",
+    "    )\n",
+    "    print(f\"Prediction for test data: {predicted.item()}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e6bff679-f8d2-46cc-bdcb-82af7dab38b3",
+   "metadata": {},
+   "source": [
+    "Comparing the two runs, the torch.nn loss function and optimizer push the accuracy up faster.\n",
+    "\n",
+    "Why models trained on identically distributed data end up with such different weights, biases, and predictions is a question I still need to investigate; one plausible factor (unverified) is that the two runs do not use the same optimization method at all — hand-rolled plain SGD at lr 5e-2 versus Adam at lr 1e-2 — so there is no reason for them to stop at the same point."
+   ]
+  },
"**要求动手从0实现softmax回归(只借助Tensor和Numpy相关的库)在Fashion-MNIST数据集上进行训练和测试,并从loss、训练集以及测试集上的准确率等多个角度对结果进行分析(要求从零实现交叉熵损失函数)**" + ] + }, + { + "cell_type": "markdown", + "id": "3c356760-75a8-4814-ba69-73b270396a4e", + "metadata": {}, + "source": [ + "手动实现nn.one_hot()。\n", + "\n", + "one-hot向量用于消除线性标签值所映射的类别的非线性。\n", + "\n", + "one-hot向量是使用一个长度为分类数量的数组表示标签值,其中有且仅有1个值为为1,该值的下标为标签值;其余为0。\n", + "\n", + "原理很简单,步骤如下:\n", + "1. 初始化全零的张量,大小为(标签数量,分类数量);\n", + "2. 将标签值映射到全零张量的\\[下标,标签值\\]中,将该位置为1;\n", + "3. 返回修改后的张量,即是ont-hot向量。" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "e605f1b0-1d32-410f-bddf-402a85ccc9ff", + "metadata": {}, + "outputs": [], + "source": [ + "def my_one_hot(indices: torch.Tensor, num_classes: int):\n", + " one_hot_tensor = torch.zeros(len(indices), num_classes).to(indices.device)\n", + " one_hot_tensor.scatter_(1, indices.view(-1, 1), 1)\n", + " return one_hot_tensor" + ] + }, + { + "cell_type": "markdown", + "id": "902603a6-bfb9-4ce3-bd0d-b00cebb1d3cb", + "metadata": {}, + "source": [ + "手动实现CrossEntropyLoss。\n", + "\n", + "CrossEntropyLoss由一个log_softmax和一个nll_loss组成。\n", + "\n", + "softmax的数学表达式如下:\n", + "$$\n", + "\\text{softmax}(y_i) = \\frac{e^{y_i - \\text{max}(y)}}{\\sum_{j=1}^{N} e^{y_j - \\text{max}(y)}} \n", + "$$\n", + "log_softmax即为$\\log\\left(softmax\\left(y\\right)\\right)$。\n", + "\n", + "CrossEntropyLoss的数学表达式如下:\n", + "$$\n", + "\\text{CrossEntropyLoss}(y, \\hat{y}) = -\\frac{1}{N} \\sum_{i=1}^{N} \\hat{y}_i \\cdot \\log(\\text{softmax}(y_i)) \n", + "$$\n", + "\n", + "故代码如下:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "759a3bb2-b5f4-4ea5-a2d7-15f0c4cdd14b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "输入:\n", + "tensor([[-1.2914, 0.4715, -0.0432, 1.7427, -1.9236],\n", + " [ 0.5361, -0.7551, -0.6810, 1.0945, 0.6135],\n", + " [-1.3398, -0.0026, -1.6066, -0.4659, -1.6076]], requires_grad=True)\n", + "标签:\n", + "tensor([[1., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1.],\n", + " [0., 0., 0., 0., 1.]])\n", + "My_CrossEntropyLoss损失值: 2.4310648441314697\n", + "nn.CrossEntropyLoss损失值: 2.4310646057128906\n" + ] + } + ], + "source": [ + "class My_CrossEntropyLoss:\n", + " def __call__(self, predictions: torch.Tensor, targets: torch.Tensor):\n", + " max_values = torch.max(predictions, dim=1, keepdim=True).values\n", + " exp_values = torch.exp(predictions - max_values)\n", + " softmax_output = exp_values / torch.sum(exp_values, dim=1, keepdim=True)\n", + " log_probs = torch.log(softmax_output)\n", + " \n", + " nll_loss = -torch.sum(targets * log_probs, dim=1)\n", + " average_loss = torch.mean(nll_loss)\n", + " return average_loss\n", + "\n", + " \n", + "# 测试\n", + "input = torch.randn(3, 5, requires_grad=True)\n", + "target = torch.randn(3, 5).softmax(dim=1).argmax(1)\n", + "target = torch.nn.functional.one_hot(target, num_classes=5).to(dtype=torch.float32)\n", + "print(f\"输入:\\n{input}\")\n", + "print(f\"标签:\\n{target}\")\n", + "\n", + "my_crossentropyloss = My_CrossEntropyLoss()\n", + "my_loss = my_crossentropyloss(input, target)\n", + "print(\"My_CrossEntropyLoss损失值:\", my_loss.item())\n", + "\n", + "nn_crossentropyloss = nn.CrossEntropyLoss()\n", + "nn_loss = nn_crossentropyloss(input, target)\n", + "print(\"nn.CrossEntropyLoss损失值:\", nn_loss.item())" + ] + }, + { + "cell_type": "markdown", + "id": "dbf78501-f5be-4008-986c-d331d531491f", + "metadata": {}, + "source": [ + "手动实现Flatten。\n", + "\n", + "原理很简单,就是把多维的张量拉直成一个向量。" + ] + }, + { + "cell_type": "code", + 
"execution_count": 15, + "id": "74322629-8325-4823-b80f-f28182d577c1", + "metadata": {}, + "outputs": [], + "source": [ + "class My_Flatten:\n", + " def __call__(self, x: torch.Tensor):\n", + " return self.forward(x)\n", + "\n", + " def forward(self, x: torch.Tensor):\n", + " x = x.view(x.shape[0], -1)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "id": "35aee905-ae37-4faa-a7f1-a04cd8579f78", + "metadata": {}, + "source": [ + "手动实现softmax回归模型。\n", + "\n", + "模型很简单,主要由一个Flatten层和一个线性层组成。\n", + "\n", + "Flatten层主要用于将2维的图像展开,直接作为1维的特征量输入网络。" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "bb31a75e-464c-4b94-b927-b219a765e35d", + "metadata": {}, + "outputs": [], + "source": [ + "class Model_3_1:\n", + " def __init__(self, num_classes):\n", + " self.flatten = My_Flatten()\n", + " self.linear = My_Linear(28 * 28, num_classes)\n", + " self.params = self.linear.params\n", + "\n", + " def __call__(self, x: torch.Tensor):\n", + " return self.forward(x)\n", + "\n", + " def forward(self, x: torch.Tensor):\n", + " x = self.flatten(x)\n", + " x = self.linear(x)\n", + " return x\n", + "\n", + " def to(self, device: str):\n", + " for param in self.params:\n", + " param.data = param.data.to(device=device)\n", + " return self\n", + "\n", + " def parameters(self):\n", + " return self.params" + ] + }, + { + "cell_type": "markdown", + "id": "17e686d1-9c9a-4727-8fdc-9990d348c523", + "metadata": {}, + "source": [ + "训练与测试过程与之前手动实现的几乎一致。由于数据集的变化,对应超参数也进行了调整。\n", + "\n", + "数据集也使用了现成的FashionMNIST数据集,且划分了训练集和测试集。\n", + "\n", + "FashionMNIST数据集直接调用API获取。数据集的image为28*28的单通道灰白图片,label为单个数值标签。\n", + "\n", + "训练softmax回归模型。\n", + "进行如下步骤:\n", + "1. 初始化超参数\n", + "2. 获取数据集\n", + "3. 初始化模型\n", + "4. 定义损失函数和优化器\n", + "5. 训练\n", + " 1. 从训练dataloader中获取批量数据\n", + " 2. 传入模型\n", + " 3. 使用损失函数计算与ground_truth的损失\n", + " 4. 使用优化器进行反向传播\n", + " 5. 循环以上步骤\n", + "6. 测试\n", + " 1. 从测试dataloader中获取批量数据\n", + " 2. 传入模型\n", + " 3. 将预测值与ground_truth进行比较,得出正确率\n", + " 4. 
{ "cell_type": "code", "execution_count": 17, "id": "d816dae1-5fbe-4c29-9597-19d66b5eb6b4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/10, Loss: nan, Acc: 0.09999999403953552\n", "Epoch 2/10, Loss: nan, Acc: 0.09999999403953552\n", "Epoch 3/10, Loss: nan, Acc: 0.09999999403953552\n", "Epoch 4/10, Loss: nan, Acc: 0.09999999403953552\n", "Epoch 5/10, Loss: nan, Acc: 0.09999999403953552\n", "Epoch 6/10, Loss: nan, Acc: 0.09999999403953552\n", "Epoch 7/10, Loss: nan, Acc: 0.09999999403953552\n", "Epoch 8/10, Loss: nan, Acc: 0.09999999403953552\n", "Epoch 9/10, Loss: nan, Acc: 0.09999999403953552\n", "Epoch 10/10, Loss: nan, Acc: 0.09999999403953552\n" ] } ], "source": [ + "learning_rate = 5e-3\n", + "num_epochs = 10\n", + "batch_size = 4096\n", + "num_classes = 10\n", + "device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n", + "\n", + "transform = transforms.Compose(\n", + "    [\n", + "        transforms.ToTensor(),\n", + "        transforms.Normalize((0.5,), (0.5,)),\n", + "    ]\n", + ")\n", + "train_dataset = datasets.FashionMNIST(root=\"./dataset\", train=True, transform=transform, download=True)\n", + "test_dataset = datasets.FashionMNIST(root=\"./dataset\", train=False, transform=transform, download=True)\n", + "train_loader = DataLoader(\n", + "    dataset=train_dataset, batch_size=batch_size,\n", + "    shuffle=True, num_workers=4, pin_memory=True,\n", + ")\n", + "test_loader = DataLoader(\n", + "    dataset=test_dataset, batch_size=batch_size,\n", + "    shuffle=True, num_workers=4, pin_memory=True,\n", + ")\n", + "\n", + "model = Model_3_1(num_classes).to(device)\n", + "criterion = My_CrossEntropyLoss()\n", + "optimizer = My_optimizer(model.parameters(), lr=learning_rate)\n", + "\n", + "for epoch in range(num_epochs):\n", + "    total_epoch_loss = 0\n", + "    for images, targets in train_loader:\n", + "        optimizer.zero_grad()\n", + "\n", + "        images = images.to(device)\n", + "        targets = targets.to(device).to(dtype=torch.long)\n", + "\n", + "        one_hot_targets = (\n", + "            my_one_hot(targets, num_classes=num_classes)\n", + "            .to(device)\n", + "            .to(dtype=torch.long)\n", + "        )\n", + "\n", + "        outputs = model(images)\n", + "        loss = criterion(outputs, one_hot_targets)\n", + "        total_epoch_loss += loss\n", + "\n", + "        loss.backward()\n", + "        optimizer.step()\n", + "\n", + "    total_acc = 0\n", + "    with torch.no_grad():\n", + "        for image, targets in test_loader:\n", + "            image = image.to(device)\n", + "            targets = targets.to(device)\n", + "            outputs = model(image)\n", + "            total_acc += (outputs.argmax(1) == targets).sum()\n", + "    print(\n", + "        f\"Epoch {epoch + 1}/{num_epochs}, Loss: {total_epoch_loss}, Acc: {total_acc / len(test_dataset)}\"\n", + "    )" ] }, { "cell_type": "markdown", "id": "a49d0165-aeb7-48c0-9b67-956bb08cb356", "metadata": {}, "source": [ "The loss is NaN from the first epoch and the accuracy never leaves chance level (10%); I could not resolve this at the time. A plausible cause (rather than a gradient explosion in the usual sense): the unscaled torch.randn initialization produces logits with a very large spread, softmax then underflows to exactly 0 for most classes, torch.log turns those entries into -inf, and 0 * (-inf) in the nll step yields NaN. A numerically stable log_softmax, sketched just before the next code cell, would avoid this." ] }, { "cell_type": "markdown", "id": "3ef5240f-8a11-4678-bfce-f1cbc7e71b77", "metadata": {}, "source": [ "## Problem 2\n", "\n", "**Implement softmax regression with torch.nn, train and test it on the Fashion-MNIST dataset, and analyze the results from multiple angles such as the loss and the accuracy on the training and test sets**" ] }, { "cell_type": "markdown", "id": "5c4a88c6-637e-4af5-bed5-f644685dcabc", "metadata": {}, "source": [ "Implementing the model with torch.nn.\n", "\n", "It suffices to replace the hand-written components of Model_3_1 with their torch.nn built-ins, and to inherit from nn.Module so the class picks up the standard torch.nn module machinery." ] },
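As flagged above, a numerically stable log_softmax (a sketch under the assumption that the NaNs come from log(0); the name stable_log_softmax is mine) could replace the two-step log(softmax) inside My_CrossEntropyLoss:

```python
import torch

def stable_log_softmax(x: torch.Tensor) -> torch.Tensor:
    # log-sum-exp trick: log softmax(x) = (x - max) - log(sum(exp(x - max)))
    # no probability is ever materialized, so nothing underflows to 0 before log()
    shifted = x - x.max(dim=1, keepdim=True).values
    return shifted - shifted.exp().sum(dim=1, keepdim=True).log()

# inside My_CrossEntropyLoss.__call__ this would stand in for the two-step version:
#   log_probs = stable_log_softmax(predictions)
```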
"class Model_3_2(nn.Module):\n", + " def __init__(self, num_classes):\n", + " super(Model_3_2, self).__init__()\n", + " self.flatten = nn.Flatten()\n", + " self.linear = nn.Linear(28 * 28, num_classes)\n", + "\n", + " def forward(self, x: torch.Tensor):\n", + " x = self.flatten(x)\n", + " x = self.linear(x)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "id": "6e765ad7-c1c6-4166-bd7f-361666bd4016", + "metadata": {}, + "source": [ + "训练与测试过程与之前手动实现的几乎一致。" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "a58a23e1-368c-430a-ad62-0e256dff564d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/10, Loss: 15.148970603942871, Acc: 0.7520999908447266\n", + "Epoch 2/10, Loss: 9.012335777282715, Acc: 0.7996999621391296\n", + "Epoch 3/10, Loss: 7.9114227294921875, Acc: 0.8095999956130981\n", + "Epoch 4/10, Loss: 7.427404403686523, Acc: 0.8215999603271484\n", + "Epoch 5/10, Loss: 7.084254264831543, Acc: 0.8277999758720398\n", + "Epoch 6/10, Loss: 6.885956287384033, Acc: 0.8274999856948853\n", + "Epoch 7/10, Loss: 6.808426380157471, Acc: 0.8327999711036682\n", + "Epoch 8/10, Loss: 6.647855758666992, Acc: 0.8323000073432922\n", + "Epoch 9/10, Loss: 6.560361862182617, Acc: 0.8317999839782715\n", + "Epoch 10/10, Loss: 6.5211310386657715, Acc: 0.8349999785423279\n" + ] + } + ], + "source": [ + "learning_rate = 5e-3\n", + "num_epochs = 10\n", + "batch_size = 4096\n", + "num_classes = 10\n", + "device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n", + "\n", + "transform = transforms.Compose(\n", + " [\n", + " transforms.ToTensor(),\n", + " transforms.Normalize((0.5,), (0.5,)),\n", + " ]\n", + ")\n", + "train_dataset = datasets.FashionMNIST(\n", + " root=\"./dataset\", train=True, transform=transform, download=True\n", + ")\n", + "test_dataset = datasets.FashionMNIST(\n", + " root=\"./dataset\", train=False, transform=transform, download=True\n", + ")\n", + "train_loader = DataLoader(\n", + " dataset=train_dataset,\n", + " batch_size=batch_size,\n", + " shuffle=True,\n", + " num_workers=4,\n", + " pin_memory=True,\n", + ")\n", + "test_loader = DataLoader(\n", + " dataset=test_dataset,\n", + " batch_size=batch_size,\n", + " shuffle=True,\n", + " num_workers=4,\n", + " pin_memory=True,\n", + ")\n", + "\n", + "model = Model_3_2(num_classes).to(device)\n", + "criterion = nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n", + "\n", + "for epoch in range(num_epochs):\n", + " total_epoch_loss = 0\n", + " model.train()\n", + " for images, targets in train_loader:\n", + " optimizer.zero_grad()\n", + "\n", + " images = images.to(device)\n", + " targets = targets.to(device)\n", + "\n", + " one_hot_targets = (\n", + " torch.nn.functional.one_hot(targets, num_classes=num_classes)\n", + " .to(device)\n", + " .to(dtype=torch.float32)\n", + " )\n", + "\n", + " outputs = model(images)\n", + " loss = criterion(outputs, one_hot_targets)\n", + " total_epoch_loss += loss\n", + "\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " model.eval()\n", + " total_acc = 0\n", + " with torch.no_grad():\n", + " for image, targets in test_loader:\n", + " image = image.to(device)\n", + " targets = targets.to(device)\n", + " outputs = model(image)\n", + " total_acc += (outputs.argmax(1) == targets).sum()\n", + " print(\n", + " f\"Epoch {epoch + 1}/{num_epochs}, Loss: {total_epoch_loss}, Acc: {total_acc / len(test_dataset)}\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": 
"59555b67-1650-4e1a-a98e-7906878bf3d0", + "metadata": {}, + "source": [ + "与手动实现的softmax回归相比较,nn.CrossEntropyLoss比手动实现的My_CrossEntropyLoss更加稳定,没有出现梯度爆炸的情况。" + ] + }, + { + "cell_type": "markdown", + "id": "f40431f2-e77b-4ead-81a3-ff6451a8e452", + "metadata": {}, + "source": [ + "**实验心得体会**\n", + "\n", + "通过完成本次Pytorch基本操作实验,让我对Pytorch框架有了更加深入的理解。我接触深度学习主要是在大语言模型领域,比较熟悉微调大模型,但是涉及到底层的深度学习知识,我还有很多短板和不足。这次实验对我这方面的锻炼让我收获良多。\n", + "\n", + "首先是数据集的设置。如果数据没有进行归一化,很容易出现梯度爆炸。这是在我以前直接使用图片数据集的经历中没有遇到过的问题。\n", + "\n", + "在实现logistic回归模型时,通过手动实现各个组件如优化器、线性层等,让我对这些模块的工作原理有了更清晰的认识。尤其是在实现广播机制时,需要充分理解张量操作的维度变换规律。而使用Pytorch内置模块进行实现时,通过继承nn.Module可以自动获得許多功能,使代码更加简洁。\n", + "\n", + "在实现softmax回归时,则遇到了更大的困难。手动实现的模型很容易出现梯度爆炸的问题,而使用Pytorch内置的损失函数和优化器则可以稳定训练。这让我意识到了选择合适的优化方法的重要性。另外,Pytorch强大的自动微分机制也是构建深度神经网络的重要基础。\n", + "\n", + "通过这个实验,让我对Pytorch框架有了更加直观的感受,也让我看到了仅靠基础模块搭建复杂模型的难点所在。这些经验对我后续使用Pytorch构建数据集模型会很有帮助。" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Lab1/code/1.1.py b/Lab1/code/1.1.py new file mode 100644 index 0000000..5f23159 --- /dev/null +++ b/Lab1/code/1.1.py @@ -0,0 +1,39 @@ +import torch + +A = torch.tensor([[1, 2, 3]]) + +B = torch.tensor([[4], + [5]]) + +# 方法1: 使用PyTorch的减法操作符 +result1 = A - B + +# 方法2: 使用PyTorch的sub函数 +result2 = torch.sub(A, B) + +# 方法3: 手动实现广播机制并作差 +def mysub(a:torch.Tensor, b:torch.Tensor): + if not ( + (a.size(0) == 1 and b.size(1) == 1) + or + (a.size(1) == 1 and b.size(0) == 1) + ): + raise ValueError("输入的张量大小无法满足广播机制的条件。") + else: + target_shape = torch.Size([max(A.size(0), B.size(0)), max(A.size(1), B.size(1))]) + A_broadcasted = A.expand(target_shape) + B_broadcasted = B.expand(target_shape) + result = torch.zeros(target_shape, dtype=torch.int64).to(device=A_broadcasted.device) + for i in range(target_shape[0]): + for j in range(target_shape[1]): + result[i, j] = A_broadcasted[i, j] - B_broadcasted[i, j] + return result + +result3 = mysub(A, B) + +print("方法1的结果:") +print(result1) +print("方法2的结果:") +print(result2) +print("方法3的结果:") +print(result3) diff --git a/Lab1/code/1.2.py b/Lab1/code/1.2.py new file mode 100644 index 0000000..66986c2 --- /dev/null +++ b/Lab1/code/1.2.py @@ -0,0 +1,23 @@ +import torch + +mean = 0 +stddev = 0.01 + +P = torch.normal(mean=mean, std=stddev, size=(3, 2)) +Q = torch.normal(mean=mean, std=stddev, size=(4, 2)) + +print("矩阵 P:") +print(P) +print("矩阵 Q:") +print(Q) + +# 对矩阵Q进行转置操作,得到矩阵Q的转置Q^T +QT = Q.T +print("矩阵 QT:") +print(QT) + +# 计算矩阵P和矩阵Q^T的矩阵相乘 +result = torch.matmul(P, QT) +print("矩阵相乘的结果:") +print(result) + diff --git a/Lab1/code/1.3.py b/Lab1/code/1.3.py new file mode 100644 index 0000000..ef057e1 --- /dev/null +++ b/Lab1/code/1.3.py @@ -0,0 +1,12 @@ +import torch + +x = torch.tensor(1.0, requires_grad=True) +y_1 = x**2 +with torch.no_grad(): + y_2 = x**3 + +y3 = y_1 + y_2 + +y3.backward() + +print("梯度(dy_3/dx): ", x.grad.item()) diff --git a/Lab1/code/2.1.py b/Lab1/code/2.1.py new file mode 100644 index 0000000..7b10711 --- /dev/null +++ b/Lab1/code/2.1.py @@ -0,0 +1,143 @@ +import numpy as np +import torch +from torch.autograd import Variable +from torch.utils.data import Dataset, DataLoader +from tqdm import tqdm +import ipdb + + 
+class My_BCELoss: + def __call__(self, prediction: torch.Tensor, target: torch.Tensor): + loss = -torch.mean( + target * torch.log(prediction) + (1 - target) * torch.log(1 - prediction) + ) + return loss + + +class My_optimizer: + def __init__(self, params: list[torch.Tensor], lr: float): + self.params = params + self.lr = lr + + def step(self): + for param in self.params: + param.data = param.data - self.lr * param.grad.data + + def zero_grad(self): + for param in self.params: + if param.grad is not None: + param.grad.data.zero_() + + +class My_Linear: + def __init__(self, input_feature: int, output_feature: int): + self.weight = torch.randn( + (output_feature, input_feature), requires_grad=True, dtype=torch.float32 + ) + self.bias = torch.randn(1, requires_grad=True, dtype=torch.float32) + self.params = [self.weight, self.bias] + + def __call__(self, x): + return self.forward(x) + + def forward(self, x): + x = torch.matmul(x, self.weight.T) + self.bias + return x + + def to(self, device: str): + for param in self.params: + param.data = param.data.to(device=device) + return self + + def parameters(self): + return self.params + + +class Model: + def __init__(self): + self.linear = My_Linear(1, 1) + self.params = self.linear.params + + def __call__(self, x): + return self.forward(x) + + def forward(self, x): + x = self.linear(x) + x = torch.sigmoid(x) + return x + + def to(self, device: str): + for param in self.params: + param.data = param.data.to(device=device) + return self + + def parameters(self): + return self.params + + +class My_Dataset(Dataset): + def __init__(self, data_size=1000000): + np.random.seed(0) + x = 2 * np.random.rand(data_size, 1) + noise = 0.2 * np.random.randn(data_size, 1) + y = 4 - 3 * x + noise + self.min_x, self.max_x = np.min(x), np.max(x) + min_y, max_y = np.min(y), np.max(y) + x = (x - self.min_x) / (self.max_x - self.min_x) + y = (y - min_y) / (max_y - min_y) + self.data = [[x[i][0], y[i][0]] for i in range(x.shape[0])] + + def __len__(self): + return len(self.data) + + def __getitem__(self, index): + x, y = self.data[index] + return x, y + + +learning_rate = 5e-2 +num_epochs = 10 +batch_size = 1024 +device = "cuda:0" if torch.cuda.is_available() else "cpu" + +dataset = My_Dataset() +dataloader = DataLoader( + dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=5, pin_memory=True +) + +model = Model().to(device) +criterion = My_BCELoss() +optimizer = My_optimizer(model.parameters(), lr=learning_rate) + +for epoch in range(num_epochs): + total_epoch_loss = 0 + total_epoch_pred = 0 + total_epoch_target = 0 + for index, (x, targets) in tqdm(enumerate(dataloader), total=len(dataloader)): + optimizer.zero_grad() + x = x.to(device).to(dtype=torch.float32) + targets = targets.to(device).to(dtype=torch.float32) + x = x.unsqueeze(1) + y_pred = model(x) + loss = criterion(y_pred, targets) + total_epoch_loss += loss.item() + total_epoch_target += targets.sum().item() + total_epoch_pred += y_pred.sum().item() + + loss.backward() + optimizer.step() + + print( + f"Epoch {epoch + 1}/{num_epochs}, Loss: {total_epoch_loss}, Acc: {1 - abs(total_epoch_pred - total_epoch_target) / total_epoch_target}" + ) + +with torch.no_grad(): + test_data = (np.array([[2]]) - dataset.min_x) / (dataset.max_x - dataset.min_x) + test_data = Variable( + torch.tensor(test_data, dtype=torch.float64), requires_grad=False + ).to(device) + predicted = model(test_data).to("cpu") + print( + f"Model weights: {model.linear.weight.item()}, bias: {model.linear.bias.item()}" + ) + 
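+    # Caution: the model's weights are float32 while test_data is created as
+    # float64; if the matmul in My_Linear raises a Float/Double dtype mismatch,
+    # casting with test_data.to(torch.float32) would align them (inferred from
+    # the dtypes above; the run itself is not shown here).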
print(f"Prediction for test data: {predicted.item()}") diff --git a/Lab1/code/2.2.py b/Lab1/code/2.2.py new file mode 100644 index 0000000..1015a09 --- /dev/null +++ b/Lab1/code/2.2.py @@ -0,0 +1,89 @@ +import numpy as np +import torch +from torch.autograd import Variable +from torch.utils.data import Dataset, DataLoader +from torch import nn +from tqdm import tqdm +import ipdb + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + self.linear = nn.Linear(1, 1, dtype=torch.float64) + + def forward(self, x): + x = self.linear(x) + x = torch.sigmoid(x) + return x + + +class My_Dataset(Dataset): + def __init__(self, data_size=1000000): + np.random.seed(0) + x = 2 * np.random.rand(data_size, 1) + noise = 0.2 * np.random.randn(data_size, 1) + y = 4 - 3 * x + noise + self.min_x, self.max_x = np.min(x), np.max(x) + min_y, max_y = np.min(y), np.max(y) + x = (x - self.min_x) / (self.max_x - self.min_x) + y = (y - min_y) / (max_y - min_y) + self.data = [[x[i][0], y[i][0]] for i in range(x.shape[0])] + + def __len__(self): + return len(self.data) + + def __getitem__(self, index): + x, y = self.data[index] + return x, y + + +learning_rate = 1e-2 +num_epochs = 10 +batch_size = 1024 +device = "cuda:0" if torch.cuda.is_available() else "cpu" + +dataset = My_Dataset() +dataloader = DataLoader( + dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=5, pin_memory=True +) + +model = Model().to(device) +criterion = nn.BCELoss() +optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) + +for epoch in range(num_epochs): + total_epoch_loss = 0 + total_epoch_pred = 0 + total_epoch_target = 0 + for index, (x, targets) in tqdm(enumerate(dataloader), total=len(dataloader)): + optimizer.zero_grad() + + x = x.to(device) + targets = targets.to(device) + + x = x.unsqueeze(1) + targets = targets.unsqueeze(1) + y_pred = model(x) + loss = criterion(y_pred, targets) + total_epoch_loss += loss.item() + total_epoch_target += targets.sum().item() + total_epoch_pred += y_pred.sum().item() + + loss.backward() + optimizer.step() + + print( + f"Epoch {epoch + 1}/{num_epochs}, Loss: {total_epoch_loss}, Acc: {1 - abs(total_epoch_pred - total_epoch_target) / total_epoch_target}" + ) + +with torch.no_grad(): + test_data = (np.array([[2]]) - dataset.min_x) / (dataset.max_x - dataset.min_x) + test_data = Variable( + torch.tensor(test_data, dtype=torch.float64), requires_grad=False + ).to(device) + predicted = model(test_data).to("cpu") + print( + f"Model weights: {model.linear.weight.item()}, bias: {model.linear.bias.item()}" + ) + print(f"Prediction for test data: {predicted.item()}") diff --git a/Lab1/code/3.1.py b/Lab1/code/3.1.py new file mode 100644 index 0000000..6dd8b47 --- /dev/null +++ b/Lab1/code/3.1.py @@ -0,0 +1,169 @@ +import numpy as np +import torch +from torch.utils.data import Dataset, DataLoader +from torch import nn +from tqdm import tqdm +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + + +def my_one_hot(indices: torch.Tensor, num_classes: int): + one_hot_tensor = torch.zeros(len(indices), num_classes).to(indices.device) + one_hot_tensor.scatter_(1, indices.view(-1, 1), 1) + return one_hot_tensor + + +class My_CrossEntropyLoss: + def __call__(self, predictions: torch.Tensor, targets: torch.Tensor): + max_values = torch.max(predictions, dim=1, keepdim=True).values + exp_values = torch.exp(predictions - max_values) + softmax_output = exp_values / torch.sum(exp_values, dim=1, keepdim=True) + + log_probs = 
torch.log(softmax_output) + nll_loss = -torch.sum(targets * log_probs, dim=1) + average_loss = torch.mean(nll_loss) + return average_loss + + +class My_optimizer: + def __init__(self, params: list[torch.Tensor], lr: float): + self.params = params + self.lr = lr + + def step(self): + for param in self.params: + param.data = param.data - self.lr * param.grad.data + + def zero_grad(self): + for param in self.params: + if param.grad is not None: + param.grad.data.zero_() + + +class My_Linear: + def __init__(self, input_feature: int, output_feature: int): + self.weight = torch.randn( + (output_feature, input_feature), requires_grad=True, dtype=torch.float32 + ) + self.bias = torch.randn(1, requires_grad=True, dtype=torch.float32) + self.params = [self.weight, self.bias] + + def __call__(self, x: torch.Tensor): + return self.forward(x) + + def forward(self, x: torch.Tensor): + x = torch.matmul(x, self.weight.T) + self.bias + return x + + def to(self, device: str): + for param in self.params: + param.data = param.data.to(device=device) + return self + + def parameters(self): + return self.params + + +class My_Flatten: + def __call__(self, x: torch.Tensor): + return self.forward(x) + + def forward(self, x: torch.Tensor): + x = x.view(x.shape[0], -1) + return x + + +class Model_3_1: + def __init__(self, num_classes): + self.flatten = My_Flatten() + self.linear = My_Linear(28 * 28, num_classes) + self.params = self.linear.params + + def __call__(self, x: torch.Tensor): + return self.forward(x) + + def forward(self, x: torch.Tensor): + x = self.flatten(x) + x = self.linear(x) + return x + + def to(self, device: str): + for param in self.params: + param.data = param.data.to(device=device) + return self + + def parameters(self): + return self.params + + +learning_rate = 5e-3 +num_epochs = 10 +batch_size = 4096 +num_classes = 10 +device = "cuda:0" if torch.cuda.is_available() else "cpu" + +transform = transforms.Compose( + [ + transforms.ToTensor(), + transforms.Normalize((0.5,), (0.5,)), + ] +) +train_dataset = datasets.FashionMNIST( + root="./dataset", train=True, transform=transform, download=True +) +test_dataset = datasets.FashionMNIST( + root="./dataset", train=False, transform=transform, download=True +) +train_loader = DataLoader( + dataset=train_dataset, + batch_size=batch_size, + shuffle=True, + num_workers=4, + pin_memory=True, +) +test_loader = DataLoader( + dataset=test_dataset, + batch_size=batch_size, + shuffle=True, + num_workers=4, + pin_memory=True, +) + +model = Model_3_1(num_classes).to(device) +criterion = My_CrossEntropyLoss() +optimizer = My_optimizer(model.parameters(), lr=learning_rate) + +for epoch in range(num_epochs): + total_epoch_loss = 0 + for index, (images, targets) in tqdm( + enumerate(train_loader), total=len(train_loader) + ): + optimizer.zero_grad() + + images = images.to(device) + targets = targets.to(device).to(dtype=torch.long) + + one_hot_targets = ( + my_one_hot(targets, num_classes=num_classes).to(device).to(dtype=torch.long) + ) + + outputs = model(images) + # ipdb.set_trace() + loss = criterion(outputs, one_hot_targets) + total_epoch_loss += loss + + loss.backward() + optimizer.step() + + total_acc = 0 + with torch.no_grad(): + for index, (image, targets) in tqdm( + enumerate(test_loader), total=len(test_loader) + ): + image = image.to(device) + targets = targets.to(device) + outputs = model(image) + total_acc += (outputs.argmax(1) == targets).sum() + print( + f"Epoch {epoch + 1}/{num_epochs} Train, Loss: {total_epoch_loss}, Acc: {total_acc / 
len(test_dataset)}" + ) diff --git a/Lab1/code/3.2.py b/Lab1/code/3.2.py new file mode 100644 index 0000000..8624929 --- /dev/null +++ b/Lab1/code/3.2.py @@ -0,0 +1,96 @@ +import numpy as np +import torch +from torch.utils.data import DataLoader +from torch import nn +from tqdm import tqdm +from torchvision import datasets, transforms +from torch.utils.data import DataLoader +import ipdb + + +class Model(nn.Module): + def __init__(self, num_classes): + super(Model, self).__init__() + self.flatten = nn.Flatten() + self.linear = nn.Linear(28 * 28, num_classes) + + def forward(self, x: torch.Tensor): + x = self.flatten(x) + x = self.linear(x) + return x + + +learning_rate = 5e-3 +num_epochs = 10 +batch_size = 4096 +num_classes = 10 +device = "cuda:0" if torch.cuda.is_available() else "cpu" + +transform = transforms.Compose( + [ + transforms.ToTensor(), + transforms.Normalize((0.5,), (0.5,)), + ] +) +train_dataset = datasets.FashionMNIST( + root="./dataset", train=True, transform=transform, download=True +) +test_dataset = datasets.FashionMNIST( + root="./dataset", train=False, transform=transform, download=True +) +train_loader = DataLoader( + dataset=train_dataset, + batch_size=batch_size, + shuffle=True, + num_workers=4, + pin_memory=True, +) +test_loader = DataLoader( + dataset=test_dataset, + batch_size=batch_size, + shuffle=True, + num_workers=4, + pin_memory=True, +) + +model = Model(num_classes).to(device) +criterion = nn.CrossEntropyLoss() +optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) + +for epoch in range(num_epochs): + total_epoch_loss = 0 + model.train() + for index, (images, targets) in tqdm( + enumerate(train_loader), total=len(train_loader) + ): + optimizer.zero_grad() + + images = images.to(device) + targets = targets.to(device) + + one_hot_targets = ( + torch.nn.functional.one_hot(targets, num_classes=num_classes) + .to(device) + .to(dtype=torch.float32) + ) + + outputs = model(images) + loss = criterion(outputs, one_hot_targets) + total_epoch_loss += loss + + loss.backward() + optimizer.step() + + model.eval() + total_acc = 0 + with torch.no_grad(): + for index, (image, targets) in tqdm( + enumerate(test_loader), total=len(test_loader) + ): + image = image.to(device) + targets = targets.to(device) + outputs = model(image) + total_acc += (outputs.argmax(1) == targets).sum() + print( + f"Epoch {epoch + 1}/{num_epochs} Train, Loss: {total_epoch_loss}, Acc: {total_acc / len(test_dataset)}" + ) diff --git a/images/school_logo.png b/images/school_logo.png new file mode 100644 index 0000000..777f05e Binary files /dev/null and b/images/school_logo.png differ