Skip to content
Snippets Groups Projects
prak3-checkpoint.ipynb 164 KiB
Newer Older
  • Learn to ignore specific revisions
  • Nikolai Killer's avatar
    Nikolai Killer committed

    {
     "cells": [
      {
       "cell_type": "markdown",
       "metadata": {
        "id": "194EMZeTSLIk"
       },
       "source": [
        "# 0.  Imports und Helper"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 6,
       "metadata": {
        "colab": {
         "base_uri": "https://localhost:8080/"
        },
        "id": "6IoTrfAlzktH",
        "outputId": "f9bdc25e-4895-436b-91ab-00d3a38af883"
       },
       "outputs": [
        {
         "name": "stdout",
         "output_type": "stream",
         "text": [
          "Defaulting to user installation because normal site-packages is not writeable\n",
          "Requirement already satisfied: torchinfo in /home/niko/.local/lib/python3.10/site-packages (1.8.0)\n",
          "Defaulting to user installation because normal site-packages is not writeable\n",
          "Requirement already satisfied: tqdm in /home/niko/.local/lib/python3.10/site-packages (4.66.1)\n"
         ]
        }
       ],
       "source": [
        "!pip install torchinfo\n",
        "!pip install matplotlib --quiet\n",
        "!pip install tqdm"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 7,
       "metadata": {
        "id": "dPt0DPgfLjEQ"
       },
       "outputs": [],
       "source": [
        "# Imports\n",
        "import copy\n",
        "\n",
        "import ipywidgets as widgets\n",
        "import matplotlib.pyplot as plt\n",
        "import numpy as np\n",
        "\n",
        "import time\n",
        "import torch\n",
        "import torchvision\n",
        "#import torchvision.datasets as datasets\n",
        "import torch.nn.functional as F\n",
        "import torch.nn as nn\n",
        "import torch.optim as optim\n",
        "import tqdm\n",
        "\n",
        "import random\n",
        "import keras.datasets.imdb\n",
        "\n",
        "from torch.autograd import Variable\n",
        "from tqdm.auto import tqdm as tqdmauto\n",
        "import timeit"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 8,
       "metadata": {
        "colab": {
         "base_uri": "https://localhost:8080/"
        },
        "id": "U6niQp1RNHxp",
        "outputId": "f07ba931-b6c5-4157-df21-91a429ff70a1"
       },
       "outputs": [
        {
         "name": "stdout",
         "output_type": "stream",
         "text": [
          "Random seed 2021 has been set.\n"
         ]
        }
       ],
       "source": [
        "def set_seed(seed=None, seed_torch=True):\n",
        "  \"\"\"\n",
        "  Handles variability by controlling sources of randomness\n",
        "  through set seed values\n",
        "\n",
        "  Args:\n",
        "    seed: Integer\n",
        "      Seed applied to Python's random module, NumPy and (optionally) PyTorch.\n",
        "      If no seed is given, a random integer in the range [0, 2^32) is drawn\n",
        "    seed_torch: Bool\n",
        "      If True, also seed PyTorch on the CPU and on all CUDA devices and\n",
        "      switch cuDNN to deterministic, non-benchmarking mode\n",
        "\n",
        "  Returns:\n",
        "    Nothing\n",
        "  \"\"\"\n",
        "  if seed is None:\n",
        "    # Draw with an explicit 64-bit dtype so that 2^32 fits on every platform and\n",
        "    # convert to a plain int: random.seed() rejects NumPy integers on Python >= 3.11\n",
        "    seed = int(np.random.randint(0, 2 ** 32, dtype=np.uint64))\n",
        "  random.seed(seed)\n",
        "  np.random.seed(seed)\n",
        "  if seed_torch:\n",
        "    torch.manual_seed(seed)\n",
        "    torch.cuda.manual_seed_all(seed)\n",
        "    torch.cuda.manual_seed(seed)\n",
        "    # Disable cuDNN auto-tuning and request deterministic kernels\n",
        "    torch.backends.cudnn.benchmark = False\n",
        "    torch.backends.cudnn.deterministic = True\n",
        "  print(f'Random seed {seed} has been set.')\n",
        "SEED = 2021\n",
        "set_seed(seed=SEED)\n",
        "# Fall back to the CPU when no CUDA device is present instead of failing later in .to(DEVICE)\n",
        "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
        "\n",
        "def zero_grad(params):\n",
        "  \"\"\"\n",
        "  Reset the stored gradient of every given tensor to zero, in place.\n",
        "  Gradients add up over successive backward calls, so they have to be\n",
        "  cleared before a fresh backward pass\n",
        "\n",
        "  Args:\n",
        "    params: an iterator over tensors\n",
        "      The weights and biases whose gradients are cleared\n",
        "\n",
        "  Returns:\n",
        "    Nothing\n",
        "  \"\"\"\n",
        "  for tensor in params:\n",
        "    if tensor.grad is None:\n",
        "      # No gradient stored for this tensor - nothing to clear\n",
        "      continue\n",
        "    tensor.grad.data.zero_()\n",
        "\n",
        "\n",
        "def print_params(model):\n",
        "  \"\"\"\n",
        "  Print each trainable named parameter of the model together with\n",
        "  its current value\n",
        "\n",
        "  Args:\n",
        "    model: an nn.Module inherited model\n",
        "      The network whose parameters are listed\n",
        "\n",
        "  Returns:\n",
        "    Nothing\n",
        "  \"\"\"\n",
        "  # Parameters with requires_grad=False are left out\n",
        "  trainable = ((label, tensor) for label, tensor in model.named_parameters()\n",
        "               if tensor.requires_grad)\n",
        "  for label, tensor in trainable:\n",
        "    print(label, tensor.data)\n",
        "\n",
        "def sample_minibatch(input_data, target_data, num_points=100):\n",
        "  \"\"\"\n",
        "  Draw a random minibatch of num_points input-target pairs\n",
        "\n",
        "  Args:\n",
        "    input_data: Tensor\n",
        "      Multi-dimensional tensor holding the inputs, one sample per row\n",
        "    target_data: Tensor\n",
        "      1D tensor holding the class label of each sample\n",
        "    num_points: Integer\n",
        "      Size of the minibatch with default=100\n",
        "\n",
        "  Returns:\n",
        "    batch_inputs: Tensor\n",
        "      Inputs of the sampled rows\n",
        "    batch_targets: Tensor\n",
        "      Targets of the same rows\n",
        "  \"\"\"\n",
        "  # np.random.choice draws with replacement by default, so a row may occur twice\n",
        "  chosen_rows = np.random.choice(len(input_data), num_points)\n",
        "  # The same indices pick matching rows from inputs and targets\n",
        "  return input_data[chosen_rows, :], target_data[chosen_rows]\n",
        "\n",
        "\n",
        "def gradient_update(loss, params, lr=1e-3):\n",
        "  \"\"\"\n",
        "  Perform one plain gradient descent step on a given loss over a collection of parameters\n",
        "\n",
        "  Args:\n",
        "    loss: Tensor\n",
        "      A scalar tensor containing the loss through which the gradient will be computed\n",
        "    params: Iterable of tensors\n",
        "      Parameters with respect to which gradients are computed. May also be a\n",
        "      one-shot iterator such as model.parameters()\n",
        "    lr: Float\n",
        "      Scalar specifying the learning rate or step-size for the update\n",
        "\n",
        "  Returns:\n",
        "    Nothing\n",
        "  \"\"\"\n",
        "  # Materialise once: a generator would be used up by zero_grad and leave\n",
        "  # nothing for the update loop below\n",
        "  params = list(params)\n",
        "\n",
        "  # Gradients accumulate over successive backward calls, so clear them first\n",
        "  zero_grad(params)\n",
        "\n",
        "  # Compute gradients on given objective\n",
        "  loss.backward()\n",
        "\n",
        "  with torch.no_grad():\n",
        "    for par in params:\n",
        "      # Parameters that did not take part in the loss have no gradient\n",
        "      if par.grad is None:\n",
        "        continue\n",
        "      par.data -= lr * par.grad.data"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 9,
       "metadata": {
        "colab": {
         "base_uri": "https://localhost:8080/"
        },
        "id": "tNDNF10dyqUm",
        "outputId": "f654597c-411a-45ac-f694-aba79e025aa8"
       },
       "outputs": [
        {
         "name": "stdout",
         "output_type": "stream",
         "text": [
          " this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert  is an amazing actor and now the same being director  father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for  and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also  to the two little boy's that played the  of norman and paul they were just brilliant children are often left out of the  list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all \n",
          "17121\n",
          "17588\n",
          "x_train shape: torch.Size([17121, 10000])\n",
          "y_train shape: torch.Size([17121])\n",
          "17121 train samples\n",
          "17588 test samples\n"
         ]
        }
       ],
       "source": [
        "# Load the IMDB reviews as lists of word indices. NOTE(review): the recorded output of\n",
        "# this cell shows 17121 train and 17588 test reviews, so maxlen=250 apparently drops\n",
        "# longer reviews rather than truncating them - confirm against the Keras documentation\n",
        "(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=10000, maxlen=250,)\n",
        "\n",
        "# Print the first review as text: invert the word -> index mapping to index -> word\n",
        "word_indizes_orig = keras.datasets.imdb.get_word_index(path=\"imdb_word_index.json\")\n",
        "word_indizes = {v: k for k, v in word_indizes_orig.items()}\n",
        "for i in x_train[0]:\n",
        "    # Indices are shifted by 3 before the lookup (presumably because load_data reserves\n",
        "    # the lowest indices for special tokens - TODO confirm); unknown indices print as ''\n",
        "    print(f\"{word_indizes.get(i-3, '')}\", end=\" \")\n",
        "print()\n",
        "\n",
        "def vectorize_sequences(sequences, dimension=10000):\n",
        "    \"\"\"Multi-hot encode a collection of index sequences.\n",
        "\n",
        "    Returns a float array of shape (len(sequences), dimension) in which row i\n",
        "    holds a 1 at every index contained in sequences[i] and 0 elsewhere.\n",
        "    \"\"\"\n",
        "    encoded = np.zeros((len(sequences), dimension))\n",
        "    for row, word_ids in enumerate(sequences):\n",
        "        encoded[row, word_ids] = 1\n",
        "    return encoded\n",
        "\n",
        "print(len(x_train))\n",
        "print(len(x_test))\n",
        "\n",
        "# Multi-hot encode every review into a fixed-length vector\n",
        "x_train = vectorize_sequences(x_train)\n",
        "x_test = vectorize_sequences(x_test)\n",
        "\n",
        "# torch.autograd.Variable is a deprecated no-op wrapper, so the tensors are used directly.\n",
        "# Inputs become float32 features, labels int64 class indices; everything is moved to DEVICE\n",
        "x_train = torch.from_numpy(x_train).float().to(DEVICE)\n",
        "y_train = torch.from_numpy(y_train).long().to(DEVICE)\n",
        "x_test  = torch.from_numpy(x_test).float().to(DEVICE)\n",
        "y_test  = torch.from_numpy(y_test).long().to(DEVICE)\n",
        "\n",
        "print(\"x_train shape:\", x_train.shape)\n",
        "print(\"y_train shape:\", y_train.shape)\n",
        "print(x_train.shape[0], \"train samples\")\n",
        "print(x_test.shape[0], \"test samples\")"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 66,
       "metadata": {},
       "outputs": [],
       "source": [
        "# Why is there so much data? I thought the IMDB dataset consists of only 25000 reviews."
       ]
      },
      {
       "cell_type": "markdown",
       "metadata": {
        "id": "tKJZz5YsSSyT"
       },
       "source": [
        "# 1.  Softmax Implementieren\n",
        "Implementieren Sie die Softmax-Funktion mit Numpy und stellen Sie zunächst sicher, dass diese dieselben Ergebnisse liefert wie die Pytorch-Funktion im Beispiel. Vergleichen Sie dann Ihre Implementierungen mit anderen Gruppen und diskutieren Sie auch über Performance und numerische Stabilität. Erstellen Sie einen kleinen Benchmark, um Performance und numerische Stabilität zu testen."
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 8,
       "metadata": {
        "id": "_80I03V8ogds"
       },
       "outputs": [],
       "source": [
        "def softmax(vector):\n",
        "    \"\"\"Plain softmax over all entries of `vector`: exp(x) / sum(exp(x)).\n",
        "\n",
        "    No max-shift is applied, so large inputs can overflow inside exp.\n",
        "    \"\"\"\n",
        "    exponentials = np.exp(vector)\n",
        "    return exponentials / exponentials.sum()\n",
        "    \n",
        "def softmax_save(vector):\n",
        "    \"\"\"Numerically stable softmax over all entries of `vector`.\n",
        "\n",
        "    The maximum is subtracted first so that the largest exponent is 0 and exp\n",
        "    cannot overflow; softmax is unchanged by such a constant shift.\n",
        "\n",
        "    Args:\n",
        "        vector: NumPy array or torch tensor (anything that provides `.max()`).\n",
        "\n",
        "    Returns:\n",
        "        Values of the same shape as `vector` that sum to 1.\n",
        "    \"\"\"\n",
        "    # `.max()` exists on ndarrays and tensors alike; the previous torch.max call\n",
        "    # rejected plain NumPy input\n",
        "    shifted = vector - vector.max()\n",
        "    exp_vec = np.exp(shifted)\n",
        "    return exp_vec / exp_vec.sum()"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 9,
       "metadata": {},
       "outputs": [],
       "source": [
        "# Check both NumPy softmax variants against PyTorch's reference on random inputs\n",
        "m = nn.Softmax(dim=1)\n",
        "tolerance = 1e-6\n",
        "for i in range(1000):\n",
        "    # Named `logits` rather than `input` so the builtin input() is not shadowed\n",
        "    logits = torch.randn(1, 3)\n",
        "    soft1 = softmax(logits)\n",
        "    soft2 = m(logits)\n",
        "    soft3 = softmax_save(logits)\n",
        "    # The tolerance is a relative one (third positional argument of np.allclose)\n",
        "    if not np.allclose(soft1, soft2, rtol=tolerance) or not np.allclose(soft2, soft3, rtol=tolerance):\n",
        "        raise Exception(f\"Error with Tensor: {logits}\")"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 12,
       "metadata": {
        "id": "x0VacAxQu5JS"
       },
       "outputs": [
        {
         "name": "stdout",
         "output_type": "stream",
         "text": [
          "tensor([ 3.4028e+38, -3.4028e+38])\n",
          "tensor([1., 0.])\n",
          "Avg time torch: 0.00021779541100002574 vs own save implementation: 0.0004932853010000429 vs unsafe: 0.00040290728599981664\n"
         ]
        }
       ],
       "source": [
        "# Numerical stability: largest and smallest float32 value in one vector\n",
        "info = torch.finfo(torch.float32)\n",
        "max_min_test = torch.tensor([info.max, info.min])\n",
        "print(max_min_test)\n",
        "print(softmax_save(max_min_test))\n",
        "\n",
        "# Performance: time one call of each implementation on fresh random input\n",
        "counter = 1000\n",
        "times_torch = np.zeros(counter)\n",
        "times_own_safe = np.zeros(counter)\n",
        "times_own_unsafe = np.zeros(counter)\n",
        "m = nn.Softmax(dim=1)\n",
        "for i in range(counter):\n",
        "    # Named `logits` rather than `input` so the builtin input() stays usable\n",
        "    logits = torch.randn(1, 200000)\n",
        "    times_torch[i] = timeit.timeit(lambda: m(logits), number=1)\n",
        "    times_own_safe[i] = timeit.timeit(lambda: softmax_save(logits), number=1)\n",
        "    times_own_unsafe[i] = timeit.timeit(lambda: softmax(logits), number=1)\n",
        "print(f\"Avg time torch: {times_torch.mean()} vs own save implementation: {times_own_safe.mean()} vs unsafe: {times_own_unsafe.mean()}\")"
       ]
      },
      {
       "cell_type": "markdown",
       "metadata": {
        "id": "c0u-OqU8U-sL"
       },
       "source": [
        "# 2.  Regularisierung Implementieren\n",
        "\n",
        "Unten finden Sie einen Pytorch-SGD-Schritt mit eingebauter L2-Regularisierung und ohne. Interpretieren Sie die unterschiedlichen Ausgaben. Modifizieren Sie den ersten Codeabschnitt mit einer eigenen L2-Regularisierung so, dass identische Ergebnisse erzeugt werden. Sie können dazu die noch nicht verwendete und noch falsch definierte Variable \"regtermwrong\" umdefinieren und zu einem späteren Zeitpunkt im Code darauf zurückgreifen. ACHTUNG: weight_decay*2=lambda."
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 14,
       "metadata": {
        "id": "O0Hn585ETRWD"
       },
       "outputs": [],
       "source": [
        "# Data definition\n",
        "# Seed NumPy so that the random matrices below are reproducible\n",
        "np.random.seed(123)\n",
        "# Print NumPy arrays with 8 decimals and without scientific notation\n",
        "np.set_printoptions(8, suppress=True)\n",
        "\n",
        "# Random 3x4 input matrix and 4x5 weight matrix in double precision\n",
        "x_numpy = np.random.random((3, 4)).astype(np.double)\n",
        "w_numpy = np.random.random((4, 5)).astype(np.double)\n",
        "# Overwrite one weight with a large value\n",
        "w_numpy[0,0] =9.9\n",
        "x_torch = torch.tensor(x_numpy, requires_grad=True)\n"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 26,
       "metadata": {
        "id": "S5XEpjWFTTzi"
       },
       "outputs": [
        {
         "name": "stdout",
         "output_type": "stream",
         "text": [
          "Original weights tensor([[9.9000, 0.0597, 0.3980, 0.7380, 0.1825],\n",
          "        [0.1755, 0.5316, 0.5318, 0.6344, 0.8494],\n",
          "        [0.7245, 0.6110, 0.7224, 0.3230, 0.3618],\n",
          "        [0.2283, 0.2937, 0.6310, 0.0921, 0.4337]], dtype=torch.float64,\n",
          "       requires_grad=True)\n",
          "0 weight decay tensor([[ 7.7303, -0.1419,  0.1287,  0.4007, -0.0437],\n",
          "        [ 0.0302,  0.3151,  0.3153,  0.3974,  0.5694],\n",
          "        [ 0.4245,  0.3337,  0.4229,  0.1033,  0.1344],\n",
          "        [-0.0139,  0.0385,  0.3083, -0.1228,  0.1504]], dtype=torch.float64,\n",
          "       requires_grad=True)\n"
         ]
        }
       ],
       "source": [
        "# SGD step without built-in regularisation (weight_decay=0); the L2 penalty is\n",
        "# added to the loss by hand instead\n",
        "w_torch = torch.tensor(w_numpy, requires_grad=True)\n",
        "print('Original weights', w_torch)\n",
        "\n",
        "lr = 0.1\n",
        "sgd = torch.optim.SGD([w_torch], lr=lr, weight_decay=0)\n",
        "# Penalty term: sum of squared weights, whose gradient is 2 * w\n",
        "omega = w_torch.square().sum()\n",
        "y_torch = torch.matmul(x_torch, w_torch)\n",
        "loss = y_torch.sum() + 1 * omega # penalty factor (lambda) = 1\n",
        "\n",
        "\n",
        "sgd.zero_grad()\n",
        "loss.backward()\n",
        "sgd.step()\n",
        "\n",
        "# Gradient used in the step, as a NumPy array\n",
        "w_grad = w_torch.grad.data.numpy()\n",
        "# The label refers to the optimizer setting weight_decay=0\n",
        "print('0 weight decay', w_torch)\n"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 25,
       "metadata": {
        "id": "WiQW-Y4VkH7v"
       },
       "outputs": [
        {
         "name": "stdout",
         "output_type": "stream",
         "text": [
          "Reset Original weights tensor([[9.9000, 0.0597, 0.3980, 0.7380, 0.1825],\n",
          "        [0.1755, 0.5316, 0.5318, 0.6344, 0.8494],\n",
          "        [0.7245, 0.6110, 0.7224, 0.3230, 0.3618],\n",
          "        [0.2283, 0.2937, 0.6310, 0.0921, 0.4337]], dtype=torch.float64,\n",
          "       requires_grad=True)\n",
          "1 weight decay tensor([[ 7.7303, -0.1419,  0.1287,  0.4007, -0.0437],\n",
          "        [ 0.0302,  0.3151,  0.3153,  0.3974,  0.5694],\n",
          "        [ 0.4245,  0.3337,  0.4229,  0.1033,  0.1344],\n",
          "        [-0.0139,  0.0385,  0.3083, -0.1228,  0.1504]], dtype=torch.float64,\n",
          "       requires_grad=True)\n"
         ]
        }
       ],
       "source": [
        "# SGD step with the optimizer's built-in L2 regularisation.\n",
        "# torch.optim.SGD adds weight_decay * w to the gradient, which is the gradient of\n",
        "# (weight_decay / 2) * sum(w^2). weight_decay=2 therefore reproduces the hand-written\n",
        "# penalty 1 * sum(w^2) of the previous cell; the recorded outputs of both cells agree\n",
        "\n",
        "\n",
        "w_torch = torch.tensor(w_numpy, requires_grad=True)\n",
        "\n",
        "print('Reset Original weights', w_torch)\n",
        "\n",
        "sgd = torch.optim.SGD([w_torch], lr=lr, weight_decay=2)\n",
        "\n",
        "y_torch = torch.matmul(x_torch, w_torch)\n",
        "# No penalty in the loss itself - the optimizer applies it during step()\n",
        "loss = y_torch.sum()\n",
        "\n",
        "sgd.zero_grad()\n",
        "loss.backward()\n",
        "sgd.step()\n",
        "\n",
        "# Gradient stored on the weights after the step, as a NumPy array\n",
        "w_grad = w_torch.grad.data.numpy()\n",
        "print('1 weight decay', w_torch)"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 27,
       "metadata": {},
       "outputs": [],
       "source": [
        "# FRAGE:\n",
        "# Wieso ist der weight_decay hier doppelt? Also: Warum entspricht alpha * 2 = lambda?"
       ]
      },
      {
       "cell_type": "markdown",
       "metadata": {
        "id": "sQk-ciLBYGnu"
       },
       "source": [
        "# 3.  Einfaches MLP in Pytorch\n",
        "Machen Sie sich ein wenig mit dem IMDB Datensatz und den für Sie erstellten Datenstrukturen in x/y_train/test vertraut."
       ]
      },
      {
       "cell_type": "markdown",
       "metadata": {
        "id": "mLeOKvUxMunF"
       },
       "source": [
        "## 3.1 Modell erstellen und  Angaben zur Modellgröße verstehen\n",
        "Definieren Sie ein Pytorch Multilayer Perzeptron mit der Größe des IMDB-Dictionaries für one-hot-encodierte Wörter als Eingabe (Sigmoid Aktivierung), 50 Neuronen im Hidden Layer und 2 Ausgabeneuronen. Layer 1 und 2 Ihres Netzes verwenden die Sigmoid-Aktivierungsfunktion, Layer 3 die Softmax-Aktivierungsfunktion.\n",
        "\n",
        "Generieren Sie eine Modell-Summary mit torchinfo und erklären Sie, was die ausgegebenen Werte bedeuten und wie diese zustande kommen."
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 10,
       "metadata": {
        "id": "TikQht7LmNnc"
       },
       "outputs": [],
       "source": [
        "#Ihr Code hier\n",
        "class Model(nn.Module):\n",
        "    \"\"\"MLP with two sigmoid layers and a softmax output layer.\n",
        "\n",
        "    Args:\n",
        "        in_features: length of the input vector (default 10000).\n",
        "        hidden_features: width of both sigmoid layers (default 50).\n",
        "        num_classes: number of output units (default 2).\n",
        "\n",
        "    NOTE(review): the training cells pass this output to nn.CrossEntropyLoss,\n",
        "    which expects raw logits and applies log-softmax itself - confirm that the\n",
        "    resulting double softmax is intended.\n",
        "    \"\"\"\n",
        "    def __init__(self, in_features=10000, hidden_features=50, num_classes=2):\n",
        "        super().__init__()\n",
        "        self.layers = nn.Sequential(\n",
        "            nn.Linear(in_features, hidden_features),\n",
        "            nn.Sigmoid(),\n",
        "            nn.Linear(hidden_features, hidden_features),\n",
        "            nn.Sigmoid(),\n",
        "            nn.Linear(hidden_features, num_classes),\n",
        "            # Normalise over the class axis explicitly; relying on the implicit\n",
        "            # dim is deprecated and emits a warning on every forward pass\n",
        "            nn.Softmax(dim=-1),\n",
        "        )\n",
        "\n",
        "    def forward(self, x):\n",
        "        \"\"\"Return class probabilities; the last axis of x holds the features.\"\"\"\n",
        "        return self.layers(x)\n"
       ]
      },
      {
       "cell_type": "markdown",
       "metadata": {
        "id": "GUoAJo8WMzfb"
       },
       "source": [
        "## 3.2 Modell trainieren und Performancekurven interpretieren\n",
        "Nutzen Sie den untenstehenden Code, um Ihr Modell zu trainieren. Interpretieren und diskutieren Sie die entstehenden Performancekurven. Falls Sie einen unerwarteten Anstieg Ihres Losses beobachten, recherchieren Sie, wie Sie diesen mit dem Einbau einer einzelnen Verbesserung innerhalb des gegebenen SGD Lernverfahrens beheben können. ACHTUNG: Wenn Sie Ihr Modell nicht oben neu initialisieren, optimieren Sie weiter auf den schon veränderten Parametern."
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 45,
       "metadata": {
        "id": "JD2AkGHGrpGV"
       },
       "outputs": [
        {
         "name": "stderr",
         "output_type": "stream",
         "text": [
          "100%|█████████████████████████████████████████████████████████████████████████████████| 290/290 [00:06<00:00, 44.49it/s]\n"
         ]
        },
        {
         "data": {
          "image/png": "",
          "text/plain": [
           "<Figure size 1200x600 with 3 Axes>"
          ]
         },
         "metadata": {},
         "output_type": "display_data"
        }
       ],
       "source": [
        "# Fresh model, so training does not continue from previously updated weights\n",
        "model = Model().to(DEVICE)\n",
        "\n",
        "EPOCHS  = 290 #@param {type:\"slider\", min:2, max:1000, step:1}\n",
        "RATE = 0.9 #@param {type:\"slider\", min:0.001, max:2, step:0.001}\n",
        "optimizer = torch.optim.SGD(model.parameters(), lr=RATE, weight_decay=0)\n",
        "loss_fn   = nn.CrossEntropyLoss()\n",
        "# Per-epoch history of training loss, training accuracy and test accuracy\n",
        "loss_list     = np.zeros((EPOCHS,))\n",
        "accuracy_list = np.zeros((EPOCHS,))\n",
        "accuracy_list_test = np.zeros((EPOCHS,))\n",
        "\n",
        "\n",
        "\n",
        "# Each epoch is one full-batch step: the whole training set goes through the model at once\n",
        "for epoch in tqdm.trange(EPOCHS):\n",
        "    y_pred = model(x_train)\n",
        "    #loss = loss_fn(y_pred, y_train)\n",
        "    loss = loss_fn(y_pred, y_train)# + 0.01 *l2_reg(model)\n",
        "    loss_list[epoch] = loss.item()\n",
        "\n",
        "    # Zero gradients\n",
        "    optimizer.zero_grad()\n",
        "\n",
        "    loss.backward()\n",
        "    #torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)#, args.clip)\n",
        "    optimizer.step()\n",
        "\n",
        "    # Accuracy is measured after the update and without tracking gradients\n",
        "    with torch.no_grad():\n",
        "        y_pred = model(x_train)\n",
        "        correct = (torch.argmax(y_pred, dim=1) == y_train).type(torch.FloatTensor)\n",
        "        accuracy_list[epoch] = correct.mean()\n",
        "        y_pred = model(x_test)\n",
        "        correct = (torch.argmax(y_pred, dim=1) == y_test).type(torch.FloatTensor)\n",
        "        accuracy_list_test[epoch] = correct.mean()\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "# Three stacked panels sharing the epoch axis\n",
        "fig, (ax1, ax2, ax3) = plt.subplots(3, figsize=(12, 6), sharex=True)\n",
        "\n",
        "ax1.plot(accuracy_list)\n",
        "ax1.set_ylabel(\"train accuracy\")\n",
        "ax2.plot(loss_list)\n",
        "ax2.set_ylabel(\"train loss\")\n",
        "ax3.plot(accuracy_list_test)\n",
        "ax3.set_ylabel(\"test acc\")\n",
        "ax3.set_xlabel(\"epochs\");"
       ]
      },
      {
       "cell_type": "markdown",
       "metadata": {
        "id": "F3kyvbJaTjZ7"
       },
       "source": [
        "## 3.3.&nbsp;Momentum Implementieren\n",
        "Vervollständigen Sie die Methode momentum_update. Überlegen Sie sich, wie Sie die Korrektheit mit einem Durchlauf inkl. Momentum-Update auf Ihrem oben definierten Modell prüfen können."
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 46,
       "metadata": {
        "id": "36whMhqnMKuc"
       },
       "outputs": [],
       "source": [
        "def momentum_update(loss, params, grad_vel, lr=1e-3, beta=0.8):\n",
        "  \"\"\"\n",
        "  Perform one gradient descent step with momentum\n",
        "\n",
        "  The velocity is updated as v <- grad + beta * v and the parameters as\n",
        "  p <- p - lr * v. The new velocity is written back into grad_vel in place, so\n",
        "  passing the same buffers again carries the momentum over to the next step.\n",
        "  With zero-initialised buffers this matches torch.optim.SGD(momentum=beta)\n",
        "\n",
        "  Args:\n",
        "    loss: Tensor\n",
        "      A scalar tensor containing the loss through which the gradient will be computed\n",
        "    params: Iterable of tensors\n",
        "      Parameters to update\n",
        "    grad_vel: Iterable of tensors (entries may be None)\n",
        "      One velocity buffer per parameter; a None entry means that the\n",
        "      parameter is updated without momentum in this step\n",
        "    lr: Float\n",
        "      Scalar specifying the learning rate or step-size for the update\n",
        "    beta: Float\n",
        "      Momentum coefficient applied to the previous velocity\n",
        "\n",
        "  Returns:\n",
        "    Nothing\n",
        "  \"\"\"\n",
        "  # Materialise once so that one-shot iterators survive the passes below\n",
        "  params = list(params)\n",
        "  grad_vel = list(grad_vel)\n",
        "\n",
        "  # A buffer may be the parameter's own .grad tensor (e.g. when the caller collected\n",
        "  # param.grad). zero_grad would wipe it, so keep a copy of the old velocity in that case\n",
        "  prev_vel = []\n",
        "  for par, vel in zip(params, grad_vel):\n",
        "    aliased = (vel is not None and par.grad is not None\n",
        "               and vel.data_ptr() == par.grad.data_ptr())\n",
        "    prev_vel.append(vel.detach().clone() if aliased else vel)\n",
        "\n",
        "  # Clear up gradients as Pytorch automatically accumulates gradients from\n",
        "  # successive backward calls\n",
        "  zero_grad(params)\n",
        "  # Compute gradients on given objective\n",
        "  loss.backward()\n",
        "\n",
        "  with torch.no_grad():\n",
        "    for par, vel, old in zip(params, grad_vel, prev_vel):\n",
        "      # Parameters that did not take part in the loss have no gradient\n",
        "      if par.grad is None:\n",
        "        continue\n",
        "      if old is not None:\n",
        "        # After this line .grad holds the new velocity\n",
        "        par.grad.data += beta * old\n",
        "        # Store it in the caller's buffer unless that buffer is .grad itself\n",
        "        if vel.data_ptr() != par.grad.data_ptr():\n",
        "          vel.copy_(par.grad.data)\n",
        "      par.data -= lr * par.grad.data"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 48,
       "metadata": {},
       "outputs": [
        {
         "name": "stderr",
         "output_type": "stream",
         "text": [
          "100%|█████████████████████████████████████████████████████████████████████████████████| 300/300 [00:06<00:00, 45.36it/s]\n"
         ]
        },
        {
         "data": {
          "image/png": "",
          "text/plain": [
           "<Figure size 1200x600 with 3 Axes>"
          ]
         },
         "metadata": {},
         "output_type": "display_data"
        }
       ],
       "source": [
        "# Fresh model, so training does not continue from previously updated weights\n",
        "model = Model().to(DEVICE)\n",
        "\n",
        "EPOCHS  = 300 #@param {type:\"slider\", min:2, max:1000, step:1}\n",
        "RATE = 0.9 #@param {type:\"slider\", min:0.001, max:2, step:0.001}\n",
        "loss_fn   = nn.CrossEntropyLoss()\n",
        "# Per-epoch history of training loss, training accuracy and test accuracy\n",
        "loss_list     = np.zeros((EPOCHS,))\n",
        "accuracy_list = np.zeros((EPOCHS,))\n",
        "accuracy_list_test = np.zeros((EPOCHS,))\n",
        "\n",
        "params = list(model.parameters())\n",
        "\n",
        "for epoch in tqdm.trange(EPOCHS):\n",
        "    y_pred = model(x_train)\n",
        "    loss = loss_fn(y_pred, y_train)\n",
        "    loss_list[epoch] = loss.item()\n",
        "\n",
        "    # Hand-written update step. RATE is passed explicitly: without it the helper\n",
        "    # falls back to its default lr=1e-3 and the slider above has no effect.\n",
        "    # gradient_update clears the old gradients itself, so no optimizer is needed\n",
        "    gradient_update(loss, params, lr=RATE)\n",
        "\n",
        "    # Accuracy is measured after the update and without tracking gradients\n",
        "    with torch.no_grad():\n",
        "        y_pred = model(x_train)\n",
        "        correct = (torch.argmax(y_pred, dim=1) == y_train).type(torch.FloatTensor)\n",
        "        accuracy_list[epoch] = correct.mean()\n",
        "        y_pred = model(x_test)\n",
        "        correct = (torch.argmax(y_pred, dim=1) == y_test).type(torch.FloatTensor)\n",
        "        accuracy_list_test[epoch] = correct.mean()\n",
        "\n",
        "\n",
        "# Three stacked panels sharing the epoch axis\n",
        "fig, (ax1, ax2, ax3) = plt.subplots(3, figsize=(12, 6), sharex=True)\n",
        "\n",
        "ax1.plot(accuracy_list)\n",
        "ax1.set_ylabel(\"train accuracy\")\n",
        "ax2.plot(loss_list)\n",
        "ax2.set_ylabel(\"train loss\")\n",
        "ax3.plot(accuracy_list_test)\n",
        "ax3.set_ylabel(\"test acc\")\n",
        "ax3.set_xlabel(\"epochs\");"
       ]
      },
      {
       "cell_type": "markdown",
       "metadata": {
        "id": "OPLRI_RKn2Mc"
       },
       "source": [
        "## 3.4 Experimente zum Lernverhalten mit Momentum und Batch Size\n",
        "Im Folgenden können Sie für ein festgelegtes Zeitbudget schauen, wie sich der Loss Ihres neuronalen Netzes innerhalb dieser Zeit entwickelt.\n",
        "Experimentieren Sie zunächst mit den Voreinstellungen mit und ohne Momentum, probieren Sie dann eigene Einstellungen aus. Diskutieren Sie das visualisierte Lernverhalten insbesondere bzgl. unterschiedlicher Batch Sizes."
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 30,
       "metadata": {
        "id": "ebevIBNQt4-l"
       },
       "outputs": [
        {
         "data": {
          "application/vnd.jupyter.widget-view+json": {
           "model_id": "b30f006d8a304a5f9fc519439876c549",
           "version_major": 2,
           "version_minor": 0
          },
          "text/plain": [
           "interactive(children=(Text(value='1, 20, 500, 17000', continuous_update=False, description='batch_sizes'), Tex…"
          ]
         },
         "metadata": {},
         "output_type": "display_data"
        }
       ],
       "source": [
        "@widgets.interact_manual\n",
        "def minibatch_experiment(batch_sizes='1, 20, 500, 17000',\n",
        "                         lrs='0.9, 0.9, 0.9, 0.9',\n",
        "                         time_budget=widgets.Dropdown(options=[\"0.05\", \"0.5\",  \"2.0\", \"5.0\", \"7.5\"],\n",
        "                                                      value=\"5.0\"),\n",
        "                         use_momentum = widgets.ToggleButton(value=True)):\n",
        "\n",
        "  \"\"\"\n",
        "  Train a fresh copy of the model for a fixed wall-clock budget per batch\n",
        "  size and plot the minibatch loss over time\n",
        "\n",
        "  Args:\n",
        "    batch_sizes: String\n",
        "      Comma-separated sizes of the minibatches\n",
        "    lrs: String\n",
        "      Comma-separated learning rates, one per batch size\n",
        "    time_budget: widget dropdown instance\n",
        "      Training time in seconds per batch size with default=5.0s\n",
        "    use_momentum: widget toggle button instance\n",
        "      If set, update with momentum_update (beta=0.5), otherwise with gradient_update\n",
        "\n",
        "  Returns:\n",
        "    Nothing\n",
        "  \"\"\"\n",
        "  batch_sizes = [int(s) for s in batch_sizes.split(',')]\n",
        "  lrs = [float(s) for s in lrs.split(',')]\n",
        "  if len(lrs) < len(batch_sizes):\n",
        "    raise ValueError('lrs must provide a learning rate for every batch size')\n",
        "  budget = float(time_budget)\n",
        "\n",
        "  # Created here so that the experiment does not depend on a training cell having run\n",
        "  loss_fn = nn.CrossEntropyLoss()\n",
        "\n",
        "  LOSS_HIST = {size: [] for size in batch_sizes}\n",
        "\n",
        "  base_model = Model().to(DEVICE)\n",
        "\n",
        "  for batch_size, lr in zip(tqdmauto(batch_sizes), lrs):\n",
        "    start_time = time.time()\n",
        "    # Create a new copy of the model for each batch size\n",
        "    model = copy.deepcopy(base_model)\n",
        "    params = list(model.parameters())\n",
        "    # Velocity buffers handed to momentum_update, starting at zero\n",
        "    aux_tensors = [torch.zeros_like(p) for p in params]\n",
        "    # Fixed budget per choice of batch size\n",
        "    while (time.time() - start_time) < budget:\n",
        "      data, labels = sample_minibatch(x_train, y_train, batch_size)\n",
        "      loss = loss_fn(model(data), labels)\n",
        "      if use_momentum:\n",
        "        momentum_update(loss, params, grad_vel=aux_tensors, lr=lr, beta=0.5)\n",
        "      else:\n",
        "        gradient_update(loss, params, lr=lr)\n",
        "      LOSS_HIST[batch_size].append([time.time() - start_time,\n",
        "                                    loss.item()])\n",
        "\n",
        "  # squeeze=False always yields a 2D array of axes, also for a single batch size\n",
        "  fig, axs = plt.subplots(1, len(batch_sizes), figsize=(10, 3), squeeze=False)\n",
        "  for ax, batch_size in zip(axs[0], batch_sizes):\n",
        "    # reshape keeps the two columns (seconds, loss) even when no step fit into the budget\n",
        "    plot_data = np.array(LOSS_HIST[batch_size]).reshape(-1, 2)\n",
        "    ax.plot(plot_data[:, 0], plot_data[:, 1], label=batch_size,\n",
        "            alpha=0.8)\n",
        "    # Title: number of samples processed (batch size times number of steps)\n",
        "    ax.set_title(' #: ' + str(batch_size*len(LOSS_HIST[batch_size])))\n",
        "    ax.set_xlabel('Seconds')\n",
        "    ax.set_ylabel('Loss')\n",
        "  plt.show()\n"
       ]
      },
      {
       "cell_type": "markdown",
       "metadata": {
        "id": "e9yCfSXR1The"
       },
       "source": [
        "![image.png]()"
       ]
      }
     ],
     "metadata": {
      "accelerator": "GPU",
      "colab": {
       "gpuType": "T4",
       "provenance": [],
       "toc_visible": true
      },
      "kernelspec": {
       "display_name": "Python 3 (ipykernel)",
       "language": "python",
       "name": "python3"
      },
      "language_info": {
       "codemirror_mode": {
        "name": "ipython",
        "version": 3
       },
       "file_extension": ".py",
       "mimetype": "text/x-python",
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
       "version": "3.10.12"
      }
     },
     "nbformat": 4,
     "nbformat_minor": 4
    }