experimental

2024-04-28 22:27:25 +08:00
parent 2d95b112c5
commit 215cde2d19
2 changed files with 689 additions and 38 deletions
--- a/cs2109s/labs/final/final.py
+++ b/cs2109s/labs/final/final.py
@@ -17,17 +17,17 @@ import numpy as np
 import torch
 import os

-
 class CNN3D(nn.Module):
-    def __init__(self):
+    def __init__(self, hidden_size=32, dropout=0.0):
        super(CNN3D, self).__init__()
-        self.conv1 = nn.Conv3d(1, 32, kernel_size=3, stride=1, padding=1)
-        self.conv2 = nn.Conv3d(32, 64, kernel_size=3, stride=1, padding=1)
-        self.batchnorm = nn.BatchNorm3d(32)
+        self.conv1 = nn.Conv3d(1, hidden_size, kernel_size=3, stride=1, padding=1)
+        self.batchnorm = nn.BatchNorm3d(hidden_size)
+        self.conv2 = nn.Conv3d(hidden_size, hidden_size*2, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool3d(kernel_size=2, stride=2)
-        self.fc1 = nn.Linear(1024, 256)  # Calculate input size based on output from conv3
+        self.fc1 = nn.Linear(hidden_size*32, 256)  # Input size must match the flattened output of the final conv stage (conv2)
        self.fc2 = nn.Linear(256, 6)
+        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.conv1(x)
@@ -37,6 +37,7 @@ class CNN3D(nn.Module):
        x = self.conv2(x)
        x = self.relu(x)
        x = self.maxpool(x)
+        x = self.dropout(x)

        x = x.view(x.size(0), -1)  # Flatten features for fully connected layers
        x = self.fc1(x)
@@ -49,7 +50,6 @@ def train(model, criterion, optimizer, loader, epochs=5):
        for idx, (inputs, labels) in enumerate(loader):
            optimizer.zero_grad()
            outputs = model(inputs)
-            # print(outputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
@@ -57,44 +57,22 @@ def train(model, criterion, optimizer, loader, epochs=5):
    return model


-def process_data(X, y):
-    y = np.array(y)
-    X = np.array([video[:6] for video in X])
-    tensor_videos = torch.tensor(X, dtype=torch.float32)
-    # Clip values to 0 and 255
-    tensor_videos = np.clip(tensor_videos, 0, 255)
-    # Replace NaNs in each frame, with the average of the frame. This was generated with GPT
-    for i in range(tensor_videos.shape[0]):
-        for j in range(tensor_videos.shape[1]):
-            tensor_videos[i][j][torch.isnan(tensor_videos[i][j])] = torch.mean(
-                tensor_videos[i][j][~torch.isnan(tensor_videos[i][j])])
-    # Undersample the data for each of the 6 classes. Select max of 300 samples for each class
-    # Very much generated with the assitance of chatGPT with some modifications
-    # Get the indices of each class
-    indices = [np.argwhere(y == i).squeeze(1) for i in range(6)]
-    # Get the number of samples to take for each class
-    num_samples_to_take = 600
-    # Get the indices of the samples to take
-    indices_to_take = [np.random.choice(indices[i], num_samples_to_take, replace=True) for i in range(6)]
-    # Concatenate the indices
-    indices_to_take = np.concatenate(indices_to_take)
-    # Select the samples
-    tensor_videos = tensor_videos[indices_to_take].unsqueeze(1)
-    y = y[indices_to_take]
-    return torch.Tensor(tensor_videos), torch.Tensor(y).long()


 class Model():
-    def __init__(self):
-        self.model = CNN3D()
+    def __init__(self, batch_size=8,lr=0.001,epochs=10, dropout=0.0, hidden_size=32):
+        self.batch_size = batch_size
+        self.lr = lr
+        self.epochs = epochs
+        self.model = CNN3D(dropout=dropout, hidden_size=hidden_size)
        self.criterion = nn.CrossEntropyLoss()
-        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)
+        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)

    def fit(self, X, y):
-        X, y = process_data(X, y)
+        X, y = self.process_data(X, y)
        train_dataset = torch.utils.data.TensorDataset(X, y)
-        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
-        train(self.model, self.criterion, self.optimizer, train_loader, 10)
+        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
+        train(self.model, self.criterion, self.optimizer, train_loader, self.epochs)

    def predict(self, X):
        self.model.eval()
@@ -103,6 +81,12 @@ class Model():
            tensor_videos = torch.tensor(X, dtype=torch.float32)
            # Clip values to 0 and 255
            tensor_videos = np.clip(tensor_videos, 0, 255)
+            # TEMP: binarize pixel values around a fixed threshold (above -> 255, below -> 0)
+            threshold = 180
+            tensor_videos[tensor_videos > threshold] = 255
+            tensor_videos[tensor_videos < threshold] = 0
+            # END TEMP
+
            # Replace NaNs in each frame, with the average of the frame. This was generated with GPT
            for i in range(tensor_videos.shape[0]):
                for j in range(tensor_videos.shape[1]):
@@ -111,6 +95,37 @@ class Model():
            X = torch.Tensor(tensor_videos.unsqueeze(1))
            result = self.model(X)
        return torch.max(result, dim=1)[1].numpy()
+    def process_data(self, X, y, n_samples=600):
+        y = np.array(y)
+        X = np.array([video[:6] for video in X])
+        tensor_videos = torch.tensor(X, dtype=torch.float32)
+        # Clip values to the range [0, 255]
+        tensor_videos = np.clip(tensor_videos, 0, 255)
+        # TEMP: binarize pixel values around a fixed threshold (above -> 255, below -> 0)
+        threshold = 180
+        tensor_videos[tensor_videos > threshold] = 255
+        tensor_videos[tensor_videos < threshold] = 0
+        # END TEMP
+
+        # Replace NaNs in each frame with the mean of that frame's non-NaN values. This was generated with GPT
+        for i in range(tensor_videos.shape[0]):
+            for j in range(tensor_videos.shape[1]):
+                tensor_videos[i][j][torch.isnan(tensor_videos[i][j])] = torch.mean(
+                    tensor_videos[i][j][~torch.isnan(tensor_videos[i][j])])
+        # Resample the data for each of the 6 classes: draw n_samples indices per class, with replacement
+        # Very much generated with the assistance of ChatGPT, with some modifications
+        # Get the indices of each class
+        indices = [np.argwhere(y == i).squeeze(1) for i in range(6)]
+        # The number of samples to take for each class is n_samples
+        # Get the indices of the samples to take
+        indices_to_take = [np.random.choice(indices[i], n_samples, replace=True) for i in range(6)]
+        # Concatenate the indices
+        indices_to_take = np.concatenate(indices_to_take)
+        # Select the samples
+        tensor_videos = tensor_videos[indices_to_take].unsqueeze(1)
+        y = y[indices_to_take]
+        return torch.Tensor(tensor_videos), torch.Tensor(y).long()
+

 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

@@ -118,6 +133,7 @@ not_nan_indices = np.argwhere(~np.isnan(np.array(y_test))).squeeze()
 y_test = [y_test[i] for i in not_nan_indices]
 X_test = [X_test[i] for i in not_nan_indices]

+print("init model")
 model = Model()
 model.fit(X_train, y_train)

--- a/cs2109s/labs/final/optuna.ipynb
+++ b/cs2109s/labs/final/optuna.ipynb
@@ -0,0 +1,635 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "outputs": [],
+   "source": [
+    "from torch import nn\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "import os\n",
+    "\n",
+    "class CNN3D(nn.Module):\n",
+    "    def __init__(self, hidden_size=32, dropout=0.0):\n",
+    "        super(CNN3D, self).__init__()\n",
+    "        self.conv1 = nn.Conv3d(1, hidden_size, kernel_size=3, stride=1, padding=1)\n",
+    "        self.batchnorm = nn.BatchNorm3d(hidden_size)\n",
+    "        self.conv2 = nn.Conv3d(hidden_size, hidden_size*2, kernel_size=3, stride=1, padding=1)\n",
+    "        self.relu = nn.ReLU()\n",
+    "        self.maxpool = nn.MaxPool3d(kernel_size=2, stride=2)\n",
+    "        self.fc1 = nn.Linear(hidden_size*32, 256)  # Input size must match the flattened output of the final conv stage (conv2)\n",
+    "        self.fc2 = nn.Linear(256, 6)\n",
+    "        self.dropout = nn.Dropout(dropout)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x = self.conv1(x)\n",
+    "        x = self.relu(x)\n",
+    "        x = self.maxpool(x)\n",
+    "        x = self.batchnorm(x)\n",
+    "        x = self.conv2(x)\n",
+    "        x = self.relu(x)\n",
+    "        x = self.maxpool(x)\n",
+    "        x = self.dropout(x)\n",
+    "\n",
+    "        x = x.view(x.size(0), -1)  # Flatten features for fully connected layers\n",
+    "        x = self.fc1(x)\n",
+    "        x = self.relu(x)\n",
+    "        x = self.fc2(x)\n",
+    "        return x\n",
+    "\n",
+    "def train(model, criterion, optimizer, loader, epochs=5):\n",
+    "    for epoch in range(epochs):\n",
+    "        for idx, (inputs, labels) in enumerate(loader):\n",
+    "            optimizer.zero_grad()\n",
+    "            outputs = model(inputs)\n",
+    "            loss = criterion(outputs, labels)\n",
+    "            loss.backward()\n",
+    "            optimizer.step()\n",
+    "        # print(f'Epoch {epoch}, Loss: {loss.item()}')\n",
+    "    return model\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "class Model():\n",
+    "    def __init__(self, batch_size=8,lr=0.001,epochs=10, dropout=0.0, hidden_size=32):\n",
+    "        self.batch_size = batch_size\n",
+    "        self.lr = lr\n",
+    "        self.epochs = epochs\n",
+    "        self.model = CNN3D(dropout=dropout, hidden_size=hidden_size)\n",
+    "        self.criterion = nn.CrossEntropyLoss()\n",
+    "        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)\n",
+    "\n",
+    "    def fit(self, X, y):\n",
+    "        X, y = self.process_data(X, y)\n",
+    "        train_dataset = torch.utils.data.TensorDataset(X, y)\n",
+    "        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)\n",
+    "        train(self.model, self.criterion, self.optimizer, train_loader, self.epochs)\n",
+    "\n",
+    "    def predict(self, X):\n",
+    "        self.model.eval()\n",
+    "        with torch.no_grad():\n",
+    "            X = np.array([video[:6] for video in X])\n",
+    "            tensor_videos = torch.tensor(X, dtype=torch.float32)\n",
+    "            # Clip values to 0 and 255\n",
+    "            tensor_videos = np.clip(tensor_videos, 0, 255)\n",
+    "            # Replace NaNs in each frame, with the average of the frame. This was generated with GPT\n",
+    "            for i in range(tensor_videos.shape[0]):\n",
+    "                for j in range(tensor_videos.shape[1]):\n",
+    "                    tensor_videos[i][j][torch.isnan(tensor_videos[i][j])] = torch.mean(\n",
+    "                        tensor_videos[i][j][~torch.isnan(tensor_videos[i][j])])\n",
+    "            X = torch.Tensor(tensor_videos.unsqueeze(1))\n",
+    "            result = self.model(X)\n",
+    "        return torch.max(result, dim=1)[1].numpy()\n",
+    "    def process_data(self, X, y, n_samples=600):\n",
+    "        y = np.array(y)\n",
+    "        X = np.array([video[:6] for video in X])\n",
+    "        tensor_videos = torch.tensor(X, dtype=torch.float32)\n",
+    "        # Clip values to 0 and 255\n",
+    "        tensor_videos = np.clip(tensor_videos, 0, 255)\n",
+    "        # Replace NaNs in each frame, with the average of the frame. This was generated with GPT\n",
+    "        for i in range(tensor_videos.shape[0]):\n",
+    "            for j in range(tensor_videos.shape[1]):\n",
+    "                tensor_videos[i][j][torch.isnan(tensor_videos[i][j])] = torch.mean(\n",
+    "                    tensor_videos[i][j][~torch.isnan(tensor_videos[i][j])])\n",
+    "        # Resample the data for each of the 6 classes: draw n_samples indices per class, with replacement\n",
+    "        # Very much generated with the assistance of ChatGPT, with some modifications\n",
+    "        # Get the indices of each class\n",
+    "        indices = [np.argwhere(y == i).squeeze(1) for i in range(6)]\n",
+    "        # Get the number of samples to take for each class\n",
+    "        # Get the indices of the samples to take\n",
+    "        indices_to_take = [np.random.choice(indices[i], n_samples, replace=True) for i in range(6)]\n",
+    "        # Concatenate the indices\n",
+    "        indices_to_take = np.concatenate(indices_to_take)\n",
+    "        # Select the samples\n",
+    "        tensor_videos = tensor_videos[indices_to_take].unsqueeze(1)\n",
+    "        y = y[indices_to_take]\n",
+    "        return torch.Tensor(tensor_videos), torch.Tensor(y).long()\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-04-28T14:11:23.392366Z",
+     "start_time": "2024-04-28T14:11:23.379289Z"
+    }
+   },
+   "id": "e68cd41b2d919ccd",
+   "execution_count": 60
+  },
+  {
+   "cell_type": "code",
+   "outputs": [],
+   "source": [],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "834f6cae11789f02"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "initial_id",
+   "metadata": {
+    "collapsed": true,
+    "ExecuteTime": {
+     "end_time": "2024-04-28T13:36:33.686216Z",
+     "start_time": "2024-04-28T13:36:33.665939Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import f1_score\n",
+    "import optuna\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "with open('data.npy', 'rb') as f:\n",
+    "    data = np.load(f, allow_pickle=True).item()\n",
+    "    X = data['data']\n",
+    "    y = data['label']\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1)\n",
+    "\n",
+    "not_nan_indices = np.argwhere(~np.isnan(np.array(y_test))).squeeze()\n",
+    "y_test = [y_test[i] for i in not_nan_indices]\n",
+    "X_test = [X_test[i] for i in not_nan_indices]\n",
+    "\n",
+    "\n",
+    "def objective(trial):\n",
+    "    batch = trial.suggest_int(\"batch_size\", 1, 12, log=True)\n",
+    "    epochs = trial.suggest_int(\"epochs\", 1, 20)\n",
+    "    model = Model(batch_size=2**batch, epochs=epochs)\n",
+    "    model.fit(X_train, y_train)\n",
+    "    pred = model.predict(X_test)\n",
+    "    return -f1_score(y_test, pred, average='macro')\n",
+    "# Run optimization\n",
+    "# storage = optuna.storages.InMemoryStorage()\n",
+    "# study = optuna.create_study(storage=storage)\n",
+    "# study.optimize(objective, n_trials=10)\n",
+    "# \n",
+    "# best_score = study.best_value\n",
+    "# best_params = study.best_params\n",
+    "# \n",
+    "# print(best_score, best_params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
+      "\u001B[0;31mKeyboardInterrupt\u001B[0m                         Traceback (most recent call last)",
+      "Cell \u001B[0;32mIn[17], line 3\u001B[0m\n\u001B[1;32m      1\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m i \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mrange\u001B[39m(\u001B[38;5;241m3\u001B[39m):\n\u001B[1;32m      2\u001B[0m     model \u001B[38;5;241m=\u001B[39m Model(batch_size\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m8\u001B[39m, epochs\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m10\u001B[39m)\n\u001B[0;32m----> 3\u001B[0m     \u001B[43mmodel\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit\u001B[49m\u001B[43m(\u001B[49m\u001B[43mX_train\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43my_train\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m      4\u001B[0m     pred \u001B[38;5;241m=\u001B[39m model\u001B[38;5;241m.\u001B[39mpredict(X_test)\n\u001B[1;32m      5\u001B[0m     \u001B[38;5;28mprint\u001B[39m(f1_score(y_test, pred, average\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmacro\u001B[39m\u001B[38;5;124m'\u001B[39m))\n",
+      "Cell \u001B[0;32mIn[16], line 59\u001B[0m, in \u001B[0;36mModel.fit\u001B[0;34m(self, X, y)\u001B[0m\n\u001B[1;32m     57\u001B[0m train_dataset \u001B[38;5;241m=\u001B[39m torch\u001B[38;5;241m.\u001B[39mutils\u001B[38;5;241m.\u001B[39mdata\u001B[38;5;241m.\u001B[39mTensorDataset(X, y)\n\u001B[1;32m     58\u001B[0m train_loader \u001B[38;5;241m=\u001B[39m torch\u001B[38;5;241m.\u001B[39mutils\u001B[38;5;241m.\u001B[39mdata\u001B[38;5;241m.\u001B[39mDataLoader(train_dataset, batch_size\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mbatch_size, shuffle\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m)\n\u001B[0;32m---> 59\u001B[0m \u001B[43mtrain\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmodel\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcriterion\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43moptimizer\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mtrain_loader\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mepochs\u001B[49m\u001B[43m)\u001B[49m\n",
+      "Cell \u001B[0;32mIn[16], line 38\u001B[0m, in \u001B[0;36mtrain\u001B[0;34m(model, criterion, optimizer, loader, epochs)\u001B[0m\n\u001B[1;32m     36\u001B[0m     outputs \u001B[38;5;241m=\u001B[39m model(inputs)\n\u001B[1;32m     37\u001B[0m     loss \u001B[38;5;241m=\u001B[39m criterion(outputs, labels)\n\u001B[0;32m---> 38\u001B[0m     \u001B[43mloss\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbackward\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m     39\u001B[0m     optimizer\u001B[38;5;241m.\u001B[39mstep()\n\u001B[1;32m     40\u001B[0m \u001B[38;5;66;03m# print(f'Epoch {epoch}, Loss: {loss.item()}')\u001B[39;00m\n",
+      "File \u001B[0;32m/nix/store/7xbwd39w9rk2qgwpyf8c4h8bv02bl3c9-python3-3.11.9-env/lib/python3.11/site-packages/torch/_tensor.py:522\u001B[0m, in \u001B[0;36mTensor.backward\u001B[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001B[0m\n\u001B[1;32m    512\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m has_torch_function_unary(\u001B[38;5;28mself\u001B[39m):\n\u001B[1;32m    513\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m handle_torch_function(\n\u001B[1;32m    514\u001B[0m         Tensor\u001B[38;5;241m.\u001B[39mbackward,\n\u001B[1;32m    515\u001B[0m         (\u001B[38;5;28mself\u001B[39m,),\n\u001B[0;32m   (...)\u001B[0m\n\u001B[1;32m    520\u001B[0m         inputs\u001B[38;5;241m=\u001B[39minputs,\n\u001B[1;32m    521\u001B[0m     )\n\u001B[0;32m--> 522\u001B[0m \u001B[43mtorch\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mautograd\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbackward\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m    523\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mgradient\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mretain_graph\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mcreate_graph\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43minputs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43minputs\u001B[49m\n\u001B[1;32m    524\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n",
+      "File \u001B[0;32m/nix/store/7xbwd39w9rk2qgwpyf8c4h8bv02bl3c9-python3-3.11.9-env/lib/python3.11/site-packages/torch/autograd/__init__.py:266\u001B[0m, in \u001B[0;36mbackward\u001B[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001B[0m\n\u001B[1;32m    261\u001B[0m     retain_graph \u001B[38;5;241m=\u001B[39m create_graph\n\u001B[1;32m    263\u001B[0m \u001B[38;5;66;03m# The reason we repeat the same comment below is that\u001B[39;00m\n\u001B[1;32m    264\u001B[0m \u001B[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001B[39;00m\n\u001B[1;32m    265\u001B[0m \u001B[38;5;66;03m# calls in the traceback and some print out the last line\u001B[39;00m\n\u001B[0;32m--> 266\u001B[0m \u001B[43mVariable\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_execution_engine\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrun_backward\u001B[49m\u001B[43m(\u001B[49m\u001B[43m  \u001B[49m\u001B[38;5;66;43;03m# Calls into the C++ engine to run the backward pass\u001B[39;49;00m\n\u001B[1;32m    267\u001B[0m \u001B[43m    \u001B[49m\u001B[43mtensors\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    268\u001B[0m \u001B[43m    \u001B[49m\u001B[43mgrad_tensors_\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    269\u001B[0m \u001B[43m    \u001B[49m\u001B[43mretain_graph\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    270\u001B[0m \u001B[43m    \u001B[49m\u001B[43mcreate_graph\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    271\u001B[0m \u001B[43m    \u001B[49m\u001B[43minputs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m    272\u001B[0m \u001B[43m    \u001B[49m\u001B[43mallow_unreachable\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mTrue\u001B[39;49;00m\u001B[43m,\u001B[49m\n\u001B[1;32m    273\u001B[0m \u001B[43m    \u001B[49m\u001B[43maccumulate_grad\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mTrue\u001B[39;49;00m\u001B[43m,\u001B[49m\n\u001B[1;32m    274\u001B[0m 
\u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n",
+      "\u001B[0;31mKeyboardInterrupt\u001B[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "for i in range(3):\n",
+    "    model = Model(batch_size=8, epochs=10)\n",
+    "    model.fit(X_train, y_train)\n",
+    "    pred = model.predict(X_test)\n",
+    "    print(f1_score(y_test, pred, average='macro'))"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-04-28T13:12:15.506030Z",
+     "start_time": "2024-04-28T13:12:04.578590Z"
+    }
+   },
+   "id": "8d40e900efc2c1e3",
+   "execution_count": 17
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "ename": "DuplicatedStudyError",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
+      "\u001B[0;31mDuplicatedStudyError\u001B[0m                      Traceback (most recent call last)",
+      "Cell \u001B[0;32mIn[19], line 8\u001B[0m\n\u001B[1;32m      6\u001B[0m     pred \u001B[38;5;241m=\u001B[39m model\u001B[38;5;241m.\u001B[39mpredict(X_test)\n\u001B[1;32m      7\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;241m-\u001B[39mf1_score(y_test, pred, average\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmacro\u001B[39m\u001B[38;5;124m'\u001B[39m)\n\u001B[0;32m----> 8\u001B[0m study \u001B[38;5;241m=\u001B[39m \u001B[43moptuna\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcreate_study\u001B[49m\u001B[43m(\u001B[49m\u001B[43mstorage\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mstorage\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mstudy_name\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mLearning_rate\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[1;32m      9\u001B[0m study\u001B[38;5;241m.\u001B[39moptimize(objective, n_trials\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m10\u001B[39m)\n\u001B[1;32m     10\u001B[0m \u001B[38;5;28mprint\u001B[39m(study\u001B[38;5;241m.\u001B[39mbest_trial)\n",
+      "File \u001B[0;32m/nix/store/7xbwd39w9rk2qgwpyf8c4h8bv02bl3c9-python3-3.11.9-env/lib/python3.11/site-packages/optuna/_convert_positional_args.py:83\u001B[0m, in \u001B[0;36mconvert_positional_args.<locals>.converter_decorator.<locals>.converter_wrapper\u001B[0;34m(*args, **kwargs)\u001B[0m\n\u001B[1;32m     77\u001B[0m     \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m(\n\u001B[1;32m     78\u001B[0m         \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{\u001B[39;00mfunc\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m() got multiple values for arguments \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mduplicated_kwds\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m.\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m     79\u001B[0m     )\n\u001B[1;32m     81\u001B[0m kwargs\u001B[38;5;241m.\u001B[39mupdate(inferred_kwargs)\n\u001B[0;32m---> 83\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mfunc\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n",
+      "File \u001B[0;32m/nix/store/7xbwd39w9rk2qgwpyf8c4h8bv02bl3c9-python3-3.11.9-env/lib/python3.11/site-packages/optuna/study/study.py:1251\u001B[0m, in \u001B[0;36mcreate_study\u001B[0;34m(storage, sampler, pruner, study_name, direction, load_if_exists, directions)\u001B[0m\n\u001B[1;32m   1249\u001B[0m storage \u001B[38;5;241m=\u001B[39m storages\u001B[38;5;241m.\u001B[39mget_storage(storage)\n\u001B[1;32m   1250\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m-> 1251\u001B[0m     study_id \u001B[38;5;241m=\u001B[39m \u001B[43mstorage\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcreate_new_study\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdirection_objects\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mstudy_name\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m   1252\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m exceptions\u001B[38;5;241m.\u001B[39mDuplicatedStudyError:\n\u001B[1;32m   1253\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m load_if_exists:\n",
+      "File \u001B[0;32m/nix/store/7xbwd39w9rk2qgwpyf8c4h8bv02bl3c9-python3-3.11.9-env/lib/python3.11/site-packages/optuna/storages/_in_memory.py:62\u001B[0m, in \u001B[0;36mInMemoryStorage.create_new_study\u001B[0;34m(self, directions, study_name)\u001B[0m\n\u001B[1;32m     60\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m study_name \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m     61\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m study_name \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_study_name_to_id:\n\u001B[0;32m---> 62\u001B[0m         \u001B[38;5;28;01mraise\u001B[39;00m DuplicatedStudyError\n\u001B[1;32m     63\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m     64\u001B[0m     study_uuid \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mstr\u001B[39m(uuid\u001B[38;5;241m.\u001B[39muuid4())\n",
+      "\u001B[0;31mDuplicatedStudyError\u001B[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "# Study of best learning rate\n",
+    "def objective(trial):\n",
+    "    lr = trial.suggest_float(\"lr\", 1e-5, 5e-1, log=True)\n",
+    "    model = Model(lr=lr)\n",
+    "    model.fit(X_train, y_train)\n",
+    "    pred = model.predict(X_test)\n",
+    "    return -f1_score(y_test, pred, average='macro')\n",
+    "study = optuna.create_study(storage=storage, study_name=\"Learning_rate\")\n",
+    "study.optimize(objective, n_trials=10)\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-04-28T13:28:31.359163Z",
+     "start_time": "2024-04-28T13:28:31.282392Z"
+    }
+   },
+   "id": "569223093b7d12cd",
+   "execution_count": 19
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.6912320650364129\n",
+      "0.6607744107744108\n",
+      "0.6665432155087326\n"
+     ]
+    }
+   ],
+   "source": [
+    "for i in range(3):\n",
+    "    model = Model()\n",
+    "    model.fit(X_train, y_train)\n",
+    "    pred = model.predict(X_test)\n",
+    "    print(f1_score(y_test, pred, average='macro'))\n",
+    "# With LR 0.00016764273108300424"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-04-28T13:30:54.182935Z",
+     "start_time": "2024-04-28T13:30:09.313591Z"
+    }
+   },
+   "id": "44d047094db9b1d",
+   "execution_count": 24
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.706415970280129\n",
+      "0.7055600716120302\n",
+      "0.67676009342676\n"
+     ]
+    }
+   ],
+   "source": [
+    "for i in range(3):\n",
+    "    model = Model(lr=0.001)\n",
+    "    model.fit(X_train, y_train)\n",
+    "    pred = model.predict(X_test)\n",
+    "    print(f1_score(y_test, pred, average='macro'))"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-04-28T13:32:17.798663Z",
+     "start_time": "2024-04-28T13:31:32.517631Z"
+    }
+   },
+   "id": "7076da680d4a43d9",
+   "execution_count": 25
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[I 2024-04-28 13:38:45,580] A new study created in memory with name: dropout\n",
+      "[I 2024-04-28 13:39:09,602] Trial 0 finished with value: -0.5916579614840399 and parameters: {'dropout': 0.2}. Best is trial 0 with value: -0.5916579614840399.\n",
+      "[I 2024-04-28 13:39:33,716] Trial 1 finished with value: -0.706054567707244 and parameters: {'dropout': 0.5}. Best is trial 1 with value: -0.706054567707244.\n",
+      "[I 2024-04-28 13:39:58,041] Trial 2 finished with value: -0.6914798955557488 and parameters: {'dropout': 0.1}. Best is trial 1 with value: -0.706054567707244.\n",
+      "[I 2024-04-28 13:40:22,578] Trial 3 finished with value: -0.7369303922613968 and parameters: {'dropout': 0.1}. Best is trial 3 with value: -0.7369303922613968.\n",
+      "[I 2024-04-28 13:40:46,740] Trial 4 finished with value: -0.7529025381898083 and parameters: {'dropout': 0.4}. Best is trial 4 with value: -0.7529025381898083.\n",
+      "[I 2024-04-28 13:41:10,786] Trial 5 finished with value: -0.6720344567712989 and parameters: {'dropout': 0.30000000000000004}. Best is trial 4 with value: -0.7529025381898083.\n",
+      "[I 2024-04-28 13:41:34,997] Trial 6 finished with value: -0.668808616427664 and parameters: {'dropout': 0.30000000000000004}. Best is trial 4 with value: -0.7529025381898083.\n",
+      "[I 2024-04-28 13:41:59,296] Trial 7 finished with value: -0.6959006417141586 and parameters: {'dropout': 0.5}. Best is trial 4 with value: -0.7529025381898083.\n",
+      "[I 2024-04-28 13:42:23,828] Trial 8 finished with value: -0.707880814386361 and parameters: {'dropout': 0.4}. Best is trial 4 with value: -0.7529025381898083.\n",
+      "[I 2024-04-28 13:42:48,169] Trial 9 finished with value: -0.7244402292510341 and parameters: {'dropout': 0.0}. Best is trial 4 with value: -0.7529025381898083.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": "({'dropout': 0.4}, -0.7529025381898083)"
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def objective(trial):\n",
+    "    dropout = trial.suggest_float(\"dropout\", 0.0, 0.5, step=0.1)\n",
+    "    model = Model(dropout=dropout, lr=0.001)\n",
+    "    model.fit(X_train, y_train)\n",
+    "    pred = model.predict(X_test)\n",
+    "    return -f1_score(y_test, pred, average='macro')\n",
+    "study = optuna.create_study(storage=storage, study_name=\"dropout\")\n",
+    "study.optimize(objective, n_trials=10)\n",
+    "\n",
+    "study.best_params, study.best_value"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-04-28T13:42:48.174167Z",
+     "start_time": "2024-04-28T13:38:45.579006Z"
+    }
+   },
+   "id": "9b62d90bafc1cacd",
+   "execution_count": 35
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.6141327807994474\n",
+      "0.6064872820918911\n",
+      "0.6300338376466196\n"
+     ]
+    }
+   ],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)\n",
+    "not_nan_indices = np.argwhere(~np.isnan(np.array(y_test))).squeeze()\n",
+    "y_test = [y_test[i] for i in not_nan_indices]\n",
+    "X_test = [X_test[i] for i in not_nan_indices]\n",
+    "\n",
+    "for i in range(3):\n",
+    "    model = Model(dropout=0.4, lr=0.001)\n",
+    "    model.fit(X_train, y_train)\n",
+    "    pred = model.predict(X_test)\n",
+    "    print(f1_score(y_test, pred, average='macro'))\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-04-28T13:45:49.518836Z",
+     "start_time": "2024-04-28T13:44:37.580866Z"
+    }
+   },
+   "id": "c6c39df6367f332f",
+   "execution_count": 37
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.6413423296891699\n",
+      "0.6768407575558187\n",
+      "0.6221822007289135\n"
+     ]
+    }
+   ],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)\n",
+    "not_nan_indices = np.argwhere(~np.isnan(np.array(y_test))).squeeze()\n",
+    "y_test = [y_test[i] for i in not_nan_indices]\n",
+    "X_test = [X_test[i] for i in not_nan_indices]\n",
+    "\n",
+    "for i in range(3):\n",
+    "    model = Model(dropout=0.1, lr=0.001)\n",
+    "    model.fit(X_train, y_train)\n",
+    "    pred = model.predict(X_test)\n",
+    "    print(f1_score(y_test, pred, average='macro'))"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-04-28T13:47:37.461842Z",
+     "start_time": "2024-04-28T13:46:25.429049Z"
+    }
+   },
+   "id": "43eeb03175ef4fcf",
+   "execution_count": 38
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.6933099871956078\n",
+      "0.662550713257427\n",
+      "0.7479965052551641\n"
+     ]
+    }
+   ],
+   "source": [
+    "for i in range(3):\n",
+    "    model = Model(dropout=0.0, lr=0.001)\n",
+    "    model.fit(X_train, y_train)\n",
+    "    pred = model.predict(X_test)\n",
+    "    print(f1_score(y_test, pred, average='macro'))"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-04-28T13:49:23.670317Z",
+     "start_time": "2024-04-28T13:48:12.536955Z"
+    }
+   },
+   "id": "ca306dcc36b378aa",
+   "execution_count": 39
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[I 2024-04-28 13:54:45,435] A new study created in memory with name: hidden_layer\n",
+      "[I 2024-04-28 13:55:08,688] Trial 0 finished with value: -0.6744674992735161 and parameters: {'hidden_layer': 5}. Best is trial 0 with value: -0.6744674992735161.\n",
+      "[I 2024-04-28 13:55:17,295] Trial 1 finished with value: -0.28941963054866277 and parameters: {'hidden_layer': 0}. Best is trial 0 with value: -0.6744674992735161.\n",
+      "[I 2024-04-28 13:55:27,158] Trial 2 finished with value: -0.5233606132063355 and parameters: {'hidden_layer': 1}. Best is trial 0 with value: -0.6744674992735161.\n",
+      "[I 2024-04-28 13:56:19,013] Trial 3 finished with value: -0.6249650240541486 and parameters: {'hidden_layer': 6}. Best is trial 0 with value: -0.6744674992735161.\n",
+      "[I 2024-04-28 13:57:10,821] Trial 4 finished with value: -0.7192056747499614 and parameters: {'hidden_layer': 6}. Best is trial 4 with value: -0.7192056747499614.\n",
+      "[I 2024-04-28 13:57:21,125] Trial 5 finished with value: -0.49548899023401516 and parameters: {'hidden_layer': 2}. Best is trial 4 with value: -0.7192056747499614.\n",
+      "[I 2024-04-28 13:58:13,096] Trial 6 finished with value: -0.6470733120834541 and parameters: {'hidden_layer': 6}. Best is trial 4 with value: -0.7192056747499614.\n",
+      "[I 2024-04-28 13:58:22,962] Trial 7 finished with value: -0.35437191511033816 and parameters: {'hidden_layer': 1}. Best is trial 4 with value: -0.7192056747499614.\n",
+      "[I 2024-04-28 13:58:32,939] Trial 8 finished with value: -0.46924857013983573 and parameters: {'hidden_layer': 1}. Best is trial 4 with value: -0.7192056747499614.\n",
+      "[I 2024-04-28 13:58:47,127] Trial 9 finished with value: -0.6718245833426378 and parameters: {'hidden_layer': 4}. Best is trial 4 with value: -0.7192056747499614.\n"
+     ]
+    }
+   ],
+   "source": [
+    "def objective(trial):\n",
+    "    hidden_layer = trial.suggest_int(\"hidden_layer\", 0, 6, step=1)\n",
+    "    model = Model(hidden_size=2**hidden_layer)\n",
+    "    model.fit(X_train, y_train)\n",
+    "    pred = model.predict(X_test)\n",
+    "    return -f1_score(y_test, pred, average='macro')\n",
+    "study = optuna.create_study(storage=storage, study_name=\"hidden_layer\")\n",
+    "study.optimize(objective, n_trials=10)\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-04-28T13:58:47.129260Z",
+     "start_time": "2024-04-28T13:54:45.434150Z"
+    }
+   },
+   "id": "9769cac2632f786c",
+   "execution_count": 51
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "size: 32 0.7587148200217135\n",
+      "size: 32 0.7007023098468655\n",
+      "size: 32 0.703371572957317\n",
+      "size: 16 0.6527439030663686\n",
+      "size: 16 0.6701033952446381\n",
+      "size: 16 0.6303813565416863\n",
+      "size: 64 0.6967700946646325\n",
+      "size: 64 0.6868739609967679\n",
+      "size: 64 0.7137992158825491\n"
+     ]
+    }
+   ],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)\n",
+    "not_nan_indices = np.argwhere(~np.isnan(np.array(y_test))).squeeze()\n",
+    "y_test = [y_test[i] for i in not_nan_indices]\n",
+    "X_test = [X_test[i] for i in not_nan_indices]\n",
+    "for i in range(3):\n",
+    "    model = Model(hidden_size=32)\n",
+    "    model.fit(X_train, y_train)\n",
+    "    pred = model.predict(X_test)\n",
+    "    print(\"size: 32\", f1_score(y_test, pred, average='macro'))\n",
+    "for i in range(3):\n",
+    "    model = Model(hidden_size=16)\n",
+    "    model.fit(X_train, y_train)\n",
+    "    pred = model.predict(X_test)\n",
+    "    print(\"size: 16\", f1_score(y_test, pred, average='macro'))\n",
+    "    \n",
+    "for i in range(3):\n",
+    "    model = Model(hidden_size=64)\n",
+    "    model.fit(X_train, y_train)\n",
+    "    pred = model.predict(X_test)\n",
+    "    print(\"size: 64\", f1_score(y_test, pred, average='macro'))\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-04-28T14:04:15.161623Z",
+     "start_time": "2024-04-28T13:59:45.112984Z"
+    }
+   },
+   "id": "17d540d809407eb4",
+   "execution_count": 52
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1->8->16 0.6983043227229274\n",
+      "1->8->16 0.6151284777600567\n",
+      "1->8->16 0.5974537037037037\n"
+     ]
+    }
+   ],
+   "source": [
+    "for i in range(3):\n",
+    "    model = Model(batch_size=64, epochs=20, lr=0.005)\n",
+    "    model.fit(X_train, y_train)\n",
+    "    pred = model.predict(X_test)\n",
+    "    print(\"1->8->16\", f1_score(y_test, pred, average='macro'))\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-04-28T14:12:46.923583Z",
+     "start_time": "2024-04-28T14:11:28.546397Z"
+    }
+   },
+   "id": "3ffe3c938a4ff38c",
+   "execution_count": 61
+  },
+  {
+   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1->8->16 0.655143327874276\n",
+      "1->8->16 0.6882410205760642\n",
+      "1->8->16 0.7252563893010118\n"
+     ]
+    }
+   ],
+   "source": [
+    "for i in range(3):\n",
+    "    model = Model()\n",
+    "    model.fit(X_train, y_train)\n",
+    "    pred = model.predict(X_test)\n",
+    "    print(\"1->8->16\", f1_score(y_test, pred, average='macro'))\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2024-04-28T14:13:57.574163Z",
+     "start_time": "2024-04-28T14:12:46.924962Z"
+    }
+   },
+   "id": "bf98f16797188024",
+   "execution_count": 62
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}