From d294ac0e389b2c551d4b03450127de2bd6ae88b3 Mon Sep 17 00:00:00 2001 From: Yadunand Prem Date: Mon, 29 Apr 2024 12:45:46 +0800 Subject: [PATCH] feat: idek anymore --- cs2109s/labs/final/final.py | 44 +-- cs2109s/labs/final/main.ipynb | 64 +++-- cs2109s/labs/final/scratchpad.ipynb | 407 ++++++++++++++++++++++++---- 3 files changed, 417 insertions(+), 98 deletions(-) diff --git a/cs2109s/labs/final/final.py b/cs2109s/labs/final/final.py index 311373c..27b4795 100644 --- a/cs2109s/labs/final/final.py +++ b/cs2109s/labs/final/final.py @@ -16,6 +16,7 @@ from torch import nn import numpy as np import torch import os +from torchvision.transforms.functional import equalize class CNN3D(nn.Module): def __init__(self, hidden_size=32, dropout=0.0): @@ -27,7 +28,7 @@ class CNN3D(nn.Module): self.maxpool = nn.MaxPool3d(kernel_size=2, stride=2) self.fc1 = nn.Linear(hidden_size*32, 256) # Calculate input size based on output from conv3 self.fc2 = nn.Linear(256, 6) - self.dropout = nn.Dropout(dropout) + # self.dropout = nn.Dropout(dropout) def forward(self, x): x = self.conv1(x) @@ -37,7 +38,7 @@ class CNN3D(nn.Module): x = self.conv2(x) x = self.relu(x) x = self.maxpool(x) - x = self.dropout(x) + # x = self.dropout(x) x = x.view(x.size(0), -1) # Flatten features for fully connected layers x = self.fc1(x) @@ -56,17 +57,16 @@ def train(model, criterion, optimizer, loader, epochs=5): print(f'Epoch {epoch}, Loss: {loss.item()}') return model - - - class Model(): - def __init__(self, batch_size=8,lr=0.001,epochs=10, dropout=0.0, hidden_size=32): + def __init__(self, batch_size=64,lr=0.001,epochs=5, dropout=0.0, hidden_size=32, n_samples=900): + print(batch_size, epochs, lr, dropout, hidden_size, n_samples) self.batch_size = batch_size self.lr = lr self.epochs = epochs self.model = CNN3D(dropout=dropout, hidden_size=hidden_size) self.criterion = nn.CrossEntropyLoss() self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr) + self.n_samples = n_samples def fit(self, X, 
y): X, y = self.process_data(X, y) @@ -81,31 +81,25 @@ class Model(): tensor_videos = torch.tensor(X, dtype=torch.float32) # Clip values to 0 and 255 tensor_videos = np.clip(tensor_videos, 0, 255) - # TEMP - threshold = 180 - tensor_videos[tensor_videos > threshold] = 255 - tensor_videos[tensor_videos < threshold] = 0 - # END TEMP # Replace NaNs in each frame, with the average of the frame. This was generated with GPT for i in range(tensor_videos.shape[0]): for j in range(tensor_videos.shape[1]): tensor_videos[i][j][torch.isnan(tensor_videos[i][j])] = torch.mean( tensor_videos[i][j][~torch.isnan(tensor_videos[i][j])]) - X = torch.Tensor(tensor_videos.unsqueeze(1)) - result = self.model(X) + # tensor_videos = torch.Tensor(tensor_videos).to(torch.uint8).reshape(-1, 1, 16, 16) + # tensor_videos = equalize(tensor_videos).float().reshape(-1, 1, 6, 16, 16) + tensor_videos = torch.Tensor(tensor_videos).reshape(-1, 1, 6, 16, 16) + # some funky code to make the features more prominent + + result = self.model(tensor_videos) return torch.max(result, dim=1)[1].numpy() - def process_data(self, X, y, n_samples=600): + def process_data(self, X, y): y = np.array(y) X = np.array([video[:6] for video in X]) tensor_videos = torch.tensor(X, dtype=torch.float32) # Clip values to 0 and 255 tensor_videos = np.clip(tensor_videos, 0, 255) - # TEMP - threshold = 180 - tensor_videos[tensor_videos > threshold] = 255 - tensor_videos[tensor_videos < threshold] = 0 - # END TEMP # Replace NaNs in each frame, with the average of the frame. 
This was generated with GPT for i in range(tensor_videos.shape[0]): @@ -118,13 +112,19 @@ class Model(): indices = [np.argwhere(y == i).squeeze(1) for i in range(6)] # Get the number of samples to take for each class # Get the indices of the samples to take - indices_to_take = [np.random.choice(indices[i], n_samples, replace=True) for i in range(6)] + indices_to_take = [np.random.choice(indices[i], self.n_samples, replace=True) for i in range(6)] # Concatenate the indices indices_to_take = np.concatenate(indices_to_take) # Select the samples - tensor_videos = tensor_videos[indices_to_take].unsqueeze(1) + tensor_videos = tensor_videos[indices_to_take] + + tensor_videos = torch.Tensor(tensor_videos).reshape(-1, 1, 6, 16, 16) + # Reshape the tensor to int for image processing + # tensor_videos = torch.Tensor(tensor_videos).to(torch.uint8).reshape(-1, 1, 16, 16) + # tensor_videos = equalize(tensor_videos).float().reshape(-1, 1, 6, 16, 16) + y = y[indices_to_take] - return torch.Tensor(tensor_videos), torch.Tensor(y).long() + return tensor_videos, torch.Tensor(y).long() X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1) diff --git a/cs2109s/labs/final/main.ipynb b/cs2109s/labs/final/main.ipynb index 15c8937..aec4b4a 100644 --- a/cs2109s/labs/final/main.ipynb +++ b/cs2109s/labs/final/main.ipynb @@ -315,12 +315,12 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 10, "id": "a44b7aa4", "metadata": { "ExecuteTime": { - "end_time": "2024-04-28T12:00:17.228662Z", - "start_time": "2024-04-28T12:00:17.209494Z" + "end_time": "2024-04-28T12:27:25.926991Z", + "start_time": "2024-04-28T12:27:25.917322Z" } }, "outputs": [], @@ -406,8 +406,8 @@ " def fit(self, X, y):\n", " X, y = process_data(X, y)\n", " train_dataset = torch.utils.data.TensorDataset(X, y)\n", - " train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)\n", - " train(self.model, self.criterion, self.optimizer, train_loader, 10)\n", + " 
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)\n", + " train(self.model, self.criterion, self.optimizer, train_loader, 20)\n", "\n", " def predict(self, X):\n", " self.model.eval()\n", @@ -438,12 +438,12 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 2, "id": "4f4dd489", "metadata": { "ExecuteTime": { - "end_time": "2024-04-28T12:00:19.363096Z", - "start_time": "2024-04-28T12:00:19.352424Z" + "end_time": "2024-04-28T12:09:46.115322Z", + "start_time": "2024-04-28T12:09:45.631452Z" } }, "outputs": [], @@ -458,12 +458,12 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 3, "id": "3064e0ff", "metadata": { "ExecuteTime": { - "end_time": "2024-04-28T12:00:20.265060Z", - "start_time": "2024-04-28T12:00:20.234748Z" + "end_time": "2024-04-28T12:09:47.340881Z", + "start_time": "2024-04-28T12:09:47.317719Z" } }, "outputs": [], @@ -477,12 +477,12 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 12, "id": "27c9fd10", "metadata": { "ExecuteTime": { - "end_time": "2024-04-28T12:00:37.185569Z", - "start_time": "2024-04-28T12:00:22.239036Z" + "end_time": "2024-04-28T12:28:29.269402Z", + "start_time": "2024-04-28T12:28:02.494602Z" } }, "outputs": [ @@ -490,19 +490,29 @@ "name": "stdout", "output_type": "stream", "text": [ - "Epoch 0, Loss: 0.7495917081832886\n", - "Epoch 1, Loss: 0.42713749408721924\n", - "Epoch 2, Loss: 0.21424821019172668\n", - "Epoch 3, Loss: 0.02086367830634117\n", - "Epoch 4, Loss: 0.005386564414948225\n", - "Epoch 5, Loss: 0.00319607718847692\n", - "Epoch 6, Loss: 0.007663913071155548\n", - "Epoch 7, Loss: 0.003004509722813964\n", - "Epoch 8, Loss: 0.0044013322331011295\n", - "Epoch 9, Loss: 0.0016760551370680332\n", - "F1 Score (macro): 0.75\n", - "CPU times: user 57.8 s, sys: 1min 12s, total: 2min 10s\n", - "Wall time: 14.9 s\n" + "Epoch 0, Loss: 0.5610745549201965\n", + "Epoch 1, Loss: 0.22023160755634308\n", + "Epoch 2, Loss: 
0.03679683431982994\n", + "Epoch 3, Loss: 0.009054183959960938\n", + "Epoch 4, Loss: 0.0021134500857442617\n", + "Epoch 5, Loss: 0.002705463906750083\n", + "Epoch 6, Loss: 0.0045105633325874805\n", + "Epoch 7, Loss: 0.001958428416401148\n", + "Epoch 8, Loss: 0.0010891605634242296\n", + "Epoch 9, Loss: 0.0010821395553648472\n", + "Epoch 10, Loss: 0.0007317279814742506\n", + "Epoch 11, Loss: 0.0006673489115200937\n", + "Epoch 12, Loss: 0.00047141974209807813\n", + "Epoch 13, Loss: 0.00024128056247718632\n", + "Epoch 14, Loss: 0.0003150832490064204\n", + "Epoch 15, Loss: 0.0004005862574558705\n", + "Epoch 16, Loss: 0.00024190203112084419\n", + "Epoch 17, Loss: 0.0004451812419574708\n", + "Epoch 18, Loss: 0.000376795680494979\n", + "Epoch 19, Loss: 0.0003616203321143985\n", + "F1 Score (macro): 0.65\n", + "CPU times: user 2min 33s, sys: 255 ms, total: 2min 34s\n", + "Wall time: 26.8 s\n" ] } ], diff --git a/cs2109s/labs/final/scratchpad.ipynb b/cs2109s/labs/final/scratchpad.ipynb index 602d5f6..f014c4e 100644 --- a/cs2109s/labs/final/scratchpad.ipynb +++ b/cs2109s/labs/final/scratchpad.ipynb @@ -39,54 +39,52 @@ "id": "9c14a2d8", "metadata": {}, "source": [ - "**[TODO]**\n", - "\n", - "Please provide a summary of the ideas and steps that led you to your final model. Someone reading this summary should understand why you chose to approach the problem in a particular way and able to replicate your final model at a high level. Please ensure that your summary is detailed enough to provide an overview of your thought process and approach but also concise enough to be easily understandable. Also, please follow the guidelines given in the `main.ipynb`.\n", - "\n", - "This report should not be longer than **1-2 pages of A4 paper (up to around 1,000 words)**. Marks will be deducted if you do not follow instructions and you include too many words here. 
\n", - "\n", - "**[DELETE EVERYTHING FROM THE PREVIOUS TODO TO HERE BEFORE SUBMISSION]**\n", - "\n", "##### Overview\n", - "**[TODO]**\n", + "https://chat.openai.com/share/ec6c6778-d7cc-48e2-98d1-24b7f6a6a769\n", "\n", "##### 1. Descriptive Analysis\n", - "**[TODO]**\n", + "At the start, I *did* not read the main.ipynb and jumped straight into the scratchpad to read the data directly. After fiddling with the data for about an hour, being confused about the variable n (6-10) x 16 x 16 images, I decided to read the main.ipynb fully. I then realised that the data was a list of videos. With that in mind, I decided to plot out the images. However, on first observation, I did not realise that the images were Japanese characters. I only came to this understanding at 9.30PM on Sunday.\n", "\n", "##### 2. Detection and Handling of Missing Values\n", - "**[TODO]**\n", + "There were quite a few NaNs for both the X and y values. Initially, I considered the y NaNs as a class of its own. On further reading of the main file, I realised that the NaNs were not a class, but rows to be removed. After filtering out the y values, there were still the X values to consider. Initially, to get a quick model out, I decided to zero out the values. In the end, I decided to replace the NaNs with the average of the frame. This was done by iterating through each frame, and replacing the NaNs with the average of the frame. This was done with the assistance of GPT for the code generation.\n", "\n", "##### 3. Detection and Handling of Outliers\n", - "**[TODO]**\n", + "There were quite a few outliers in the dataset, which strayed away from the general min and max of 0 and 255. To resolve this, I used the np.clip(X, 0, 255) to upper bound and lower bound the values. I did some experimentation with trying to make the different values more distinct, but this was not successful. With some rudimentary code, I was able to improve the performance of the detection somewhat, but it was not very consistent. 
\n", + "\n", + "I decided to use equalize also, which should make the data more prominent. However, with the equalize method, I hit the time limit, and was not able to use that. \n", "\n", "##### 4. Detection and Handling of Class Imbalance \n", - "**[TODO]**\n", + "There was heavy class imbalance in the dataset. This was fixed with sampling the data. I used both upsampling and downsampling, where I upsampled data when it didn't hit the minimum I needed from each class, and downsampled data when it exceeded the maximum I needed from each class. \n", "\n", "##### 5. Understanding Relationship Between Variables\n", - "**[TODO]**\n", + "This analysis was not particularly done. Since I was processing it as images, I didn't plan to use dimensionality reduction techniques or other means. \n", + "\n", "\n", "##### 6. Data Visualization\n", - "**[TODO]** \n", + "The data visualisation can be seen below. Generally, I used data visualisation to directly see the images, and also see the distribution of the classes. \n", + "\n", + "Visualization was also helpful in viewing the outliers, especially with boxplots. However, I didn't use visualisations to inspect the model output. \n", "##### 7. General Preprocessing\n", - "**[TODO]**\n", + "The general preprocessing was done by reducing the data to 6 frames, and then processing the data to remove NaNs and clip the values. The data was then sampled to `600` elements, (which was obtained through hyperparameter optimisation). \n", " \n", "##### 8. Feature Selection \n", - "**[TODO]**\n", + "No feature selection or engineering was done; generally the entire dataset was used. NaN values were replaced by frame averages. \n", "\n", "##### 9. Feature Engineering\n", - "**[TODO]**\n", + "No feature engineering was done either. \n", "\n", "##### 10. Creating Models\n", - "**[TODO]**\n", - "\n", + "The model that I eventually came up with after a lot of experimentation is a 2 layer CNN. 
I had experimented with a CNN -> RNN, but that didn't work very well. The first model I had created was a 2D CNN, but I struggled with mapping the videos to a stream of photos effectively. I then created a 1 layer conv3d model, and that gave me a solid result. I then experimented with the possible 2nd layers for the model. I first used LSTM, as I thought that since this was video data, it would be more effective. However, the LSTM model did not give good results. I am predicting this is due to bad hyperparameter optimisation. When I switched the 2nd layer to another 3D CNN, it worked quite well. Implementing batch normalization also improved the results greatly. \n", "##### 11. Model Evaluation\n", - "**[TODO]**\n", + "The model was evaluated using F1. I averaged out the results of 3 runs of the model with F1 before I decided to finalize this. \n", "\n", "##### 12. Hyperparameters Search\n", - "**[TODO]**\n", + "Optuna was used to search for hyperparameters. Learning this library was quite useful. However, I wasn't able to optimally implement this, due to large variances when running the model multiple times. Figuring out how to take the average value of multiple runs for each parameter search would have been useful to find the most optimal parameters. I mainly used this to find parameters for lr, hidden layers and batch size. I wasn't able to use it optimally due to time constraints too. \n", "\n", "##### Conclusion\n", - "**[TODO]**" + "I spent way too long due to a bad understanding of a lot of concepts in AI. This exam helped me to learn a lot of stuff that we had used in class, but didn't fully understand. For example, in the psets, a lot of the code writing was very mechanical. But this exam allowed us to be creative and problem solve. However, it would have been nicer if the duration was much shorter. \n", + "\n", + "The main aspect I struggled with was trying to figure out what to do with the videos. 
The initial method of processing them as just images was not very effective. This was mainly due to my inability to code out a solution well. " ] }, { @@ -156,12 +154,12 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 267, "id": "6297e25a", "metadata": { "ExecuteTime": { - "end_time": "2024-04-28T06:46:52.453152Z", - "start_time": "2024-04-28T06:46:52.428539Z" + "end_time": "2024-04-28T13:26:08.028976Z", + "start_time": "2024-04-28T13:26:08.014557Z" } }, "outputs": [ @@ -205,40 +203,124 @@ }, { "cell_type": "code", - "execution_count": 44, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import torch\n", + "\n", + "def show_images(images, n_row=5, n_col=5, figsize=[12,12]):\n", + " _, axs = plt.subplots(n_row, n_col, figsize=figsize)\n", + " axs = axs.flatten()\n", + " for img, ax in zip(images, axs):\n", + " ax.imshow(img, cmap='gray')\n", + " plt.show()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-04-28T15:03:57.588510Z", + "start_time": "2024-04-28T15:03:57.585485Z" + } + }, + "id": "f8155151c5e660f5", + "execution_count": 330 + }, + { + "cell_type": "code", + "execution_count": 338, "id": "3b1f62dd", "metadata": { "ExecuteTime": { - "end_time": "2024-04-28T06:46:52.472306Z", - "start_time": "2024-04-28T06:46:52.454360Z" + "end_time": "2024-04-28T15:07:52.672987Z", + "start_time": "2024-04-28T15:07:51.997288Z" } }, "outputs": [ { - "ename": "ValueError", - "evalue": "setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. 
The detected shape was (2250,) + inhomogeneous part.", - "output_type": "error", - "traceback": [ - "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", - "\u001B[0;31mValueError\u001B[0m Traceback (most recent call last)", - "Cell \u001B[0;32mIn[44], line 10\u001B[0m\n\u001B[1;32m 8\u001B[0m X6 \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39marray([video[:\u001B[38;5;241m6\u001B[39m] \u001B[38;5;28;01mfor\u001B[39;00m video \u001B[38;5;129;01min\u001B[39;00m X])\n\u001B[1;32m 9\u001B[0m \u001B[38;5;66;03m# Now that they are consistent, we can convert them to a numpy array\u001B[39;00m\n\u001B[0;32m---> 10\u001B[0m X6 \u001B[38;5;241m=\u001B[39m \u001B[43mnp\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43marray\u001B[49m\u001B[43m(\u001B[49m\u001B[43mX\u001B[49m\u001B[43m)\u001B[49m\n", - "\u001B[0;31mValueError\u001B[0m: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2250,) + inhomogeneous part." + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([6, 1, 16, 16])\n" + ] + }, + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([227, 255, 255, 255, 212, 242, 255, 255, 247, 255, 255, 255, 208, 231,\n", + " 230, 223], dtype=torch.uint8)\n", + "[227. 255. 255. 255. 212. 242. 255. 255. 247. 255. 255. 255. 208. 231.\n", + " 230. 223.]\n" ] } ], "source": [ - "# Remove nans from the input. This needs to be done in the model for training data as well\n", + "# Remove NaN's from the input\n", + "import torchvision\n", "not_nan_indices = np.argwhere(~np.isnan(np.array(y))).squeeze()\n", "y = [y[i] for i in not_nan_indices]\n", "X = [X[i] for i in not_nan_indices]\n", - "y = np.array(y).astype(int)\n", + "# Plot each image in a row\n", + "tmp = X[0][:6].copy()\n", + "# Set 255 to all values in X which are greater than 120\n", + "# Set 0 to all values in X which are less than 100\n", "\n", - "# Since each video varies in length, we will take the min length, 6, for each video\n", - "X6 = np.array([video[:6] for video in X])\n", - "# Now that they are consistent, we can convert them to a numpy array\n", - "X6 = np.array(X)\n" + "tmp = np.array(tmp)\n", + "tmp = np.nan_to_num(tmp, 0)\n", + "tmp = np.clip(tmp, 0, 255)\n", + "tensor = torch.Tensor(tmp)\n", + "tensor = tensor.to(torch.uint8).reshape(-1, 1, 16, 16)\n", + "print(tensor.shape)\n", + "tensor = torchvision.transforms.functional.equalize(tensor)\n", + "tensor = tensor.reshape(6, 16, 16)\n", + "# 100 all values less than \n", + "show_images(tensor, n_row=1, n_col=6, figsize=[20, 20])\n", + "show_images(tmp, n_row=1, n_col=6, figsize=[20, 20])\n", + "print(tensor[0][0])\n", + "print(tmp[0][0])\n", + "# At 9.30PM on Sunday I've come to realies that this is japanese characters... 
A bit too late to figure that out...\n" ] }, + { + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1.0, 1.0, 4.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 0.0]\n" + ] + } + ], + "source": [ + "print(y[:10]) # y is just a list of values" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-04-28T14:16:29.783690Z", + "start_time": "2024-04-28T14:16:29.781229Z" + } + }, + "id": "3b890da00340343f", + "execution_count": 294 + }, { "cell_type": "code", "outputs": [], @@ -356,16 +438,73 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 349, "id": "93f82e42", "metadata": { "ExecuteTime": { - "end_time": "2024-04-28T06:46:52.483551Z", - "start_time": "2024-04-28T06:46:52.482068Z" + "end_time": "2024-04-28T15:44:25.035919Z", + "start_time": "2024-04-28T15:44:24.956794Z" } }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 349, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot the values in all the images. \n", + "# Flatten the image\n", + "data_to_plot = pd.DataFrame(X[0].reshape(-1))\n", + "data_to_plot.boxplot()\n", + "\n" + ] + }, + { + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 351, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "y_pd = pd.DataFrame(torch.tensor(y).int())\n", + "y_pd.value_counts().plot(kind='bar')" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-04-28T15:45:30.464473Z", + "start_time": "2024-04-28T15:45:30.370547Z" + } + }, + "id": "1752fd9dbaef6786", + "execution_count": 351 }, { "cell_type": "markdown", @@ -712,7 +851,177 @@ "id": "81addd51", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "def objective(trial):\n", + " batch = trial.suggest_int(\"batch_size\", 1, 12, log=True)\n", + " epochs = trial.suggest_int(\"epochs\", 1, 20)\n", + " model = Model(batch_size=2**batch, epochs=epochs)\n", + " model.fit(X_train, y_train)\n", + " pred = model.predict(X_test)\n", + " return -f1_score(y_test, pred, average='macro')\n", + "# Run optimization\n", + "# storage = optuna.storages.InMemoryStorage()\n", + "# study = optuna.create_study(storage=storage)\n", + "# study.optimize(objective, n_trials=10)\n", + "# \n", + "# best_score = study.best_value\n", + "# best_params = study.best_params\n", + "# \n", + "# print(best_score, best_params)\n" + ] + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "import numpy as np\n", + "import torch\n", + "import os\n", + "\n", + "from torch import nn\n", + "\n", + "with open('data.npy', 'rb') as f:\n", + " data = np.load(f, allow_pickle=True).item()\n", + " X = data['data']\n", + " y = data['label']\n", + "\n", + "from torch import nn\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "from torch import nn\n", + "import numpy as np\n", + "import torch\n", + "import os\n", + "from torchvision.transforms.functional import equalize\n", + "\n", + "class CNN3D(nn.Module):\n", + " def __init__(self, hidden_size=32, dropout=0.0):\n", + " super(CNN3D, self).__init__()\n", + " self.conv1 = nn.Conv3d(1, hidden_size, kernel_size=3, stride=1, padding=1)\n", + " 
self.batchnorm = nn.BatchNorm3d(hidden_size)\n", + " self.conv2 = nn.Conv3d(hidden_size, hidden_size*2, kernel_size=3, stride=1, padding=1)\n", + " self.relu = nn.ReLU()\n", + " self.maxpool = nn.MaxPool3d(kernel_size=2, stride=2)\n", + " self.fc1 = nn.Linear(hidden_size*32, 256) # Calculate input size based on output from conv3\n", + " self.fc2 = nn.Linear(256, 6)\n", + " # self.dropout = nn.Dropout(dropout)\n", + "\n", + " def forward(self, x):\n", + " x = self.conv1(x)\n", + " x = self.relu(x)\n", + " x = self.maxpool(x)\n", + " x = self.batchnorm(x)\n", + " x = self.conv2(x)\n", + " x = self.relu(x)\n", + " x = self.maxpool(x)\n", + " # x = self.dropout(x)\n", + "\n", + " x = x.view(x.size(0), -1) # Flatten features for fully connected layers\n", + " x = self.fc1(x)\n", + " x = self.relu(x)\n", + " x = self.fc2(x)\n", + " return x\n", + "\n", + "def train(model, criterion, optimizer, loader, epochs=5):\n", + " for epoch in range(epochs):\n", + " for idx, (inputs, labels) in enumerate(loader):\n", + " optimizer.zero_grad()\n", + " outputs = model(inputs)\n", + " loss = criterion(outputs, labels)\n", + " loss.backward()\n", + " optimizer.step()\n", + " print(f'Epoch {epoch}, Loss: {loss.item()}')\n", + " return model\n", + "\n", + "class Model():\n", + " def __init__(self, batch_size=64,lr=0.001,epochs=10, dropout=0.0, hidden_size=32, n_samples=900):\n", + " print(batch_size, epochs, lr, dropout, hidden_size, n_samples)\n", + " self.batch_size = batch_size\n", + " self.lr = lr\n", + " self.epochs = epochs\n", + " self.model = CNN3D(dropout=dropout, hidden_size=hidden_size)\n", + " self.criterion = nn.CrossEntropyLoss()\n", + " self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)\n", + " self.n_samples = n_samples\n", + "\n", + " def fit(self, X, y):\n", + " X, y = self.process_data(X, y)\n", + " train_dataset = torch.utils.data.TensorDataset(X, y)\n", + " train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, 
shuffle=True)\n", + " train(self.model, self.criterion, self.optimizer, train_loader, self.epochs)\n", + "\n", + " def predict(self, X):\n", + " self.model.eval()\n", + " with torch.no_grad():\n", + " X = np.array([video[:6] for video in X])\n", + " tensor_videos = torch.tensor(X, dtype=torch.float32)\n", + " # Clip values to 0 and 255\n", + " tensor_videos = np.clip(tensor_videos, 0, 255)\n", + "\n", + " # Replace NaNs in each frame, with the average of the frame. This was generated with GPT\n", + " for i in range(tensor_videos.shape[0]):\n", + " for j in range(tensor_videos.shape[1]):\n", + " tensor_videos[i][j][torch.isnan(tensor_videos[i][j])] = torch.mean(\n", + " tensor_videos[i][j][~torch.isnan(tensor_videos[i][j])])\n", + " # tensor_videos = torch.Tensor(tensor_videos).to(torch.uint8).reshape(-1, 1, 16, 16)\n", + " # tensor_videos = equalize(tensor_videos).float().reshape(-1, 1, 6, 16, 16)\n", + " tensor_videos = torch.Tensor(tensor_videos).reshape(-1, 1, 6, 16, 16)\n", + " # some funky code to make the features more prominent\n", + "\n", + " result = self.model(tensor_videos)\n", + " return torch.max(result, dim=1)[1].numpy()\n", + " def process_data(self, X, y):\n", + " y = np.array(y)\n", + " X = np.array([video[:6] for video in X])\n", + " tensor_videos = torch.tensor(X, dtype=torch.float32)\n", + " # Clip values to 0 and 255\n", + " tensor_videos = np.clip(tensor_videos, 0, 255)\n", + "\n", + " # Replace NaNs in each frame, with the average of the frame. This was generated with GPT\n", + " for i in range(tensor_videos.shape[0]):\n", + " for j in range(tensor_videos.shape[1]):\n", + " tensor_videos[i][j][torch.isnan(tensor_videos[i][j])] = torch.mean(\n", + " tensor_videos[i][j][~torch.isnan(tensor_videos[i][j])])\n", + " # Undersample the data for each of the 6 classes. 
Select max of 300 samples for each class\n", + " # Very much generated with the assitance of chatGPT with some modifications\n", + " # Get the indices of each class\n", + " indices = [np.argwhere(y == i).squeeze(1) for i in range(6)]\n", + " # Get the number of samples to take for each class\n", + " # Get the indices of the samples to take\n", + " indices_to_take = [np.random.choice(indices[i], self.n_samples, replace=True) for i in range(6)]\n", + " # Concatenate the indices\n", + " indices_to_take = np.concatenate(indices_to_take)\n", + " # Select the samples\n", + " tensor_videos = tensor_videos[indices_to_take]\n", + "\n", + " tensor_videos = torch.Tensor(tensor_videos).reshape(-1, 1, 6, 16, 16)\n", + " # Reshape the tensor to int for image processing\n", + " # tensor_videos = torch.Tensor(tensor_videos).to(torch.uint8).reshape(-1, 1, 16, 16)\n", + " # tensor_videos = equalize(tensor_videos).float().reshape(-1, 1, 6, 16, 16)\n", + "\n", + " y = y[indices_to_take]\n", + " return tensor_videos, torch.Tensor(y).long()\n", + "\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)\n", + "\n", + "not_nan_indices = np.argwhere(~np.isnan(np.array(y_test))).squeeze()\n", + "y_test = [y_test[i] for i in not_nan_indices]\n", + "X_test = [X_test[i] for i in not_nan_indices]\n", + "\n", + "print(\"init model\")\n", + "model = Model()\n", + "model.fit(X_train, y_train)\n", + "\n", + "from sklearn.metrics import f1_score\n", + "\n", + "y_pred = model.predict(X_test)\n", + "print(\"F1 Score (macro): {0:.2f}\".format(f1_score(y_test, y_pred, average='macro'))) # You may encounter errors, you are expected to figure out what's the issue." + ], + "metadata": { + "collapsed": false + }, + "id": "a56142ab267bafaa" } ], "metadata": {