experimental
This commit is contained in:
parent
2d95b112c5
commit
215cde2d19
@ -17,17 +17,17 @@ import numpy as np
|
||||
import torch
|
||||
import os
|
||||
|
||||
|
||||
class CNN3D(nn.Module):
|
||||
def __init__(self):
|
||||
def __init__(self, hidden_size=32, dropout=0.0):
|
||||
super(CNN3D, self).__init__()
|
||||
self.conv1 = nn.Conv3d(1, 32, kernel_size=3, stride=1, padding=1)
|
||||
self.conv2 = nn.Conv3d(32, 64, kernel_size=3, stride=1, padding=1)
|
||||
self.batchnorm = nn.BatchNorm3d(32)
|
||||
self.conv1 = nn.Conv3d(1, hidden_size, kernel_size=3, stride=1, padding=1)
|
||||
self.batchnorm = nn.BatchNorm3d(hidden_size)
|
||||
self.conv2 = nn.Conv3d(hidden_size, hidden_size*2, kernel_size=3, stride=1, padding=1)
|
||||
self.relu = nn.ReLU()
|
||||
self.maxpool = nn.MaxPool3d(kernel_size=2, stride=2)
|
||||
self.fc1 = nn.Linear(1024, 256) # Calculate input size based on output from conv3
|
||||
self.fc1 = nn.Linear(hidden_size*32, 256) # Calculate input size based on output from conv3
|
||||
self.fc2 = nn.Linear(256, 6)
|
||||
self.dropout = nn.Dropout(dropout)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
@ -37,6 +37,7 @@ class CNN3D(nn.Module):
|
||||
x = self.conv2(x)
|
||||
x = self.relu(x)
|
||||
x = self.maxpool(x)
|
||||
x = self.dropout(x)
|
||||
|
||||
x = x.view(x.size(0), -1) # Flatten features for fully connected layers
|
||||
x = self.fc1(x)
|
||||
@ -49,7 +50,6 @@ def train(model, criterion, optimizer, loader, epochs=5):
|
||||
for idx, (inputs, labels) in enumerate(loader):
|
||||
optimizer.zero_grad()
|
||||
outputs = model(inputs)
|
||||
# print(outputs)
|
||||
loss = criterion(outputs, labels)
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
@ -57,44 +57,22 @@ def train(model, criterion, optimizer, loader, epochs=5):
|
||||
return model
|
||||
|
||||
|
||||
def process_data(X, y):
    """Build a class-balanced training set from raw videos and labels.

    Each video is cut to its first 6 frames, pixel values are clamped to
    [0, 255], NaN pixels are replaced by the mean of the valid pixels of the
    same frame, and every class 0..5 is resampled with replacement to exactly
    600 videos.

    Args:
        X: sequence of videos, each indexable by frame. The truncated videos
            are stacked into a single array, so they are expected to share
            one shape.
        y: sequence of labels aligned with ``X``. Entries that do not compare
            equal to one of 0..5 (for example NaN) are never selected.

    Returns:
        ``(videos, labels)``: a float32 tensor with a channel axis inserted at
        dim 1, and an int64 label tensor. Both are ordered class by class.

    Raises:
        ValueError: if one of the 6 classes has no sample to draw from.
    """
    num_classes = 6
    frames_per_video = 6
    num_samples_to_take = 600

    labels = np.asarray(y)

    # Resample every class to the same size. Drawing with replacement
    # oversamples classes that have fewer than `num_samples_to_take` videos.
    picked = []
    for cls in range(num_classes):
        candidates = np.flatnonzero(labels == cls)
        if candidates.size == 0:
            raise ValueError(f"no training samples with label {cls}")
        picked.append(
            np.random.choice(candidates, num_samples_to_take, replace=True))
    picked = np.concatenate(picked)

    clips = np.array([video[:frames_per_video] for video in X])
    videos = torch.tensor(clips, dtype=torch.float32)

    # Remember the missing pixels, then zero them so they cannot leak into
    # the clamp or the per-frame sums below.
    missing = torch.isnan(videos)
    videos = videos.masked_fill(missing, 0.0)

    # Clamp with the torch API; the tensor is no longer passed through numpy.
    videos = torch.clamp(videos, 0, 255)

    # Replace each missing pixel by the mean of the valid pixels of its frame.
    # A frame with no valid pixel at all falls back to 0 instead of staying NaN.
    n_videos, n_frames = videos.shape[0], videos.shape[1]
    valid = (~missing).reshape(n_videos, n_frames, -1)
    flat = videos.reshape(n_videos, n_frames, -1)
    counts = valid.sum(dim=2).clamp(min=1)
    frame_means = (flat * valid).sum(dim=2) / counts
    frame_means = frame_means.reshape(
        n_videos, n_frames, *([1] * (videos.dim() - 2)))
    videos = torch.where(missing, frame_means, videos)

    # Insert the channel axis expected by Conv3d: (N, 1, frames, ...).
    videos = videos[picked].unsqueeze(1)
    return videos, torch.as_tensor(labels[picked]).long()
|
||||
|
||||
|
||||
class Model():
|
||||
def __init__(self):
|
||||
self.model = CNN3D()
|
||||
def __init__(self, batch_size=8,lr=0.001,epochs=10, dropout=0.0, hidden_size=32):
|
||||
self.batch_size = batch_size
|
||||
self.lr = lr
|
||||
self.epochs = epochs
|
||||
self.model = CNN3D(dropout=dropout, hidden_size=hidden_size)
|
||||
self.criterion = nn.CrossEntropyLoss()
|
||||
self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001)
|
||||
self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
|
||||
|
||||
def fit(self, X, y):
|
||||
X, y = process_data(X, y)
|
||||
X, y = self.process_data(X, y)
|
||||
train_dataset = torch.utils.data.TensorDataset(X, y)
|
||||
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
|
||||
train(self.model, self.criterion, self.optimizer, train_loader, 10)
|
||||
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
|
||||
train(self.model, self.criterion, self.optimizer, train_loader, self.epochs)
|
||||
|
||||
def predict(self, X):
|
||||
self.model.eval()
|
||||
@ -103,6 +81,12 @@ class Model():
|
||||
tensor_videos = torch.tensor(X, dtype=torch.float32)
|
||||
# Clip values to 0 and 255
|
||||
tensor_videos = np.clip(tensor_videos, 0, 255)
|
||||
# TEMP
|
||||
threshold = 180
|
||||
tensor_videos[tensor_videos > threshold] = 255
|
||||
tensor_videos[tensor_videos < threshold] = 0
|
||||
# END TEMP
|
||||
|
||||
# Replace NaNs in each frame, with the average of the frame. This was generated with GPT
|
||||
for i in range(tensor_videos.shape[0]):
|
||||
for j in range(tensor_videos.shape[1]):
|
||||
@ -111,6 +95,37 @@ class Model():
|
||||
X = torch.Tensor(tensor_videos.unsqueeze(1))
|
||||
result = self.model(X)
|
||||
return torch.max(result, dim=1)[1].numpy()
|
||||
def process_data(self, X, y, n_samples=600):
    """Build a class-balanced, binarized training set from raw videos.

    Each video is cut to its first 6 frames and clamped to [0, 255]. Pixels
    above 180 become 255 and pixels below 180 become 0. NaN pixels are then
    replaced by the mean of the valid pixels of the same frame, and every
    class 0..5 is resampled with replacement to exactly ``n_samples`` videos.

    Args:
        X: sequence of videos, each indexable by frame. The truncated videos
            are stacked into a single array, so they are expected to share
            one shape.
        y: sequence of labels aligned with ``X``. Entries that do not compare
            equal to one of 0..5 (for example NaN) are never selected.
        n_samples: number of videos drawn per class.

    Returns:
        ``(videos, labels)``: a float32 tensor with a channel axis inserted at
        dim 1, and an int64 label tensor. Both are ordered class by class.

    Raises:
        ValueError: if one of the 6 classes has no sample to draw from.
    """
    num_classes = 6
    frames_per_video = 6
    threshold = 180  # TEMP binarization cut-off, same value predict() uses

    labels = np.asarray(y)

    # Resample every class to the same size. Drawing with replacement
    # oversamples classes that have fewer than `n_samples` videos.
    picked = []
    for cls in range(num_classes):
        candidates = np.flatnonzero(labels == cls)
        if candidates.size == 0:
            raise ValueError(f"no training samples with label {cls}")
        picked.append(np.random.choice(candidates, n_samples, replace=True))
    picked = np.concatenate(picked)

    clips = np.array([video[:frames_per_video] for video in X])
    videos = torch.tensor(clips, dtype=torch.float32)

    # Remember the missing pixels, then zero them so they cannot leak into
    # the clamp, the binarization or the per-frame sums below.
    missing = torch.isnan(videos)
    videos = videos.masked_fill(missing, 0.0)

    # Clamp with the torch API; the tensor is no longer passed through numpy.
    videos = torch.clamp(videos, 0, 255)

    # TEMP: binarize around the threshold. Pixels exactly equal to the
    # threshold are left unchanged, which matches the strict comparisons
    # predict() uses; keep the two in sync if this rule changes.
    bright = videos > threshold
    dark = videos < threshold
    videos = videos.masked_fill(bright, 255.0).masked_fill(dark, 0.0)
    # END TEMP

    # Replace each missing pixel by the mean of the valid pixels of its frame.
    # A frame with no valid pixel at all falls back to 0 instead of staying NaN.
    n_videos, n_frames = videos.shape[0], videos.shape[1]
    valid = (~missing).reshape(n_videos, n_frames, -1)
    flat = videos.reshape(n_videos, n_frames, -1)
    counts = valid.sum(dim=2).clamp(min=1)
    frame_means = (flat * valid).sum(dim=2) / counts
    frame_means = frame_means.reshape(
        n_videos, n_frames, *([1] * (videos.dim() - 2)))
    videos = torch.where(missing, frame_means, videos)

    # Insert the channel axis expected by Conv3d: (N, 1, frames, ...).
    videos = videos[picked].unsqueeze(1)
    return videos, torch.as_tensor(labels[picked]).long()
|
||||
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
|
||||
|
||||
@ -118,6 +133,7 @@ not_nan_indices = np.argwhere(~np.isnan(np.array(y_test))).squeeze()
|
||||
y_test = [y_test[i] for i in not_nan_indices]
|
||||
X_test = [X_test[i] for i in not_nan_indices]
|
||||
|
||||
print("init model")
|
||||
model = Model()
|
||||
model.fit(X_train, y_train)
|
||||
|
||||
|
635
cs2109s/labs/final/optuna.ipynb
Normal file
635
cs2109s/labs/final/optuna.ipynb
Normal file
@ -0,0 +1,635 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from torch import nn\n",
|
||||
"import numpy as np\n",
|
||||
"import torch\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"class CNN3D(nn.Module):\n",
|
||||
" def __init__(self, hidden_size=32, dropout=0.0):\n",
|
||||
" super(CNN3D, self).__init__()\n",
|
||||
" self.conv1 = nn.Conv3d(1, hidden_size, kernel_size=3, stride=1, padding=1)\n",
|
||||
" self.batchnorm = nn.BatchNorm3d(hidden_size)\n",
|
||||
" self.conv2 = nn.Conv3d(hidden_size, hidden_size*2, kernel_size=3, stride=1, padding=1)\n",
|
||||
" self.relu = nn.ReLU()\n",
|
||||
" self.maxpool = nn.MaxPool3d(kernel_size=2, stride=2)\n",
|
||||
" self.fc1 = nn.Linear(hidden_size*32, 256) # Calculate input size based on output from conv3\n",
|
||||
" self.fc2 = nn.Linear(256, 6)\n",
|
||||
" self.dropout = nn.Dropout(dropout)\n",
|
||||
"\n",
|
||||
" def forward(self, x):\n",
|
||||
" x = self.conv1(x)\n",
|
||||
" x = self.relu(x)\n",
|
||||
" x = self.maxpool(x)\n",
|
||||
" x = self.batchnorm(x)\n",
|
||||
" x = self.conv2(x)\n",
|
||||
" x = self.relu(x)\n",
|
||||
" x = self.maxpool(x)\n",
|
||||
" x = self.dropout(x)\n",
|
||||
"\n",
|
||||
" x = x.view(x.size(0), -1) # Flatten features for fully connected layers\n",
|
||||
" x = self.fc1(x)\n",
|
||||
" x = self.relu(x)\n",
|
||||
" x = self.fc2(x)\n",
|
||||
" return x\n",
|
||||
"\n",
|
||||
"def train(model, criterion, optimizer, loader, epochs=5):\n",
|
||||
" for epoch in range(epochs):\n",
|
||||
" for idx, (inputs, labels) in enumerate(loader):\n",
|
||||
" optimizer.zero_grad()\n",
|
||||
" outputs = model(inputs)\n",
|
||||
" loss = criterion(outputs, labels)\n",
|
||||
" loss.backward()\n",
|
||||
" optimizer.step()\n",
|
||||
" # print(f'Epoch {epoch}, Loss: {loss.item()}')\n",
|
||||
" return model\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Model():\n",
|
||||
" def __init__(self, batch_size=8,lr=0.001,epochs=10, dropout=0.0, hidden_size=32):\n",
|
||||
" self.batch_size = batch_size\n",
|
||||
" self.lr = lr\n",
|
||||
" self.epochs = epochs\n",
|
||||
" self.model = CNN3D(dropout=dropout, hidden_size=hidden_size)\n",
|
||||
" self.criterion = nn.CrossEntropyLoss()\n",
|
||||
" self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)\n",
|
||||
"\n",
|
||||
" def fit(self, X, y):\n",
|
||||
" X, y = self.process_data(X, y)\n",
|
||||
" train_dataset = torch.utils.data.TensorDataset(X, y)\n",
|
||||
" train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)\n",
|
||||
" train(self.model, self.criterion, self.optimizer, train_loader, self.epochs)\n",
|
||||
"\n",
|
||||
" def predict(self, X):\n",
|
||||
" self.model.eval()\n",
|
||||
" with torch.no_grad():\n",
|
||||
" X = np.array([video[:6] for video in X])\n",
|
||||
" tensor_videos = torch.tensor(X, dtype=torch.float32)\n",
|
||||
" # Clip values to 0 and 255\n",
|
||||
" tensor_videos = np.clip(tensor_videos, 0, 255)\n",
|
||||
" # Replace NaNs in each frame, with the average of the frame. This was generated with GPT\n",
|
||||
" for i in range(tensor_videos.shape[0]):\n",
|
||||
" for j in range(tensor_videos.shape[1]):\n",
|
||||
" tensor_videos[i][j][torch.isnan(tensor_videos[i][j])] = torch.mean(\n",
|
||||
" tensor_videos[i][j][~torch.isnan(tensor_videos[i][j])])\n",
|
||||
" X = torch.Tensor(tensor_videos.unsqueeze(1))\n",
|
||||
" result = self.model(X)\n",
|
||||
" return torch.max(result, dim=1)[1].numpy()\n",
|
||||
" def process_data(self, X, y, n_samples=600):\n",
|
||||
" y = np.array(y)\n",
|
||||
" X = np.array([video[:6] for video in X])\n",
|
||||
" tensor_videos = torch.tensor(X, dtype=torch.float32)\n",
|
||||
" # Clip values to 0 and 255\n",
|
||||
" tensor_videos = np.clip(tensor_videos, 0, 255)\n",
|
||||
" # Replace NaNs in each frame, with the average of the frame. This was generated with GPT\n",
|
||||
" for i in range(tensor_videos.shape[0]):\n",
|
||||
" for j in range(tensor_videos.shape[1]):\n",
|
||||
" tensor_videos[i][j][torch.isnan(tensor_videos[i][j])] = torch.mean(\n",
|
||||
" tensor_videos[i][j][~torch.isnan(tensor_videos[i][j])])\n",
|
||||
" # Resample each of the 6 classes to exactly n_samples videos, drawn with replacement (small classes are oversampled)\n",
|
||||
" # Very much generated with the assistance of ChatGPT, with some modifications\n",
|
||||
" # Get the indices of each class\n",
|
||||
" indices = [np.argwhere(y == i).squeeze(1) for i in range(6)]\n",
|
||||
" # Get the number of samples to take for each class\n",
|
||||
" # Get the indices of the samples to take\n",
|
||||
" indices_to_take = [np.random.choice(indices[i], n_samples, replace=True) for i in range(6)]\n",
|
||||
" # Concatenate the indices\n",
|
||||
" indices_to_take = np.concatenate(indices_to_take)\n",
|
||||
" # Select the samples\n",
|
||||
" tensor_videos = tensor_videos[indices_to_take].unsqueeze(1)\n",
|
||||
" y = y[indices_to_take]\n",
|
||||
" return torch.Tensor(tensor_videos), torch.Tensor(y).long()\n"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-28T14:11:23.392366Z",
|
||||
"start_time": "2024-04-28T14:11:23.379289Z"
|
||||
}
|
||||
},
|
||||
"id": "e68cd41b2d919ccd",
|
||||
"execution_count": 60
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "834f6cae11789f02"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "initial_id",
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-28T13:36:33.686216Z",
|
||||
"start_time": "2024-04-28T13:36:33.665939Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.metrics import f1_score\n",
|
||||
"import optuna\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"with open('data.npy', 'rb') as f:\n",
|
||||
" data = np.load(f, allow_pickle=True).item()\n",
|
||||
" X = data['data']\n",
|
||||
" y = data['label']\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1)\n",
|
||||
"\n",
|
||||
"not_nan_indices = np.argwhere(~np.isnan(np.array(y_test))).squeeze()\n",
|
||||
"y_test = [y_test[i] for i in not_nan_indices]\n",
|
||||
"X_test = [X_test[i] for i in not_nan_indices]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def objective(trial):\n",
|
||||
" batch = trial.suggest_int(\"batch_size\", 1, 12, log=True)\n",
|
||||
" epochs = trial.suggest_int(\"epochs\", 1, 20)\n",
|
||||
" model = Model(batch_size=2**batch, epochs=epochs)\n",
|
||||
" model.fit(X_train, y_train)\n",
|
||||
" pred = model.predict(X_test)\n",
|
||||
" return -f1_score(y_test, pred, average='macro')\n",
|
||||
"# Run optimization\n",
|
||||
"# storage = optuna.storages.InMemoryStorage()\n",
|
||||
"# study = optuna.create_study(storage=storage)\n",
|
||||
"# study.optimize(objective, n_trials=10)\n",
|
||||
"# \n",
|
||||
"# best_score = study.best_value\n",
|
||||
"# best_params = study.best_params\n",
|
||||
"# \n",
|
||||
"# print(best_score, best_params)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "KeyboardInterrupt",
|
||||
"evalue": "",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
|
||||
"\u001B[0;31mKeyboardInterrupt\u001B[0m Traceback (most recent call last)",
|
||||
"Cell \u001B[0;32mIn[17], line 3\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m i \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mrange\u001B[39m(\u001B[38;5;241m3\u001B[39m):\n\u001B[1;32m 2\u001B[0m model \u001B[38;5;241m=\u001B[39m Model(batch_size\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m8\u001B[39m, epochs\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m10\u001B[39m)\n\u001B[0;32m----> 3\u001B[0m \u001B[43mmodel\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit\u001B[49m\u001B[43m(\u001B[49m\u001B[43mX_train\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43my_train\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 4\u001B[0m pred \u001B[38;5;241m=\u001B[39m model\u001B[38;5;241m.\u001B[39mpredict(X_test)\n\u001B[1;32m 5\u001B[0m \u001B[38;5;28mprint\u001B[39m(f1_score(y_test, pred, average\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmacro\u001B[39m\u001B[38;5;124m'\u001B[39m))\n",
|
||||
"Cell \u001B[0;32mIn[16], line 59\u001B[0m, in \u001B[0;36mModel.fit\u001B[0;34m(self, X, y)\u001B[0m\n\u001B[1;32m 57\u001B[0m train_dataset \u001B[38;5;241m=\u001B[39m torch\u001B[38;5;241m.\u001B[39mutils\u001B[38;5;241m.\u001B[39mdata\u001B[38;5;241m.\u001B[39mTensorDataset(X, y)\n\u001B[1;32m 58\u001B[0m train_loader \u001B[38;5;241m=\u001B[39m torch\u001B[38;5;241m.\u001B[39mutils\u001B[38;5;241m.\u001B[39mdata\u001B[38;5;241m.\u001B[39mDataLoader(train_dataset, batch_size\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mbatch_size, shuffle\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m)\n\u001B[0;32m---> 59\u001B[0m \u001B[43mtrain\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmodel\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcriterion\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43moptimizer\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mtrain_loader\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mepochs\u001B[49m\u001B[43m)\u001B[49m\n",
|
||||
"Cell \u001B[0;32mIn[16], line 38\u001B[0m, in \u001B[0;36mtrain\u001B[0;34m(model, criterion, optimizer, loader, epochs)\u001B[0m\n\u001B[1;32m 36\u001B[0m outputs \u001B[38;5;241m=\u001B[39m model(inputs)\n\u001B[1;32m 37\u001B[0m loss \u001B[38;5;241m=\u001B[39m criterion(outputs, labels)\n\u001B[0;32m---> 38\u001B[0m \u001B[43mloss\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbackward\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 39\u001B[0m optimizer\u001B[38;5;241m.\u001B[39mstep()\n\u001B[1;32m 40\u001B[0m \u001B[38;5;66;03m# print(f'Epoch {epoch}, Loss: {loss.item()}')\u001B[39;00m\n",
|
||||
"File \u001B[0;32m/nix/store/7xbwd39w9rk2qgwpyf8c4h8bv02bl3c9-python3-3.11.9-env/lib/python3.11/site-packages/torch/_tensor.py:522\u001B[0m, in \u001B[0;36mTensor.backward\u001B[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001B[0m\n\u001B[1;32m 512\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m has_torch_function_unary(\u001B[38;5;28mself\u001B[39m):\n\u001B[1;32m 513\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m handle_torch_function(\n\u001B[1;32m 514\u001B[0m Tensor\u001B[38;5;241m.\u001B[39mbackward,\n\u001B[1;32m 515\u001B[0m (\u001B[38;5;28mself\u001B[39m,),\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 520\u001B[0m inputs\u001B[38;5;241m=\u001B[39minputs,\n\u001B[1;32m 521\u001B[0m )\n\u001B[0;32m--> 522\u001B[0m \u001B[43mtorch\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mautograd\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbackward\u001B[49m\u001B[43m(\u001B[49m\n\u001B[1;32m 523\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mgradient\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mretain_graph\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mcreate_graph\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43minputs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43minputs\u001B[49m\n\u001B[1;32m 524\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n",
|
||||
"File \u001B[0;32m/nix/store/7xbwd39w9rk2qgwpyf8c4h8bv02bl3c9-python3-3.11.9-env/lib/python3.11/site-packages/torch/autograd/__init__.py:266\u001B[0m, in \u001B[0;36mbackward\u001B[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001B[0m\n\u001B[1;32m 261\u001B[0m retain_graph \u001B[38;5;241m=\u001B[39m create_graph\n\u001B[1;32m 263\u001B[0m \u001B[38;5;66;03m# The reason we repeat the same comment below is that\u001B[39;00m\n\u001B[1;32m 264\u001B[0m \u001B[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001B[39;00m\n\u001B[1;32m 265\u001B[0m \u001B[38;5;66;03m# calls in the traceback and some print out the last line\u001B[39;00m\n\u001B[0;32m--> 266\u001B[0m \u001B[43mVariable\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_execution_engine\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrun_backward\u001B[49m\u001B[43m(\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;66;43;03m# Calls into the C++ engine to run the backward pass\u001B[39;49;00m\n\u001B[1;32m 267\u001B[0m \u001B[43m \u001B[49m\u001B[43mtensors\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 268\u001B[0m \u001B[43m \u001B[49m\u001B[43mgrad_tensors_\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 269\u001B[0m \u001B[43m \u001B[49m\u001B[43mretain_graph\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 270\u001B[0m \u001B[43m \u001B[49m\u001B[43mcreate_graph\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 271\u001B[0m \u001B[43m \u001B[49m\u001B[43minputs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 272\u001B[0m \u001B[43m \u001B[49m\u001B[43mallow_unreachable\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mTrue\u001B[39;49;00m\u001B[43m,\u001B[49m\n\u001B[1;32m 273\u001B[0m \u001B[43m \u001B[49m\u001B[43maccumulate_grad\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mTrue\u001B[39;49;00m\u001B[43m,\u001B[49m\n\u001B[1;32m 274\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n",
|
||||
"\u001B[0;31mKeyboardInterrupt\u001B[0m: "
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for i in range(3):\n",
|
||||
" model = Model(batch_size=8, epochs=10)\n",
|
||||
" model.fit(X_train, y_train)\n",
|
||||
" pred = model.predict(X_test)\n",
|
||||
" print(f1_score(y_test, pred, average='macro'))"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-28T13:12:15.506030Z",
|
||||
"start_time": "2024-04-28T13:12:04.578590Z"
|
||||
}
|
||||
},
|
||||
"id": "8d40e900efc2c1e3",
|
||||
"execution_count": 17
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "DuplicatedStudyError",
|
||||
"evalue": "",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
|
||||
"\u001B[0;31mDuplicatedStudyError\u001B[0m Traceback (most recent call last)",
|
||||
"Cell \u001B[0;32mIn[19], line 8\u001B[0m\n\u001B[1;32m 6\u001B[0m pred \u001B[38;5;241m=\u001B[39m model\u001B[38;5;241m.\u001B[39mpredict(X_test)\n\u001B[1;32m 7\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;241m-\u001B[39mf1_score(y_test, pred, average\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmacro\u001B[39m\u001B[38;5;124m'\u001B[39m)\n\u001B[0;32m----> 8\u001B[0m study \u001B[38;5;241m=\u001B[39m \u001B[43moptuna\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcreate_study\u001B[49m\u001B[43m(\u001B[49m\u001B[43mstorage\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mstorage\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mstudy_name\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mLearning_rate\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[1;32m 9\u001B[0m study\u001B[38;5;241m.\u001B[39moptimize(objective, n_trials\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m10\u001B[39m)\n\u001B[1;32m 10\u001B[0m \u001B[38;5;28mprint\u001B[39m(study\u001B[38;5;241m.\u001B[39mbest_trial)\n",
|
||||
"File \u001B[0;32m/nix/store/7xbwd39w9rk2qgwpyf8c4h8bv02bl3c9-python3-3.11.9-env/lib/python3.11/site-packages/optuna/_convert_positional_args.py:83\u001B[0m, in \u001B[0;36mconvert_positional_args.<locals>.converter_decorator.<locals>.converter_wrapper\u001B[0;34m(*args, **kwargs)\u001B[0m\n\u001B[1;32m 77\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m(\n\u001B[1;32m 78\u001B[0m \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{\u001B[39;00mfunc\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m() got multiple values for arguments \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mduplicated_kwds\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m.\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m 79\u001B[0m )\n\u001B[1;32m 81\u001B[0m kwargs\u001B[38;5;241m.\u001B[39mupdate(inferred_kwargs)\n\u001B[0;32m---> 83\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mfunc\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n",
|
||||
"File \u001B[0;32m/nix/store/7xbwd39w9rk2qgwpyf8c4h8bv02bl3c9-python3-3.11.9-env/lib/python3.11/site-packages/optuna/study/study.py:1251\u001B[0m, in \u001B[0;36mcreate_study\u001B[0;34m(storage, sampler, pruner, study_name, direction, load_if_exists, directions)\u001B[0m\n\u001B[1;32m 1249\u001B[0m storage \u001B[38;5;241m=\u001B[39m storages\u001B[38;5;241m.\u001B[39mget_storage(storage)\n\u001B[1;32m 1250\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m-> 1251\u001B[0m study_id \u001B[38;5;241m=\u001B[39m \u001B[43mstorage\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcreate_new_study\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdirection_objects\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mstudy_name\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 1252\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m exceptions\u001B[38;5;241m.\u001B[39mDuplicatedStudyError:\n\u001B[1;32m 1253\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m load_if_exists:\n",
|
||||
"File \u001B[0;32m/nix/store/7xbwd39w9rk2qgwpyf8c4h8bv02bl3c9-python3-3.11.9-env/lib/python3.11/site-packages/optuna/storages/_in_memory.py:62\u001B[0m, in \u001B[0;36mInMemoryStorage.create_new_study\u001B[0;34m(self, directions, study_name)\u001B[0m\n\u001B[1;32m 60\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m study_name \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m 61\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m study_name \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_study_name_to_id:\n\u001B[0;32m---> 62\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m DuplicatedStudyError\n\u001B[1;32m 63\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 64\u001B[0m study_uuid \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mstr\u001B[39m(uuid\u001B[38;5;241m.\u001B[39muuid4())\n",
|
||||
"\u001B[0;31mDuplicatedStudyError\u001B[0m: "
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Study of best learning rate\n",
|
||||
"def objective(trial):\n",
|
||||
" lr = trial.suggest_float(\"lr\", 1e-5, 5e-1, log=True)\n",
|
||||
" model = Model(lr=lr)\n",
|
||||
" model.fit(X_train, y_train)\n",
|
||||
" pred = model.predict(X_test)\n",
|
||||
" return -f1_score(y_test, pred, average='macro')\n",
|
||||
"study = optuna.create_study(storage=storage, study_name=\"Learning_rate\")\n",
|
||||
"study.optimize(objective, n_trials=10)\n"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-28T13:28:31.359163Z",
|
||||
"start_time": "2024-04-28T13:28:31.282392Z"
|
||||
}
|
||||
},
|
||||
"id": "569223093b7d12cd",
|
||||
"execution_count": 19
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6912320650364129\n",
|
||||
"0.6607744107744108\n",
|
||||
"0.6665432155087326\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for i in range(3):\n",
|
||||
" model = Model()\n",
|
||||
" model.fit(X_train, y_train)\n",
|
||||
" pred = model.predict(X_test)\n",
|
||||
" print(f1_score(y_test, pred, average='macro'))\n",
|
||||
"# With LR 0.00016764273108300424"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-28T13:30:54.182935Z",
|
||||
"start_time": "2024-04-28T13:30:09.313591Z"
|
||||
}
|
||||
},
|
||||
"id": "44d047094db9b1d",
|
||||
"execution_count": 24
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.706415970280129\n",
|
||||
"0.7055600716120302\n",
|
||||
"0.67676009342676\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for i in range(3):\n",
|
||||
" model = Model(lr=0.001)\n",
|
||||
" model.fit(X_train, y_train)\n",
|
||||
" pred = model.predict(X_test)\n",
|
||||
" print(f1_score(y_test, pred, average='macro'))"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-28T13:32:17.798663Z",
|
||||
"start_time": "2024-04-28T13:31:32.517631Z"
|
||||
}
|
||||
},
|
||||
"id": "7076da680d4a43d9",
|
||||
"execution_count": 25
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[I 2024-04-28 13:38:45,580] A new study created in memory with name: dropout\n",
|
||||
"[I 2024-04-28 13:39:09,602] Trial 0 finished with value: -0.5916579614840399 and parameters: {'dropout': 0.2}. Best is trial 0 with value: -0.5916579614840399.\n",
|
||||
"[I 2024-04-28 13:39:33,716] Trial 1 finished with value: -0.706054567707244 and parameters: {'dropout': 0.5}. Best is trial 1 with value: -0.706054567707244.\n",
|
||||
"[I 2024-04-28 13:39:58,041] Trial 2 finished with value: -0.6914798955557488 and parameters: {'dropout': 0.1}. Best is trial 1 with value: -0.706054567707244.\n",
|
||||
"[I 2024-04-28 13:40:22,578] Trial 3 finished with value: -0.7369303922613968 and parameters: {'dropout': 0.1}. Best is trial 3 with value: -0.7369303922613968.\n",
|
||||
"[I 2024-04-28 13:40:46,740] Trial 4 finished with value: -0.7529025381898083 and parameters: {'dropout': 0.4}. Best is trial 4 with value: -0.7529025381898083.\n",
|
||||
"[I 2024-04-28 13:41:10,786] Trial 5 finished with value: -0.6720344567712989 and parameters: {'dropout': 0.30000000000000004}. Best is trial 4 with value: -0.7529025381898083.\n",
|
||||
"[I 2024-04-28 13:41:34,997] Trial 6 finished with value: -0.668808616427664 and parameters: {'dropout': 0.30000000000000004}. Best is trial 4 with value: -0.7529025381898083.\n",
|
||||
"[I 2024-04-28 13:41:59,296] Trial 7 finished with value: -0.6959006417141586 and parameters: {'dropout': 0.5}. Best is trial 4 with value: -0.7529025381898083.\n",
|
||||
"[I 2024-04-28 13:42:23,828] Trial 8 finished with value: -0.707880814386361 and parameters: {'dropout': 0.4}. Best is trial 4 with value: -0.7529025381898083.\n",
|
||||
"[I 2024-04-28 13:42:48,169] Trial 9 finished with value: -0.7244402292510341 and parameters: {'dropout': 0.0}. Best is trial 4 with value: -0.7529025381898083.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "({'dropout': 0.4}, -0.7529025381898083)"
|
||||
},
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def objective(trial):\n",
|
||||
" dropout = trial.suggest_float(\"dropout\", 0.0, 0.5, step=0.1)\n",
|
||||
" model = Model(dropout=dropout, lr=0.001)\n",
|
||||
" model.fit(X_train, y_train)\n",
|
||||
" pred = model.predict(X_test)\n",
|
||||
" return -f1_score(y_test, pred, average='macro')\n",
|
||||
"study = optuna.create_study(storage=storage, study_name=\"dropout\")\n",
|
||||
"study.optimize(objective, n_trials=10)\n",
|
||||
"\n",
|
||||
"study.best_params, study.best_value"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-28T13:42:48.174167Z",
|
||||
"start_time": "2024-04-28T13:38:45.579006Z"
|
||||
}
|
||||
},
|
||||
"id": "9b62d90bafc1cacd",
|
||||
"execution_count": 35
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6141327807994474\n",
|
||||
"0.6064872820918911\n",
|
||||
"0.6300338376466196\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)\n",
|
||||
"not_nan_indices = np.argwhere(~np.isnan(np.array(y_test))).squeeze()\n",
|
||||
"y_test = [y_test[i] for i in not_nan_indices]\n",
|
||||
"X_test = [X_test[i] for i in not_nan_indices]\n",
|
||||
"\n",
|
||||
"for i in range(3):\n",
|
||||
" model = Model(dropout=0.4, lr=0.001)\n",
|
||||
" model.fit(X_train, y_train)\n",
|
||||
" pred = model.predict(X_test)\n",
|
||||
" print(f1_score(y_test, pred, average='macro'))\n"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-28T13:45:49.518836Z",
|
||||
"start_time": "2024-04-28T13:44:37.580866Z"
|
||||
}
|
||||
},
|
||||
"id": "c6c39df6367f332f",
|
||||
"execution_count": 37
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6413423296891699\n",
|
||||
"0.6768407575558187\n",
|
||||
"0.6221822007289135\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)\n",
|
||||
"not_nan_indices = np.argwhere(~np.isnan(np.array(y_test))).squeeze()\n",
|
||||
"y_test = [y_test[i] for i in not_nan_indices]\n",
|
||||
"X_test = [X_test[i] for i in not_nan_indices]\n",
|
||||
"\n",
|
||||
"for i in range(3):\n",
|
||||
" model = Model(dropout=0.1, lr=0.001)\n",
|
||||
" model.fit(X_train, y_train)\n",
|
||||
" pred = model.predict(X_test)\n",
|
||||
" print(f1_score(y_test, pred, average='macro'))"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-28T13:47:37.461842Z",
|
||||
"start_time": "2024-04-28T13:46:25.429049Z"
|
||||
}
|
||||
},
|
||||
"id": "43eeb03175ef4fcf",
|
||||
"execution_count": 38
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.6933099871956078\n",
|
||||
"0.662550713257427\n",
|
||||
"0.7479965052551641\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for i in range(3):\n",
|
||||
" model = Model(dropout=0.0, lr=0.001)\n",
|
||||
" model.fit(X_train, y_train)\n",
|
||||
" pred = model.predict(X_test)\n",
|
||||
" print(f1_score(y_test, pred, average='macro'))"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-28T13:49:23.670317Z",
|
||||
"start_time": "2024-04-28T13:48:12.536955Z"
|
||||
}
|
||||
},
|
||||
"id": "ca306dcc36b378aa",
|
||||
"execution_count": 39
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[I 2024-04-28 13:54:45,435] A new study created in memory with name: hidden_layer\n",
|
||||
"[I 2024-04-28 13:55:08,688] Trial 0 finished with value: -0.6744674992735161 and parameters: {'hidden_layer': 5}. Best is trial 0 with value: -0.6744674992735161.\n",
|
||||
"[I 2024-04-28 13:55:17,295] Trial 1 finished with value: -0.28941963054866277 and parameters: {'hidden_layer': 0}. Best is trial 0 with value: -0.6744674992735161.\n",
|
||||
"[I 2024-04-28 13:55:27,158] Trial 2 finished with value: -0.5233606132063355 and parameters: {'hidden_layer': 1}. Best is trial 0 with value: -0.6744674992735161.\n",
|
||||
"[I 2024-04-28 13:56:19,013] Trial 3 finished with value: -0.6249650240541486 and parameters: {'hidden_layer': 6}. Best is trial 0 with value: -0.6744674992735161.\n",
|
||||
"[I 2024-04-28 13:57:10,821] Trial 4 finished with value: -0.7192056747499614 and parameters: {'hidden_layer': 6}. Best is trial 4 with value: -0.7192056747499614.\n",
|
||||
"[I 2024-04-28 13:57:21,125] Trial 5 finished with value: -0.49548899023401516 and parameters: {'hidden_layer': 2}. Best is trial 4 with value: -0.7192056747499614.\n",
|
||||
"[I 2024-04-28 13:58:13,096] Trial 6 finished with value: -0.6470733120834541 and parameters: {'hidden_layer': 6}. Best is trial 4 with value: -0.7192056747499614.\n",
|
||||
"[I 2024-04-28 13:58:22,962] Trial 7 finished with value: -0.35437191511033816 and parameters: {'hidden_layer': 1}. Best is trial 4 with value: -0.7192056747499614.\n",
|
||||
"[I 2024-04-28 13:58:32,939] Trial 8 finished with value: -0.46924857013983573 and parameters: {'hidden_layer': 1}. Best is trial 4 with value: -0.7192056747499614.\n",
|
||||
"[I 2024-04-28 13:58:47,127] Trial 9 finished with value: -0.6718245833426378 and parameters: {'hidden_layer': 4}. Best is trial 4 with value: -0.7192056747499614.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def objective(trial):\n",
|
||||
" hidden_layer = trial.suggest_int(\"hidden_layer\", 0, 6, step=1)\n",
|
||||
" model = Model(hidden_size=2**hidden_layer)\n",
|
||||
" model.fit(X_train, y_train)\n",
|
||||
" pred = model.predict(X_test)\n",
|
||||
" return -f1_score(y_test, pred, average='macro')\n",
|
||||
"study = optuna.create_study(storage=storage, study_name=\"hidden_layer\")\n",
|
||||
"study.optimize(objective, n_trials=10)\n"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-28T13:58:47.129260Z",
|
||||
"start_time": "2024-04-28T13:54:45.434150Z"
|
||||
}
|
||||
},
|
||||
"id": "9769cac2632f786c",
|
||||
"execution_count": 51
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"size: 32 0.7587148200217135\n",
|
||||
"size: 32 0.7007023098468655\n",
|
||||
"size: 32 0.703371572957317\n",
|
||||
"size: 16 0.6527439030663686\n",
|
||||
"size: 16 0.6701033952446381\n",
|
||||
"size: 16 0.6303813565416863\n",
|
||||
"size: 64 0.6967700946646325\n",
|
||||
"size: 64 0.6868739609967679\n",
|
||||
"size: 64 0.7137992158825491\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)\n",
|
||||
"not_nan_indices = np.argwhere(~np.isnan(np.array(y_test))).squeeze()\n",
|
||||
"y_test = [y_test[i] for i in not_nan_indices]\n",
|
||||
"X_test = [X_test[i] for i in not_nan_indices]\n",
|
||||
"for i in range(3):\n",
|
||||
" model = Model(hidden_size=32)\n",
|
||||
" model.fit(X_train, y_train)\n",
|
||||
" pred = model.predict(X_test)\n",
|
||||
" print(\"size: 32\", f1_score(y_test, pred, average='macro'))\n",
|
||||
"for i in range(3):\n",
|
||||
" model = Model(hidden_size=16)\n",
|
||||
" model.fit(X_train, y_train)\n",
|
||||
" pred = model.predict(X_test)\n",
|
||||
" print(\"size: 16\", f1_score(y_test, pred, average='macro'))\n",
|
||||
" \n",
|
||||
"for i in range(3):\n",
|
||||
" model = Model(hidden_size=64)\n",
|
||||
" model.fit(X_train, y_train)\n",
|
||||
" pred = model.predict(X_test)\n",
|
||||
" print(\"size: 64\", f1_score(y_test, pred, average='macro'))\n"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-28T14:04:15.161623Z",
|
||||
"start_time": "2024-04-28T13:59:45.112984Z"
|
||||
}
|
||||
},
|
||||
"id": "17d540d809407eb4",
|
||||
"execution_count": 52
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1->8->16 0.6983043227229274\n",
|
||||
"1->8->16 0.6151284777600567\n",
|
||||
"1->8->16 0.5974537037037037\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for i in range(3):\n",
|
||||
" model = Model(batch_size=64, epochs=20, lr=0.005)\n",
|
||||
" model.fit(X_train, y_train)\n",
|
||||
" pred = model.predict(X_test)\n",
|
||||
" print(\"1->8->16\", f1_score(y_test, pred, average='macro'))\n"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-28T14:12:46.923583Z",
|
||||
"start_time": "2024-04-28T14:11:28.546397Z"
|
||||
}
|
||||
},
|
||||
"id": "3ffe3c938a4ff38c",
|
||||
"execution_count": 61
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1->8->16 0.655143327874276\n",
|
||||
"1->8->16 0.6882410205760642\n",
|
||||
"1->8->16 0.7252563893010118\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for i in range(3):\n",
|
||||
" model = Model()\n",
|
||||
" model.fit(X_train, y_train)\n",
|
||||
" pred = model.predict(X_test)\n",
|
||||
" print(\"1->8->16\", f1_score(y_test, pred, average='macro'))\n"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-28T14:13:57.574163Z",
|
||||
"start_time": "2024-04-28T14:12:46.924962Z"
|
||||
}
|
||||
},
|
||||
"id": "bf98f16797188024",
|
||||
"execution_count": 62
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Loading…
Reference in New Issue
Block a user