maybe good?

2024-04-28 15:58:30 +08:00
parent 1312e694c3
commit d2e87aec97
21 changed files with 3097 additions and 700 deletions
--- a/cs2109s/labs/final-mock/main.ipynb
+++ b/cs2109s/labs/final-mock/main.ipynb
@@ -299,16 +299,25 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 70,
   "id": "a44b7aa4",
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-04-27T07:45:57.664982Z",
+     "start_time": "2024-04-27T07:45:57.652624Z"
+    }
+   },
   "outputs": [],
   "source": [
+    "from sklearn.preprocessing import OrdinalEncoder\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "import sklearn.ensemble\n",
+    "\n",
+    "\n",
    "class Model:  \n",
    "    \"\"\"\n",
    "    This class represents an AI model.\n",
    "    \"\"\"\n",
-    "    \n",
    "    def __init__(self):\n",
    "        \"\"\"\n",
    "        Constructor for Model class.\n",
@@ -318,9 +327,26 @@
    "        self : object\n",
    "            The instance of the object passed by Python.\n",
    "        \"\"\"\n",
-    "        # TODO: Replace the following code with your own initialization code.\n",
-    "        pass\n",
-    "    \n",
+    "        self.model = LinearRegression()\n",
+    "\n",
+    "    def process_input(self, X):\n",
+    "        \"\"\"Return a cleaned copy of the tabular features: object columns ordinal-encoded, selected columns dropped, NaNs mean-filled.\"\"\"\n",
+    "        # Accept the full input dict or just its 'tabular' frame (what fit/predict pass); copy so the caller's data is never mutated.\n",
+    "        X = (X['tabular'] if isinstance(X, dict) else X).copy()\n",
+    "        def object_columns(X):\n",
+    "            return X.dtypes[X.dtypes == 'object'].index\n",
+    "\n",
+    "        def convert_to_ordinal(X, columns):\n",
+    "            encoder = OrdinalEncoder()\n",
+    "            return encoder.fit_transform(X[columns])\n",
+    "\n",
+    "        obj_cols = object_columns(X)\n",
+    "        ordinal_columns = convert_to_ordinal(X, obj_cols)\n",
+    "        X[obj_cols] = ordinal_columns\n",
+    "        columns_to_drop = ['V40', 'V20', 'V39', 'V15', 'V10', 'V35', 'V2', 'V52', 'V45', 'V7', 'V48', 'V49', 'V43', 'V44', 'V26', 'V41', 'V11', 'V53', 'V42', 'V38']\n",
+    "        X = X.drop(columns_to_drop, axis=1)\n",
+    "        X = X.fillna(X.mean())\n",
+    "        return X\n",
    "    def fit(self, X_dict, y):\n",
    "        \"\"\"\n",
    "        Train the model using the input data.\n",
@@ -339,9 +365,11 @@
    "        self : object\n",
    "            Returns an instance of the trained model.\n",
    "        \"\"\"\n",
-    "        # TODO: Add your training code.\n",
+    "        X = X_dict['tabular']\n",
+    "        X = self.process_input(X)\n",
+    "        self.model.fit(X, y)\n",
    "        return self\n",
-    "    \n",
+    "       \n",
    "    def predict(self, X_dict):\n",
    "        \"\"\"\n",
    "        Use the trained model to make predictions.\n",
@@ -359,8 +387,9 @@
    "           Predicted target values per element in X_dict.\n",
    "           \n",
    "        \"\"\"\n",
-    "        # TODO: Replace the following code with your own prediction code.\n",
-    "        return [0 for _ in range(len(X_dict['tabular']))]"
+    "        X = self.process_input(X_dict['tabular'])\n",
+    "        return self.model.predict(X)\n",
+    "        # return [0 for _ in range(len(X_dict['tabular']))]"
   ]
  },
  {
@@ -375,9 +404,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 51,
   "id": "4f4dd489",
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-04-27T07:23:39.732051Z",
+     "start_time": "2024-04-27T07:23:39.725818Z"
+    }
+   },
   "outputs": [],
   "source": [
    "# Import packages\n",
@@ -390,9 +424,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 52,
   "id": "3064e0ff",
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-04-27T07:23:42.216498Z",
+     "start_time": "2024-04-27T07:23:40.676178Z"
+    }
+   },
   "outputs": [],
   "source": [
    "# Load data\n",
@@ -413,10 +452,47 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 71,
   "id": "27c9fd10",
-   "metadata": {},
-   "outputs": [],
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2024-04-27T07:46:01.374238Z",
+     "start_time": "2024-04-27T07:45:59.640013Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/zd/9vyg32393qncxwt_3r_873mh0000gn/T/ipykernel_29080/3308836053.py:43: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  X[obj_cols] = ordinal_columns\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MSE: 5352.19\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/zd/9vyg32393qncxwt_3r_873mh0000gn/T/ipykernel_29080/3308836053.py:43: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  X[obj_cols] = ordinal_columns\n"
+     ]
+    }
+   ],
   "source": [
    "# Split train and test\n",
    "X_dict_train, y_train, X_dict_test, y_test = dict_train_test_split(X_dict, y, ratio=0.9)\n",