maybe good?

This commit is contained in:
2024-04-28 15:58:30 +08:00
parent 1312e694c3
commit d2e87aec97
21 changed files with 3097 additions and 700 deletions

View File

@@ -299,16 +299,25 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 70,
"id": "a44b7aa4",
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2024-04-27T07:45:57.664982Z",
"start_time": "2024-04-27T07:45:57.652624Z"
}
},
"outputs": [],
"source": [
"from sklearn.preprocessing import OrdinalEncoder\n",
"from sklearn.linear_model import LinearRegression\n",
"import sklearn.ensemble\n",
"\n",
"\n",
"class Model: \n",
" \"\"\"\n",
" This class represents an AI model.\n",
" \"\"\"\n",
" \n",
" def __init__(self):\n",
" \"\"\"\n",
" Constructor for Model class.\n",
@@ -318,9 +327,26 @@
" self : object\n",
" The instance of the object passed by Python.\n",
" \"\"\"\n",
" # TODO: Replace the following code with your own initialization code.\n",
" pass\n",
" \n",
" self.model = LinearRegression()\n",
"\n",
"    def process_input(self, X):\n",
"        \"\"\"\n",
"        Turn the raw tabular features into a numeric, NaN-free DataFrame.\n",
"\n",
"        Object-typed columns are ordinal-encoded, a fixed list of columns is\n",
"        dropped, and remaining missing values are filled with column means.\n",
"\n",
"        Parameters\n",
"        ----------\n",
"        X : pandas.DataFrame or dict\n",
"            The tabular features, or the full input dict whose 'tabular'\n",
"            entry holds them (fit and predict pass the DataFrame directly).\n",
"\n",
"        Returns\n",
"        -------\n",
"        pandas.DataFrame\n",
"            A processed copy; the caller's DataFrame is left untouched.\n",
"        \"\"\"\n",
"        # Accept both call styles so X['tabular'] is never applied to a\n",
"        # DataFrame. The image array is not used: it was computed and discarded.\n",
"        if isinstance(X, dict):\n",
"            X = X['tabular']\n",
"        # Work on a copy: assigning into a slice of the caller's frame is what\n",
"        # raised SettingWithCopyWarning.\n",
"        X = X.copy()\n",
"\n",
"        obj_cols = X.dtypes[X.dtypes == 'object'].index\n",
"        if len(obj_cols) > 0:\n",
"            # NOTE(review): the encoder is refit on every call, so training and\n",
"            # prediction data may receive different codes for the same category;\n",
"            # consider fitting it once in fit() and reusing it here.\n",
"            X[obj_cols] = OrdinalEncoder().fit_transform(X[obj_cols])\n",
"\n",
"        columns_to_drop = ['V40', 'V20', 'V39', 'V15', 'V10', 'V35', 'V2', 'V52', 'V45', 'V7', 'V48', 'V49', 'V43', 'V44', 'V26', 'V41', 'V11', 'V53', 'V42', 'V38']\n",
"        X = X.drop(columns_to_drop, axis=1)\n",
"        X = X.fillna(X.mean())\n",
"        return X\n",
"\n",
" def fit(self, X_dict, y):\n",
" \"\"\"\n",
" Train the model using the input data.\n",
@@ -339,9 +365,11 @@
" self : object\n",
" Returns an instance of the trained model.\n",
" \"\"\"\n",
" # TODO: Add your training code.\n",
" X = X_dict['tabular']\n",
" X = self.process_input(X)\n",
" self.model.fit(X, y)\n",
" return self\n",
" \n",
" \n",
" def predict(self, X_dict):\n",
" \"\"\"\n",
" Use the trained model to make predictions.\n",
@@ -359,8 +387,9 @@
" Predicted target values per element in X_dict.\n",
" \n",
" \"\"\"\n",
" # TODO: Replace the following code with your own prediction code.\n",
" return [0 for _ in range(len(X_dict['tabular']))]"
" X = self.process_input(X_dict['tabular'])\n",
" return self.model.predict(X)\n",
" # return [0 for _ in range(len(X_dict['tabular']))]"
]
},
{
@@ -375,9 +404,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 51,
"id": "4f4dd489",
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2024-04-27T07:23:39.732051Z",
"start_time": "2024-04-27T07:23:39.725818Z"
}
},
"outputs": [],
"source": [
"# Import packages\n",
@@ -390,9 +424,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 52,
"id": "3064e0ff",
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2024-04-27T07:23:42.216498Z",
"start_time": "2024-04-27T07:23:40.676178Z"
}
},
"outputs": [],
"source": [
"# Load data\n",
@@ -413,10 +452,47 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 71,
"id": "27c9fd10",
"metadata": {},
"outputs": [],
"metadata": {
"ExecuteTime": {
"end_time": "2024-04-27T07:46:01.374238Z",
"start_time": "2024-04-27T07:45:59.640013Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/zd/9vyg32393qncxwt_3r_873mh0000gn/T/ipykernel_29080/3308836053.py:43: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" X[obj_cols] = ordinal_columns\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"MSE: 5352.19\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/zd/9vyg32393qncxwt_3r_873mh0000gn/T/ipykernel_29080/3308836053.py:43: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" X[obj_cols] = ordinal_columns\n"
]
}
],
"source": [
"# Split train and test\n",
"X_dict_train, y_train, X_dict_test, y_test = dict_train_test_split(X_dict, y, ratio=0.9)\n",