{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyOOB9uu9pwtnHtfajs04PdT"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","execution_count":16,"metadata":{"id":"DG5gGFyhgE_K","executionInfo":{"status":"ok","timestamp":1710599938894,"user_tz":-420,"elapsed":1049,"user":{"displayName":"Nhật Quang Đoàn","userId":"10175964550021301622"}}},"outputs":[],"source":["import numpy as np\n","import pandas as pd\n","from sklearn.datasets import load_breast_cancer, fetch_california_housing\n","from sklearn.feature_selection import SelectFromModel\n","from sklearn.linear_model import Lasso, LogisticRegression\n","from sklearn.model_selection import train_test_split\n","from sklearn.preprocessing import StandardScaler"]},{"cell_type":"code","source":["breast_cancer = load_breast_cancer()\n","X = pd.DataFrame(breast_cancer.data, columns=breast_cancer.feature_names)\n","y = breast_cancer.target\n","X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)\n"],"metadata":{"id":"S9s_HLXVgTvu","executionInfo":{"status":"ok","timestamp":1710599940631,"user_tz":-420,"elapsed":313,"user":{"displayName":"Nhật Quang Đoàn","userId":"10175964550021301622"}}},"execution_count":17,"outputs":[]},{"cell_type":"code","source":["scaler = StandardScaler()\n","scaler.fit(X_train)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":74},"id":"TKmrxvrogXmH","executionInfo":{"status":"ok","timestamp":1710599949063,"user_tz":-420,"elapsed":397,"user":{"displayName":"Nhật Quang Đoàn","userId":"10175964550021301622"}},"outputId":"a163f11f-4b3d-4cac-a622-559b65b4f4f0"},"execution_count":18,"outputs":[{"output_type":"execute_result","data":{"text/plain":["StandardScaler()"],"text/html":["
StandardScaler()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
"]},"metadata":{},"execution_count":18}]},{"cell_type":"code","source":["sel_ = SelectFromModel(\n"," LogisticRegression(C=0.5, penalty='l1', solver='liblinear', random_state=10))\n","sel_.fit(scaler.transform(X_train), y_train)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":117},"id":"nb_jL139gZKE","executionInfo":{"status":"ok","timestamp":1710599962515,"user_tz":-420,"elapsed":328,"user":{"displayName":"Nhật Quang Đoàn","userId":"10175964550021301622"}},"outputId":"1bc8144a-bce1-49d2-f825-31daf37aed86"},"execution_count":19,"outputs":[{"output_type":"execute_result","data":{"text/plain":["SelectFromModel(estimator=LogisticRegression(C=0.5, penalty='l1',\n"," random_state=10,\n"," solver='liblinear'))"],"text/html":["
SelectFromModel(estimator=LogisticRegression(C=0.5, penalty='l1',\n","                                             random_state=10,\n","                                             solver='liblinear'))
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
"]},"metadata":{},"execution_count":19}]},{"cell_type":"code","source":["sel_.get_support()\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"B32FGTcggdDL","executionInfo":{"status":"ok","timestamp":1710600270396,"user_tz":-420,"elapsed":406,"user":{"displayName":"Nhật Quang Đoàn","userId":"10175964550021301622"}},"outputId":"ac504646-6016-4e53-b63f-e18d84b76fba"},"execution_count":23,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([False, True, False, False, False, False, False, True, True,\n"," False, True, False, False, False, False, True, False, False,\n"," False, True, True, True, True, True, True, False, True,\n"," True, True, False])"]},"metadata":{},"execution_count":23}]},{"cell_type":"code","source":["removed_feats = X_train.columns[(sel_.estimator_.coef_ == 0).ravel().tolist()]\n","X_train_selected = sel_.transform(scaler.transform(X_train))\n","X_test_selected = sel_.transform(scaler.transform(X_test))"],"metadata":{"id":"kQXvJVKWuJ_D","executionInfo":{"status":"ok","timestamp":1710600383967,"user_tz":-420,"elapsed":356,"user":{"displayName":"Nhật Quang Đoàn","userId":"10175964550021301622"}}},"execution_count":25,"outputs":[]},{"cell_type":"code","source":["np.corrcoef(X)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"gclRh90euoG1","executionInfo":{"status":"ok","timestamp":1710601180513,"user_tz":-420,"elapsed":324,"user":{"displayName":"Nhật Quang Đoàn","userId":"10175964550021301622"}},"outputId":"5904cd20-7780-4665-a57b-fdc60dc10bf7"},"execution_count":32,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[1. , 0.9892786 , 0.98703483, ..., 0.97919942, 0.98785229,\n"," 0.97683484],\n"," [0.9892786 , 1. , 0.999713 , ..., 0.99774113, 0.99972889,\n"," 0.98469916],\n"," [0.98703483, 0.999713 , 1. , ..., 0.99878867, 0.99988251,\n"," 0.98591229],\n"," ...,\n"," [0.97919942, 0.99774113, 0.99878867, ..., 1. , 0.99875905,\n"," 0.98800384],\n"," [0.98785229, 0.99972889, 0.99988251, ..., 0.99875905, 1. ,\n"," 0.98765632],\n"," [0.97683484, 0.98469916, 0.98591229, ..., 0.98800384, 0.98765632,\n"," 1. ]])"]},"metadata":{},"execution_count":32}]}]}