from google.colab import drive
drive.mount('/content/drive')
!pip install kaggle --upgrade
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!ls -l kaggle.json
!kaggle competitions download -c spaceship-titanic
# 파일 확인
!ls -l
# 압축 풀기
!unzip [파일명]
# 파일 확인
!ls -l
# Connect Google Drive (mounts the user's Drive at /content/drive).
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
# Install/upgrade the kaggle package (provides the Kaggle CLI and API client).
!pip install kaggle --upgrade
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/ Requirement already satisfied: kaggle in /usr/local/lib/python3.7/dist-packages (1.5.12) Requirement already satisfied: urllib3 in /usr/local/lib/python3.7/dist-packages (from kaggle) (1.24.3) Requirement already satisfied: certifi in /usr/local/lib/python3.7/dist-packages (from kaggle) (2022.6.15) Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.7/dist-packages (from kaggle) (1.15.0) Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from kaggle) (2.23.0) Requirement already satisfied: python-slugify in /usr/local/lib/python3.7/dist-packages (from kaggle) (6.1.2) Requirement already satisfied: python-dateutil in /usr/local/lib/python3.7/dist-packages (from kaggle) (2.8.2) Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from kaggle) (4.64.0) Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.7/dist-packages (from python-slugify->kaggle) (1.3) Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->kaggle) (2.10) Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->kaggle) (3.0.4)
### Method 1 - fetch kaggle.json (the Kaggle API token) from Google Drive
!ls -l drive/MyDrive/
!cp drive/MyDrive/kaggle.json /content/
!ls -l /content/
total 902 drwx------ 2 root root 4096 Aug 8 2020 탈잉 drwx------ 2 root root 4096 Sep 9 2020 유튜브 drwx------ 2 root root 4096 Sep 1 2020 방송통신대학교_파이썬기본 -rw------- 1 root root 18644 Sep 25 2020 1_7_내장함수.ipynb drwx------ 2 root root 4096 Aug 3 2020 책번역_20200803 drwx------ 2 root root 4096 May 8 2020 빅데이터4기_서울IT drwx------ 2 root root 4096 Sep 20 2020 AI이노베이션 drwx------ 2 root root 4096 Jun 17 2020 Burj_Khalifa drwx------ 2 root root 4096 Jan 3 2020 'Colab Notebooks' drwx------ 2 root root 4096 Jun 17 2020 Colosseum drwx------ 2 root root 4096 Jul 20 2020 dataset -rw------- 1 root root 10842 Nov 18 2020 gan_deep_dream.ipynb drwx------ 2 root root 4096 May 1 2021 jds -rw------- 1 root root 398659 Mar 24 2021 샘플이미지.jpg -rw------- 1 root root 67 Jun 28 14:57 kaggle.json -rw------- 1 root root 444540 Jan 26 2021 서약.png drwx------ 2 root root 4096 Jun 17 2020 test total 12 drwx------ 5 root root 4096 Jun 29 09:23 drive -rw------- 1 root root 67 Jun 29 09:23 kaggle.json drwxr-xr-x 1 root root 4096 Jun 15 13:42 sample_data
# After uploading kaggle.json: move it into ~/.kaggle and restrict it to
# owner-only permissions (the Kaggle CLI rejects a world-readable token).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!ls -l kaggle.json
-rw------- 1 root root 67 Jun 29 09:23 kaggle.json
# Download the competition data set via the Kaggle API.
!kaggle competitions download -c spaceship-titanic
Downloading spaceship-titanic.zip to /content 0% 0.00/299k [00:00<?, ?B/s] 100% 299k/299k [00:00<00:00, 83.1MB/s]
# Check files
!ls -l
# Extract the archive (yields sample_submission.csv, test.csv, train.csv
# per the listing below).
!unzip spaceship-titanic.zip
!ls -l
total 312 drwx------ 5 root root 4096 Jun 29 09:23 drive -rw------- 1 root root 67 Jun 29 09:23 kaggle.json drwxr-xr-x 1 root root 4096 Jun 15 13:42 sample_data -rw-r--r-- 1 root root 306403 Jun 29 09:23 spaceship-titanic.zip Archive: spaceship-titanic.zip inflating: sample_submission.csv inflating: test.csv inflating: train.csv total 1524 drwx------ 5 root root 4096 Jun 29 09:23 drive -rw------- 1 root root 67 Jun 29 09:23 kaggle.json drwxr-xr-x 1 root root 4096 Jun 15 13:42 sample_data -rw-r--r-- 1 root root 59902 Feb 11 14:02 sample_submission.csv -rw-r--r-- 1 root root 306403 Jun 29 09:23 spaceship-titanic.zip -rw-r--r-- 1 root root 372487 Feb 11 14:02 test.csv -rw-r--r-- 1 root root 805421 Feb 11 14:02 train.csv
# Install PyCaret, pinned to version 2.3.10.
!pip install pycaret==2.3.10
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pycaret==2.3.10
Downloading pycaret-2.3.10-py3-none-any.whl (320 kB)
|████████████████████████████████| 320 kB 4.2 MB/s
Requirement already satisfied: IPython in /usr/local/lib/python3.7/dist-packages (from pycaret==2.3.10) (5.5.0)
Collecting umap-learn
Downloading umap-learn-0.5.3.tar.gz (88 kB)
|████████████████████████████████| 88 kB 8.5 MB/s
Collecting mlflow
Downloading mlflow-1.27.0-py3-none-any.whl (17.9 MB)
|████████████████████████████████| 17.9 MB 576 kB/s
Collecting mlxtend>=0.17.0
Downloading mlxtend-0.20.0-py2.py3-none-any.whl (1.3 MB)
|████████████████████████████████| 1.3 MB 48.4 MB/s
Requirement already satisfied: wordcloud in /usr/local/lib/python3.7/dist-packages (from pycaret==2.3.10) (1.5.0)
Collecting kmodes>=0.10.1
Downloading kmodes-0.12.1-py2.py3-none-any.whl (20 kB)
Requirement already satisfied: scipy<=1.5.4 in /usr/local/lib/python3.7/dist-packages (from pycaret==2.3.10) (1.4.1)
Collecting pyLDAvis
Downloading pyLDAvis-3.3.1.tar.gz (1.7 MB)
|████████████████████████████████| 1.7 MB 27.4 MB/s
Installing build dependencies ... done
Getting requirements to build wheel ... done
Installing backend dependencies ... done
Preparing wheel metadata ... done
Requirement already satisfied: numba<0.55 in /usr/local/lib/python3.7/dist-packages (from pycaret==2.3.10) (0.51.2)
Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from pycaret==2.3.10) (3.2.2)
Requirement already satisfied: pyyaml<6.0.0 in /usr/local/lib/python3.7/dist-packages (from pycaret==2.3.10) (3.13)
Requirement already satisfied: nltk in /usr/local/lib/python3.7/dist-packages (from pycaret==2.3.10) (3.7)
Requirement already satisfied: ipywidgets in /usr/local/lib/python3.7/dist-packages (from pycaret==2.3.10) (7.7.0)
Collecting spacy<2.4.0
Downloading spacy-2.3.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.4 MB)
|████████████████████████████████| 10.4 MB 41.5 MB/s
Collecting imbalanced-learn==0.7.0
Downloading imbalanced_learn-0.7.0-py3-none-any.whl (167 kB)
|████████████████████████████████| 167 kB 76.9 MB/s
Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from pycaret==2.3.10) (1.3.5)
Requirement already satisfied: seaborn in /usr/local/lib/python3.7/dist-packages (from pycaret==2.3.10) (0.11.2)
Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from pycaret==2.3.10) (1.1.0)
Requirement already satisfied: cufflinks>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from pycaret==2.3.10) (0.17.3)
Collecting scikit-plot
Downloading scikit_plot-0.3.7-py3-none-any.whl (33 kB)
Collecting lightgbm>=2.3.1
Downloading lightgbm-3.3.2-py3-none-manylinux1_x86_64.whl (2.0 MB)
|████████████████████████████████| 2.0 MB 43.4 MB/s
Requirement already satisfied: yellowbrick>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from pycaret==2.3.10) (1.4)
Collecting scikit-learn==0.23.2
Downloading scikit_learn-0.23.2-cp37-cp37m-manylinux1_x86_64.whl (6.8 MB)
|████████████████████████████████| 6.8 MB 42.3 MB/s
Collecting pyod
Downloading pyod-1.0.2.tar.gz (122 kB)
|████████████████████████████████| 122 kB 60.0 MB/s
Requirement already satisfied: plotly>=4.4.1 in /usr/local/lib/python3.7/dist-packages (from pycaret==2.3.10) (5.5.0)
Collecting pandas-profiling>=2.8.0
Downloading pandas_profiling-3.2.0-py2.py3-none-any.whl (262 kB)
|████████████████████████████████| 262 kB 5.8 MB/s
Requirement already satisfied: gensim<4.0.0 in /usr/local/lib/python3.7/dist-packages (from pycaret==2.3.10) (3.6.0)
Collecting Boruta
Downloading Boruta-0.3-py3-none-any.whl (56 kB)
|████████████████████████████████| 56 kB 5.5 MB/s
Requirement already satisfied: textblob in /usr/local/lib/python3.7/dist-packages (from pycaret==2.3.10) (0.15.3)
Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.7/dist-packages (from imbalanced-learn==0.7.0->pycaret==2.3.10) (1.21.6)
Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn==0.23.2->pycaret==2.3.10) (3.1.0)
Requirement already satisfied: colorlover>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from cufflinks>=0.17.0->pycaret==2.3.10) (0.3.0)
Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.7/dist-packages (from cufflinks>=0.17.0->pycaret==2.3.10) (1.15.0)
Requirement already satisfied: setuptools>=34.4.1 in /usr/local/lib/python3.7/dist-packages (from cufflinks>=0.17.0->pycaret==2.3.10) (57.4.0)
Requirement already satisfied: smart-open>=1.2.1 in /usr/local/lib/python3.7/dist-packages (from gensim<4.0.0->pycaret==2.3.10) (5.2.1)
Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.7/dist-packages (from IPython->pycaret==2.3.10) (5.1.1)
Requirement already satisfied: prompt-toolkit<2.0.0,>=1.0.4 in /usr/local/lib/python3.7/dist-packages (from IPython->pycaret==2.3.10) (1.0.18)
Requirement already satisfied: pexpect in /usr/local/lib/python3.7/dist-packages (from IPython->pycaret==2.3.10) (4.8.0)
Requirement already satisfied: simplegeneric>0.8 in /usr/local/lib/python3.7/dist-packages (from IPython->pycaret==2.3.10) (0.8.1)
Requirement already satisfied: pygments in /usr/local/lib/python3.7/dist-packages (from IPython->pycaret==2.3.10) (2.6.1)
Requirement already satisfied: decorator in /usr/local/lib/python3.7/dist-packages (from IPython->pycaret==2.3.10) (4.4.2)
Requirement already satisfied: pickleshare in /usr/local/lib/python3.7/dist-packages (from IPython->pycaret==2.3.10) (0.7.5)
Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from ipywidgets->pycaret==2.3.10) (1.1.0)
Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.7/dist-packages (from ipywidgets->pycaret==2.3.10) (4.10.1)
Requirement already satisfied: nbformat>=4.2.0 in /usr/local/lib/python3.7/dist-packages (from ipywidgets->pycaret==2.3.10) (5.4.0)
Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.7/dist-packages (from ipywidgets->pycaret==2.3.10) (0.2.0)
Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.7/dist-packages (from ipywidgets->pycaret==2.3.10) (3.6.0)
Requirement already satisfied: tornado>=4.0 in /usr/local/lib/python3.7/dist-packages (from ipykernel>=4.5.1->ipywidgets->pycaret==2.3.10) (5.1.1)
Requirement already satisfied: jupyter-client in /usr/local/lib/python3.7/dist-packages (from ipykernel>=4.5.1->ipywidgets->pycaret==2.3.10) (5.3.5)
Requirement already satisfied: wheel in /usr/local/lib/python3.7/dist-packages (from lightgbm>=2.3.1->pycaret==2.3.10) (0.37.1)
Collecting mlxtend>=0.17.0
Downloading mlxtend-0.19.0-py2.py3-none-any.whl (1.3 MB)
|████████████████████████████████| 1.3 MB 48.9 MB/s
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->pycaret==2.3.10) (0.11.0)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->pycaret==2.3.10) (1.4.3)
Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->pycaret==2.3.10) (2.8.2)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->pycaret==2.3.10) (3.0.9)
Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from kiwisolver>=1.0.1->matplotlib->pycaret==2.3.10) (4.1.1)
Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.7/dist-packages (from nbformat>=4.2.0->ipywidgets->pycaret==2.3.10) (4.3.3)
Requirement already satisfied: jupyter-core in /usr/local/lib/python3.7/dist-packages (from nbformat>=4.2.0->ipywidgets->pycaret==2.3.10) (4.10.0)
Requirement already satisfied: fastjsonschema in /usr/local/lib/python3.7/dist-packages (from nbformat>=4.2.0->ipywidgets->pycaret==2.3.10) (2.15.3)
Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema>=2.6->nbformat>=4.2.0->ipywidgets->pycaret==2.3.10) (21.4.0)
Requirement already satisfied: importlib-resources>=1.4.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema>=2.6->nbformat>=4.2.0->ipywidgets->pycaret==2.3.10) (5.7.1)
Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema>=2.6->nbformat>=4.2.0->ipywidgets->pycaret==2.3.10) (0.18.1)
Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from jsonschema>=2.6->nbformat>=4.2.0->ipywidgets->pycaret==2.3.10) (4.11.4)
Requirement already satisfied: zipp>=3.1.0 in /usr/local/lib/python3.7/dist-packages (from importlib-resources>=1.4.0->jsonschema>=2.6->nbformat>=4.2.0->ipywidgets->pycaret==2.3.10) (3.8.0)
Requirement already satisfied: llvmlite<0.35,>=0.34.0.dev0 in /usr/local/lib/python3.7/dist-packages (from numba<0.55->pycaret==2.3.10) (0.34.0)
Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->pycaret==2.3.10) (2022.1)
Requirement already satisfied: missingno>=0.4.2 in /usr/local/lib/python3.7/dist-packages (from pandas-profiling>=2.8.0->pycaret==2.3.10) (0.5.1)
Collecting visions[type_image_path]==0.7.4
Downloading visions-0.7.4-py3-none-any.whl (102 kB)
|████████████████████████████████| 102 kB 13.8 MB/s
Collecting tangled-up-in-unicode==0.2.0
Downloading tangled_up_in_unicode-0.2.0-py3-none-any.whl (4.7 MB)
|████████████████████████████████| 4.7 MB 50.1 MB/s
Collecting requests>=2.24.0
Downloading requests-2.28.0-py3-none-any.whl (62 kB)
|████████████████████████████████| 62 kB 1.7 MB/s
Collecting markupsafe~=2.1.1
Downloading MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (25 kB)
Collecting phik>=0.11.1
Downloading phik-0.12.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (690 kB)
|████████████████████████████████| 690 kB 70.8 MB/s
Collecting multimethod>=1.4
Downloading multimethod-1.8-py3-none-any.whl (9.8 kB)
Collecting pyyaml<6.0.0
Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
|████████████████████████████████| 636 kB 54.8 MB/s
Requirement already satisfied: pydantic>=1.8.1 in /usr/local/lib/python3.7/dist-packages (from pandas-profiling>=2.8.0->pycaret==2.3.10) (1.8.2)
Requirement already satisfied: jinja2>=2.11.1 in /usr/local/lib/python3.7/dist-packages (from pandas-profiling>=2.8.0->pycaret==2.3.10) (2.11.3)
Requirement already satisfied: tqdm>=4.48.2 in /usr/local/lib/python3.7/dist-packages (from pandas-profiling>=2.8.0->pycaret==2.3.10) (4.64.0)
Collecting htmlmin>=0.1.12
Downloading htmlmin-0.1.12.tar.gz (19 kB)
Requirement already satisfied: networkx>=2.4 in /usr/local/lib/python3.7/dist-packages (from visions[type_image_path]==0.7.4->pandas-profiling>=2.8.0->pycaret==2.3.10) (2.6.3)
Requirement already satisfied: Pillow in /usr/local/lib/python3.7/dist-packages (from visions[type_image_path]==0.7.4->pandas-profiling>=2.8.0->pycaret==2.3.10) (7.1.2)
Collecting imagehash
Downloading ImageHash-4.2.1.tar.gz (812 kB)
|████████████████████████████████| 812 kB 49.5 MB/s
Collecting scipy<=1.5.4
Downloading scipy-1.5.4-cp37-cp37m-manylinux1_x86_64.whl (25.9 MB)
|████████████████████████████████| 25.9 MB 1.4 MB/s
Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.7/dist-packages (from plotly>=4.4.1->pycaret==2.3.10) (8.0.1)
Requirement already satisfied: wcwidth in /usr/local/lib/python3.7/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->IPython->pycaret==2.3.10) (0.2.5)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.24.0->pandas-profiling>=2.8.0->pycaret==2.3.10) (2022.6.15)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.24.0->pandas-profiling>=2.8.0->pycaret==2.3.10) (2.10)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.24.0->pandas-profiling>=2.8.0->pycaret==2.3.10) (1.24.3)
Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.7/dist-packages (from requests>=2.24.0->pandas-profiling>=2.8.0->pycaret==2.3.10) (2.0.12)
Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy<2.4.0->pycaret==2.3.10) (3.0.6)
Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy<2.4.0->pycaret==2.3.10) (1.0.7)
Collecting plac<1.2.0,>=0.9.6
Downloading plac-1.1.3-py2.py3-none-any.whl (20 kB)
Requirement already satisfied: blis<0.8.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy<2.4.0->pycaret==2.3.10) (0.7.7)
Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy<2.4.0->pycaret==2.3.10) (2.0.6)
Collecting srsly<1.1.0,>=1.0.2
Downloading srsly-1.0.5-cp37-cp37m-manylinux2014_x86_64.whl (184 kB)
|████████████████████████████████| 184 kB 62.8 MB/s
Collecting thinc<7.5.0,>=7.4.1
Downloading thinc-7.4.5-cp37-cp37m-manylinux2014_x86_64.whl (1.0 MB)
|████████████████████████████████| 1.0 MB 44.1 MB/s
Collecting catalogue<1.1.0,>=0.0.7
Downloading catalogue-1.0.0-py2.py3-none-any.whl (7.7 kB)
Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy<2.4.0->pycaret==2.3.10) (0.9.1)
Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.7/dist-packages (from widgetsnbextension~=3.6.0->ipywidgets->pycaret==2.3.10) (5.3.1)
Requirement already satisfied: terminado>=0.8.1 in /usr/local/lib/python3.7/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->pycaret==2.3.10) (0.13.3)
Requirement already satisfied: Send2Trash in /usr/local/lib/python3.7/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->pycaret==2.3.10) (1.8.0)
Requirement already satisfied: nbconvert in /usr/local/lib/python3.7/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->pycaret==2.3.10) (5.6.1)
Requirement already satisfied: pyzmq>=13 in /usr/local/lib/python3.7/dist-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets->pycaret==2.3.10) (23.1.0)
Requirement already satisfied: ptyprocess in /usr/local/lib/python3.7/dist-packages (from terminado>=0.8.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->pycaret==2.3.10) (0.7.0)
Collecting yellowbrick>=1.0.1
Downloading yellowbrick-1.3.post1-py3-none-any.whl (271 kB)
|████████████████████████████████| 271 kB 60.8 MB/s
Collecting numpy>=1.13.3
Downloading numpy-1.19.5-cp37-cp37m-manylinux2010_x86_64.whl (14.8 MB)
|████████████████████████████████| 14.8 MB 296 kB/s
Requirement already satisfied: PyWavelets in /usr/local/lib/python3.7/dist-packages (from imagehash->visions[type_image_path]==0.7.4->pandas-profiling>=2.8.0->pycaret==2.3.10) (1.3.0)
Collecting querystring-parser
Downloading querystring_parser-1.2.4-py2.py3-none-any.whl (7.9 kB)
Collecting docker>=4.0.0
Downloading docker-5.0.3-py2.py3-none-any.whl (146 kB)
|████████████████████████████████| 146 kB 60.3 MB/s
Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret==2.3.10) (7.1.2)
Collecting databricks-cli>=0.8.7
Downloading databricks-cli-0.17.0.tar.gz (81 kB)
|████████████████████████████████| 81 kB 9.9 MB/s
Requirement already satisfied: sqlparse>=0.3.1 in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret==2.3.10) (0.4.2)
Collecting gunicorn
Downloading gunicorn-20.1.0-py3-none-any.whl (79 kB)
|████████████████████████████████| 79 kB 9.1 MB/s
Requirement already satisfied: entrypoints in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret==2.3.10) (0.4)
Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret==2.3.10) (21.3)
Requirement already satisfied: Flask in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret==2.3.10) (1.1.4)
Requirement already satisfied: cloudpickle in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret==2.3.10) (1.3.0)
Collecting prometheus-flask-exporter
Downloading prometheus_flask_exporter-0.20.2-py3-none-any.whl (18 kB)
Requirement already satisfied: sqlalchemy>=1.4.0 in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret==2.3.10) (1.4.37)
Collecting alembic
Downloading alembic-1.8.0-py3-none-any.whl (209 kB)
|████████████████████████████████| 209 kB 56.7 MB/s
Requirement already satisfied: protobuf>=3.12.0 in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret==2.3.10) (3.17.3)
Collecting gitpython>=2.1.0
Downloading GitPython-3.1.27-py3-none-any.whl (181 kB)
|████████████████████████████████| 181 kB 62.8 MB/s
Collecting pyjwt>=1.7.0
Downloading PyJWT-2.4.0-py3-none-any.whl (18 kB)
Requirement already satisfied: oauthlib>=3.1.0 in /usr/local/lib/python3.7/dist-packages (from databricks-cli>=0.8.7->mlflow->pycaret==2.3.10) (3.2.0)
Requirement already satisfied: tabulate>=0.7.7 in /usr/local/lib/python3.7/dist-packages (from databricks-cli>=0.8.7->mlflow->pycaret==2.3.10) (0.8.9)
Collecting websocket-client>=0.32.0
Downloading websocket_client-1.3.3-py3-none-any.whl (54 kB)
|████████████████████████████████| 54 kB 3.3 MB/s
Collecting gitdb<5,>=4.0.1
Downloading gitdb-4.0.9-py3-none-any.whl (63 kB)
|████████████████████████████████| 63 kB 2.0 MB/s
Collecting smmap<6,>=3.0.1
Downloading smmap-5.0.0-py3-none-any.whl (24 kB)
Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.7/dist-packages (from sqlalchemy>=1.4.0->mlflow->pycaret==2.3.10) (1.1.2)
Collecting Mako
Downloading Mako-1.2.0-py3-none-any.whl (78 kB)
|████████████████████████████████| 78 kB 7.7 MB/s
Requirement already satisfied: Werkzeug<2.0,>=0.15 in /usr/local/lib/python3.7/dist-packages (from Flask->mlflow->pycaret==2.3.10) (1.0.1)
Requirement already satisfied: itsdangerous<2.0,>=0.24 in /usr/local/lib/python3.7/dist-packages (from Flask->mlflow->pycaret==2.3.10) (1.1.0)
Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->pycaret==2.3.10) (1.5.0)
Requirement already satisfied: bleach in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->pycaret==2.3.10) (5.0.0)
Requirement already satisfied: defusedxml in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->pycaret==2.3.10) (0.7.1)
Requirement already satisfied: testpath in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->pycaret==2.3.10) (0.6.0)
Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->pycaret==2.3.10) (0.8.4)
Requirement already satisfied: webencodings in /usr/local/lib/python3.7/dist-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets->pycaret==2.3.10) (0.5.1)
Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.7/dist-packages (from nltk->pycaret==2.3.10) (2022.6.2)
Requirement already satisfied: prometheus-client in /usr/local/lib/python3.7/dist-packages (from prometheus-flask-exporter->mlflow->pycaret==2.3.10) (0.14.1)
Requirement already satisfied: sklearn in /usr/local/lib/python3.7/dist-packages (from pyLDAvis->pycaret==2.3.10) (0.0)
Requirement already satisfied: future in /usr/local/lib/python3.7/dist-packages (from pyLDAvis->pycaret==2.3.10) (0.16.0)
Requirement already satisfied: numexpr in /usr/local/lib/python3.7/dist-packages (from pyLDAvis->pycaret==2.3.10) (2.8.1)
Collecting funcy
Downloading funcy-1.17-py2.py3-none-any.whl (33 kB)
Collecting pyLDAvis
Downloading pyLDAvis-3.3.0.tar.gz (1.7 MB)
|████████████████████████████████| 1.7 MB 49.5 MB/s
Installing build dependencies ... done
Getting requirements to build wheel ... done
Installing backend dependencies ... done
Preparing wheel metadata ... done
Downloading pyLDAvis-3.2.2.tar.gz (1.7 MB)
|████████████████████████████████| 1.7 MB 31.9 MB/s
Requirement already satisfied: statsmodels in /usr/local/lib/python3.7/dist-packages (from pyod->pycaret==2.3.10) (0.10.2)
Requirement already satisfied: patsy>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from statsmodels->pyod->pycaret==2.3.10) (0.5.2)
Collecting pynndescent>=0.5
Downloading pynndescent-0.5.7.tar.gz (1.1 MB)
|████████████████████████████████| 1.1 MB 41.8 MB/s
Building wheels for collected packages: htmlmin, imagehash, databricks-cli, pyLDAvis, pyod, umap-learn, pynndescent
Building wheel for htmlmin (setup.py) ... done
Created wheel for htmlmin: filename=htmlmin-0.1.12-py3-none-any.whl size=27098 sha256=6ab84055b5f771db87e750ccf0e991dc9da254b79ba5e94599ac7cfb3f6dd7fc
Stored in directory: /root/.cache/pip/wheels/70/e1/52/5b14d250ba868768823940c3229e9950d201a26d0bd3ee8655
Building wheel for imagehash (setup.py) ... done
Created wheel for imagehash: filename=ImageHash-4.2.1-py2.py3-none-any.whl size=295206 sha256=93cf5e761a012d1a63215094807a8baa06c554a896b7b0af8d7f0aaa707b8d68
Stored in directory: /root/.cache/pip/wheels/4c/d5/59/5e3e297533ddb09407769762985d134135064c6831e29a914e
Building wheel for databricks-cli (setup.py) ... done
Created wheel for databricks-cli: filename=databricks_cli-0.17.0-py3-none-any.whl size=141960 sha256=f8729ae16063314bc5910c2d04693da8baea4c9dc9871a8cb4a88f5db0a07122
Stored in directory: /root/.cache/pip/wheels/55/c3/db/33705569425fd2bdc9ea73051a8053fa26965c2bce8a146747
Building wheel for pyLDAvis (setup.py) ... done
Created wheel for pyLDAvis: filename=pyLDAvis-3.2.2-py2.py3-none-any.whl size=135617 sha256=d13232464b3d49dd9185f8cd59bd2b596da937e1498102b4a3657e4cc696338c
Stored in directory: /root/.cache/pip/wheels/f8/b1/9b/560ac1931796b7303f7b517b949d2d31a4fbc512aad3b9f284
Building wheel for pyod (setup.py) ... done
Created wheel for pyod: filename=pyod-1.0.2-py3-none-any.whl size=150272 sha256=0600aaa5e87bddcd7a4f51bac7323b53f33aede50cb894b0ebe1ce91f3071117
Stored in directory: /root/.cache/pip/wheels/e6/8f/06/5512935ed3c79659f612e8bb8f43cb51dd47c21973e0230997
Building wheel for umap-learn (setup.py) ... done
Created wheel for umap-learn: filename=umap_learn-0.5.3-py3-none-any.whl size=82829 sha256=788a07ec7e4926732d5d590395fd08c936024e61fbd95bfcbd11d24af1eedac7
Stored in directory: /root/.cache/pip/wheels/b3/52/a5/1fd9e3e76a7ab34f134c07469cd6f16e27ef3a37aeff1fe821
Building wheel for pynndescent (setup.py) ... done
Created wheel for pynndescent: filename=pynndescent-0.5.7-py3-none-any.whl size=54286 sha256=0e7a8a900d8deefb4e44d71ac83902176ae2edd1adac67894e6e37b39df192b2
Stored in directory: /root/.cache/pip/wheels/7f/2a/f8/7bd5dcec71bd5c669f6f574db3113513696b98f3f9b51f496c
Successfully built htmlmin imagehash databricks-cli pyLDAvis pyod umap-learn pynndescent
Installing collected packages: markupsafe, numpy, tangled-up-in-unicode, smmap, scipy, multimethod, websocket-client, visions, srsly, scikit-learn, requests, pyjwt, plac, Mako, imagehash, gitdb, catalogue, thinc, querystring-parser, pyyaml, pynndescent, prometheus-flask-exporter, phik, htmlmin, gunicorn, gitpython, funcy, docker, databricks-cli, alembic, yellowbrick, umap-learn, spacy, scikit-plot, pyod, pyLDAvis, pandas-profiling, mlxtend, mlflow, lightgbm, kmodes, imbalanced-learn, Boruta, pycaret
Attempting uninstall: markupsafe
Found existing installation: MarkupSafe 2.0.1
Uninstalling MarkupSafe-2.0.1:
Successfully uninstalled MarkupSafe-2.0.1
Attempting uninstall: numpy
Found existing installation: numpy 1.21.6
Uninstalling numpy-1.21.6:
Successfully uninstalled numpy-1.21.6
Attempting uninstall: scipy
Found existing installation: scipy 1.4.1
Uninstalling scipy-1.4.1:
Successfully uninstalled scipy-1.4.1
Attempting uninstall: srsly
Found existing installation: srsly 2.4.3
Uninstalling srsly-2.4.3:
Successfully uninstalled srsly-2.4.3
Attempting uninstall: scikit-learn
Found existing installation: scikit-learn 1.0.2
Uninstalling scikit-learn-1.0.2:
Successfully uninstalled scikit-learn-1.0.2
Attempting uninstall: requests
Found existing installation: requests 2.23.0
Uninstalling requests-2.23.0:
Successfully uninstalled requests-2.23.0
Attempting uninstall: catalogue
Found existing installation: catalogue 2.0.7
Uninstalling catalogue-2.0.7:
Successfully uninstalled catalogue-2.0.7
Attempting uninstall: thinc
Found existing installation: thinc 8.0.17
Uninstalling thinc-8.0.17:
Successfully uninstalled thinc-8.0.17
Attempting uninstall: pyyaml
Found existing installation: PyYAML 3.13
Uninstalling PyYAML-3.13:
Successfully uninstalled PyYAML-3.13
Attempting uninstall: yellowbrick
Found existing installation: yellowbrick 1.4
Uninstalling yellowbrick-1.4:
Successfully uninstalled yellowbrick-1.4
Attempting uninstall: spacy
Found existing installation: spacy 3.3.1
Uninstalling spacy-3.3.1:
Successfully uninstalled spacy-3.3.1
Attempting uninstall: pandas-profiling
Found existing installation: pandas-profiling 1.4.1
Uninstalling pandas-profiling-1.4.1:
Successfully uninstalled pandas-profiling-1.4.1
Attempting uninstall: mlxtend
Found existing installation: mlxtend 0.14.0
Uninstalling mlxtend-0.14.0:
Successfully uninstalled mlxtend-0.14.0
Attempting uninstall: lightgbm
Found existing installation: lightgbm 2.2.3
Uninstalling lightgbm-2.2.3:
Successfully uninstalled lightgbm-2.2.3
Attempting uninstall: imbalanced-learn
Found existing installation: imbalanced-learn 0.8.1
Uninstalling imbalanced-learn-0.8.1:
Successfully uninstalled imbalanced-learn-0.8.1
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
xarray-einstats 0.2.2 requires numpy>=1.21, but you have numpy 1.19.5 which is incompatible.
tensorflow 2.8.2+zzzcolab20220527125636 requires numpy>=1.20, but you have numpy 1.19.5 which is incompatible.
google-colab 1.0.0 requires requests~=2.23.0, but you have requests 2.28.0 which is incompatible.
en-core-web-sm 3.3.0 requires spacy<3.4.0,>=3.3.0.dev0, but you have spacy 2.3.7 which is incompatible.
datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.
albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.
Successfully installed Boruta-0.3 Mako-1.2.0 alembic-1.8.0 catalogue-1.0.0 databricks-cli-0.17.0 docker-5.0.3 funcy-1.17 gitdb-4.0.9 gitpython-3.1.27 gunicorn-20.1.0 htmlmin-0.1.12 imagehash-4.2.1 imbalanced-learn-0.7.0 kmodes-0.12.1 lightgbm-3.3.2 markupsafe-2.1.1 mlflow-1.27.0 mlxtend-0.19.0 multimethod-1.8 numpy-1.19.5 pandas-profiling-3.2.0 phik-0.12.2 plac-1.1.3 prometheus-flask-exporter-0.20.2 pyLDAvis-3.2.2 pycaret-2.3.10 pyjwt-2.4.0 pynndescent-0.5.7 pyod-1.0.2 pyyaml-5.4.1 querystring-parser-1.2.4 requests-2.28.0 scikit-learn-0.23.2 scikit-plot-0.3.7 scipy-1.5.4 smmap-5.0.0 spacy-2.3.7 srsly-1.0.5 tangled-up-in-unicode-0.2.0 thinc-7.4.5 umap-learn-0.5.3 visions-0.7.4 websocket-client-1.3.3 yellowbrick-1.3.post1
# PyCaret (AutoML) plus standard data-analysis and plotting libraries.
import pycaret
from pycaret.classification import *
from IPython.display import Image, display
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
--------------------------------------------------------------------------- ImportError Traceback (most recent call last) <ipython-input-2-be86b9534894> in <module>() 1 import pycaret ----> 2 from pycaret.classification import * 3 from IPython.display import Image, display 4 import pandas as pd 5 import matplotlib.pyplot as plt /usr/local/lib/python3.7/dist-packages/pycaret/classification.py in <module>() 8 import numpy as np 9 ---> 10 import pycaret.internal.tabular 11 from pycaret.loggers.base_logger import BaseLogger 12 from pycaret.parallel import ParallelBackend /usr/local/lib/python3.7/dist-packages/pycaret/internal/tabular.py in <module>() 14 get_estimator_from_meta_estimator, 15 ) ---> 16 from pycaret.internal.pipeline import ( 17 add_estimator_to_pipeline, 18 get_pipeline_estimator_label, /usr/local/lib/python3.7/dist-packages/pycaret/internal/pipeline.py in <module>() 9 # This pipeline is only to be used internally. 10 ---> 11 from pycaret.internal.utils import get_all_object_vars_and_properties, is_fit_var 12 import imblearn.pipeline 13 from sklearn.utils import _print_elapsed_time /usr/local/lib/python3.7/dist-packages/pycaret/internal/utils.py in <module>() 8 from pycaret.containers.models.base_model import ModelContainer 9 import pandas as pd ---> 10 import pandas.io.formats.style 11 import ipywidgets as ipw 12 from IPython.display import display, HTML, clear_output, update_display /usr/local/lib/python3.7/dist-packages/pandas/io/formats/style.py in <module>() 47 from pandas.io.formats.format import save_to_buffer 48 ---> 49 jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.") 50 51 from pandas.io.formats.style_render import ( /usr/local/lib/python3.7/dist-packages/pandas/compat/_optional.py in import_optional_dependency(name, extra, errors, min_version) 116 except ImportError: 117 if errors == "raise": --> 118 raise ImportError(msg) from None 119 else: 120 return None ImportError: Missing optional dependency 
'Jinja2'. DataFrame.style requires jinja2. Use pip or conda to install Jinja2. --------------------------------------------------------------------------- NOTE: If your import is failing due to a missing package, you can manually install dependencies using either !pip or !apt. To view examples of installing some common dependencies, click the "Open Examples" button below. ---------------------------------------------------------------------------
from pycaret.classification import *
ImportError: Missing optional dependency 'Jinja2'. DataFrame.style requires jinja2. Use pip or conda to install Jinja2.
pip3 install jinja2==3.0.1
pip install markupsafe==2.0.1
!pip install jinja2==3.0.1
!pip install markupsafe==2.0.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/ Collecting jinja2==3.0.1 Downloading Jinja2-3.0.1-py3-none-any.whl (133 kB) |████████████████████████████████| 133 kB 4.1 MB/s Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.7/dist-packages (from jinja2==3.0.1) (2.1.1) Installing collected packages: jinja2 Attempting uninstall: jinja2 Found existing installation: Jinja2 2.11.3 Uninstalling Jinja2-2.11.3: Successfully uninstalled Jinja2-2.11.3 ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. google-colab 1.0.0 requires requests~=2.23.0, but you have requests 2.28.0 which is incompatible. flask 1.1.4 requires Jinja2<3.0,>=2.10.1, but you have jinja2 3.0.1 which is incompatible. datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible. Successfully installed jinja2-3.0.1 Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/ Collecting markupsafe==2.0.1 Downloading MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (31 kB) Installing collected packages: markupsafe Attempting uninstall: markupsafe Found existing installation: MarkupSafe 2.1.1 Uninstalling MarkupSafe-2.1.1: Successfully uninstalled MarkupSafe-2.1.1 ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. pandas-profiling 3.2.0 requires markupsafe~=2.1.1, but you have markupsafe 2.0.1 which is incompatible. flask 1.1.4 requires Jinja2<3.0,>=2.10.1, but you have jinja2 3.0.1 which is incompatible. datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible. Successfully installed markupsafe-2.0.1
# Free memory before importing the heavy pycaret stack.
# gc.collect() runs a full garbage-collection pass and returns the
# number of unreachable objects found (the "128" printed below).
import gc
gc.collect()
128
# Import the modelling / plotting libraries and report the versions in
# use (pycaret, seaborn, pandas) for reproducibility.
import pycaret
from pycaret.classification import *
from IPython.display import Image, display
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
print(pycaret.__version__)
print(sns.__version__)
print(pd.__version__)
/usr/local/lib/python3.7/dist-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details. defaults = yaml.load(f)
2.3.10 0.11.2 1.3.5
# Core data-handling libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
# Machine-learning related libraries
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold, train_test_split
# Visualization related libraries
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# Shared pastel palette and background color used by the seaborn plots below.
PALETTE=['lightcoral', 'lightskyblue', 'gold', 'sandybrown', 'navajowhite',
'khaki', 'lightslategrey', 'turquoise', 'rosybrown', 'thistle', 'pink']
sns.set_palette(PALETTE)
BACKCOLOR = '#f6f5f5'
from IPython.core.display import HTML
def multi_table(table_list):
    """Render several DataFrames side by side as one HTML table row.

    Each element of *table_list* must expose ``_repr_html_`` (as pandas
    DataFrames and Stylers do); the rendered fragments are placed in
    adjacent <td> cells of a single-row table.
    """
    cells = ['<td>' + tbl._repr_html_() + '</td>' for tbl in table_list]
    return HTML(
        f"<table><tr> {''.join(cells)} </tr></table>")
# Load the competition files and stack train + test so preprocessing
# can be applied to both at once.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
submission = pd.read_csv("sample_submission.csv")
# NOTE: concat keeps each frame's original index, so indices repeat
# (hence "Int64Index: 12970 entries, 0 to 4276" in info() below).
all_data = pd.concat([train, test], axis=0)
# Styled preview of the first ten rows.
all_data.head(10).style.background_gradient()
PassengerId | HomePlanet | CryoSleep | Cabin | Destination | Age | VIP | RoomService | FoodCourt | ShoppingMall | Spa | VRDeck | Name | Transported | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0001_01 | Europa | False | B/0/P | TRAPPIST-1e | 39.000000 | False | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | Maham Ofracculy | False |
1 | 0002_01 | Earth | False | F/0/S | TRAPPIST-1e | 24.000000 | False | 109.000000 | 9.000000 | 25.000000 | 549.000000 | 44.000000 | Juanna Vines | True |
2 | 0003_01 | Europa | False | A/0/S | TRAPPIST-1e | 58.000000 | True | 43.000000 | 3576.000000 | 0.000000 | 6715.000000 | 49.000000 | Altark Susent | False |
3 | 0003_02 | Europa | False | A/0/S | TRAPPIST-1e | 33.000000 | False | 0.000000 | 1283.000000 | 371.000000 | 3329.000000 | 193.000000 | Solam Susent | False |
4 | 0004_01 | Earth | False | F/1/S | TRAPPIST-1e | 16.000000 | False | 303.000000 | 70.000000 | 151.000000 | 565.000000 | 2.000000 | Willy Santantines | True |
5 | 0005_01 | Earth | False | F/0/P | PSO J318.5-22 | 44.000000 | False | 0.000000 | 483.000000 | 0.000000 | 291.000000 | 0.000000 | Sandie Hinetthews | True |
6 | 0006_01 | Earth | False | F/2/S | TRAPPIST-1e | 26.000000 | False | 42.000000 | 1539.000000 | 3.000000 | 0.000000 | 0.000000 | Billex Jacostaffey | True |
7 | 0006_02 | Earth | True | G/0/S | TRAPPIST-1e | 28.000000 | False | 0.000000 | 0.000000 | 0.000000 | 0.000000 | nan | Candra Jacostaffey | True |
8 | 0007_01 | Earth | False | F/3/S | TRAPPIST-1e | 35.000000 | False | 0.000000 | 785.000000 | 17.000000 | 216.000000 | 0.000000 | Andona Beston | True |
9 | 0008_01 | Europa | True | B/1/P | 55 Cancri e | 14.000000 | False | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | Erraiam Flatic | True |
# Report the row x column counts of each split and of the combined frame.
for label, frame in (('train', train), ('test', test), ('total', all_data)):
    print(f'{label} size : {frame.shape[0]} x {frame.shape[1]}')
train size : 8693 x 14 test size : 4277 x 13 total size : 12970 x 14
# Quick schema check: column names, dtypes, and per-column non-null counts.
display(all_data.columns)
print()
display(all_data.dtypes)
print()
# info() prints its report and returns None (hence the trailing 'None' output).
display(all_data.info())
Index(['PassengerId', 'HomePlanet', 'CryoSleep', 'Cabin', 'Destination', 'Age', 'VIP', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck', 'Name', 'Transported'], dtype='object')
PassengerId object HomePlanet object CryoSleep object Cabin object Destination object Age float64 VIP object RoomService float64 FoodCourt float64 ShoppingMall float64 Spa float64 VRDeck float64 Name object Transported object dtype: object
<class 'pandas.core.frame.DataFrame'> Int64Index: 12970 entries, 0 to 4276 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 PassengerId 12970 non-null object 1 HomePlanet 12682 non-null object 2 CryoSleep 12660 non-null object 3 Cabin 12671 non-null object 4 Destination 12696 non-null object 5 Age 12700 non-null float64 6 VIP 12674 non-null object 7 RoomService 12707 non-null float64 8 FoodCourt 12681 non-null float64 9 ShoppingMall 12664 non-null float64 10 Spa 12686 non-null float64 11 VRDeck 12702 non-null float64 12 Name 12676 non-null object 13 Transported 8693 non-null object dtypes: float64(6), object(8) memory usage: 1.5+ MB
None
# Column groupings used throughout the EDA below.
# FIX: 'Desination' was a typo for the actual column name 'Destination'
# (see all_data.columns) — any lookup via nominal_vars would KeyError.
nominal_vars = ['HomePlanet', 'CryoSleep', 'Cabin', 'Destination', 'VIP', 'Name']
continuous_vars = ['Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']
target = 'Transported'
# Summary statistics of the continuous columns, rendered side by side:
# combined data first, then train, then test.
all_st, train_st, test_st = (
    frame[continuous_vars].describe() for frame in (all_data, train, test)
)
multi_table([all_st, train_st, test_st])
|
|
|
# Visual overview of missingness: one column per feature, gaps mark NaNs.
import missingno as msno
msno.matrix(all_data)
<matplotlib.axes._subplots.AxesSubplot at 0x7f8411feed10>
msno.bar(all_data)
<matplotlib.axes._subplots.AxesSubplot at 0x7f8411f54690>
# Pie chart of the binary target (Transported) class balance.
class_counts = train.Transported.value_counts()
plt.subplots(figsize=(25, 10))
plt.pie(class_counts,
        explode=[.03, .03],
        shadow=True,
        autopct='%1.1f%%',
        textprops={'fontsize': 20, 'color': 'white'})
plt.title('Transported Distribution', size=20)
plt.legend(['False', 'True'], loc='best', fontsize=12)
plt.show()
# Heatmap of the mean Transported rate (share of True) for every
# HomePlanet x CryoSleep combination.
plt.subplots(figsize=(10, 5))
rate_hp_cs = train.pivot_table(index='HomePlanet',
                               columns='CryoSleep',
                               values='Transported')
ax = sns.heatmap(rate_hp_cs, annot=True, cmap="YlGnBu")
ax.set_title('Transported ratio by HomePlanet and CryoSleep', weight='bold', size=15)
ax.set_xlabel('CryoSleep', weight='bold', size=13)
ax.set_ylabel('HomePlanet', weight='bold', size=13)
plt.show()
# Raw counts behind the HomePlanet/CryoSleep ratios:
# (CryoSleep, Transported) rows vs HomePlanet columns, with margins.
cs_hp_counts = pd.crosstab([train.CryoSleep, train.Transported],
                           train.HomePlanet, margins=True)
cs_hp_counts.style.background_gradient()
HomePlanet | Earth | Europa | Mars | All | |
---|---|---|---|---|---|
CryoSleep | Transported | ||||
False | False | 2109 | 697 | 757 | 3563 |
True | 997 | 465 | 290 | 1752 | |
True | False | 475 | 10 | 59 | 544 |
True | 907 | 901 | 610 | 2418 | |
All | 4488 | 2073 | 1716 | 8277 |
# Heatmap of the share of transported passengers for every
# HomePlanet x Destination pair (y: HomePlanet, x: Destination).
plt.subplots(figsize=(10, 5))
rate_hp_dst = train.pivot_table(index='HomePlanet',
                                columns='Destination',
                                values='Transported')
ax = sns.heatmap(rate_hp_dst, annot=True, cmap="YlGnBu")
ax.set_title('Transported ratio by HomePlanet and Destination', weight='bold', size=15)
ax.set_xlabel('Destination', weight='bold', size=13)
ax.set_ylabel('HomePlanet', weight='bold', size=13)
plt.show()
# Raw counts: (Destination, Transported) rows vs HomePlanet columns.
dst_hp_counts = pd.crosstab([train.Destination, train.Transported],
                            train.HomePlanet, margins=True)
dst_hp_counts.style.background_gradient()
HomePlanet | Earth | Europa | Mars | All | |
---|---|---|---|---|---|
Destination | Transported | ||||
55 Cancri e | False | 342 | 275 | 75 | 692 |
True | 348 | 611 | 118 | 1077 | |
PSO J318.5-22 | False | 357 | 5 | 27 | 389 |
True | 355 | 14 | 22 | 391 | |
TRAPPIST-1e | False | 1894 | 434 | 720 | 3048 |
True | 1207 | 755 | 755 | 2717 | |
All | 4503 | 2094 | 1717 | 8314 |
# Heatmap of the mean Transported rate for every CryoSleep x Destination pair.
plt.subplots(figsize=(10, 5))
rate_cs_dst = train.pivot_table(index='CryoSleep',
                                columns='Destination',
                                values='Transported')
ax = sns.heatmap(rate_cs_dst, annot=True, cmap="YlGnBu")
ax.set_title('Transported ratio by CryoSleep and Destination', weight='bold', size=15)
ax.set_xlabel('Destination', weight='bold', size=13)
ax.set_ylabel('CryoSleep', weight='bold', size=13)
plt.show()
# Raw counts: (CryoSleep, Transported) rows vs Destination columns.
cs_dst_counts = pd.crosstab([train.CryoSleep, train.Transported],
                            train.Destination, margins=True)
cs_dst_counts.style.background_gradient()
Destination | 55 Cancri e | PSO J318.5-22 | TRAPPIST-1e | All | |
---|---|---|---|---|---|
CryoSleep | Transported | ||||
False | False | 630 | 265 | 2669 | 3564 |
True | 387 | 129 | 1229 | 1745 | |
True | False | 53 | 119 | 379 | 551 |
True | 686 | 264 | 1488 | 2438 | |
All | 1756 | 777 | 5765 | 8298 |
# Check the frequency of CryoSleep values before imputing.
print( all_data['CryoSleep'].value_counts() ) # the majority value is False
False 8079 True 4581 Name: CryoSleep, dtype: int64
# ---- Missing-value imputation ----
# Categorical columns: fill with a fixed value (False / 'None') or with the
# most frequent value (mode).
# FIX: the original used fillna(..., inplace=True) on individual columns —
# the chained-assignment pattern pandas deprecates (it may operate on a
# temporary under copy-on-write). Plain assignment is equivalent and safe.
all_data['CryoSleep'] = all_data['CryoSleep'].fillna(False)  # False is the majority value
all_data['Cabin'] = all_data['Cabin'].fillna('None')
all_data['VIP'] = all_data['VIP'].fillna(all_data.VIP.mode()[0])
all_data['HomePlanet'] = all_data['HomePlanet'].fillna(all_data.HomePlanet.mode()[0])
all_data['Destination'] = all_data['Destination'].fillna(all_data.Destination.mode()[0])
# Continuous columns: Age -> mean, spending columns -> 0.
all_data['Age'] = all_data['Age'].fillna(all_data.Age.mean())
all_data[['RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']] =\
all_data[['RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']].fillna(0)
# Create new variables by decomposing the strings in Cabin ("Deck/Num/Side")
# and PassengerId ("Group_No").
all_data['Deck'] = all_data.Cabin.apply(lambda x:str(x)[:1])
all_data['Side'] = all_data.Cabin.apply(lambda x:str(x)[-1:])
all_data['PassengerGroup'] = all_data['PassengerId'].apply(lambda x: x.split('_')[0])
all_data['PassengerNo'] = all_data['PassengerId'].apply(lambda x: x.split('_')[1])
# Aggregate the five service columns into a total spend plus per-service shares.
all_data['TotalSpend'] = all_data['RoomService'] + all_data['FoodCourt'] +\
all_data['ShoppingMall'] + all_data['Spa'] + all_data['VRDeck']
# NOTE: rows with TotalSpend == 0 produce NaN here (0/0); those NaNs are
# zero-filled a few cells below.
all_data['PctRoomService'] = all_data['RoomService']/all_data['TotalSpend']
all_data['PctFoodCourt'] = all_data['FoodCourt']/all_data['TotalSpend']
all_data['PctShoppingMall'] = all_data['ShoppingMall']/all_data['TotalSpend']
all_data['PctSpa'] = all_data['Spa']/all_data['TotalSpend']
all_data['PctVRDeck'] = all_data['VRDeck']/all_data['TotalSpend']
# Bin Age into decade groups: 0 -> [0,10), 1 -> [10,20), ..., 6 -> [60,70),
# with 7 as the catch-all for ages 70 and above.
# FIX: the original loop used range(6), an off-by-one that never assigned
# bin 6 and lumped ages 60-69 into the 70+ sentinel bin 7; range(7)
# covers all seven decade bins the sentinel value implies.
all_data['AgeBin'] = 7
for i in range(7):
    all_data.loc[(all_data.Age >= 10*i) & (all_data.Age < 10*(i + 1)), 'AgeBin'] = i
# Zero-fill NaNs in the derived share features (they arise from the 0/0
# division when TotalSpend is zero).
fill_cols = ['PctRoomService', 'PctFoodCourt', 'PctShoppingMall', 'PctSpa', 'PctVRDeck']
all_data[fill_cols] = all_data[fill_cols].fillna(0)
# Drop identifier-like columns that should not feed the model.
all_data.drop(['PassengerId', 'Name', 'Cabin'], axis=1, inplace=True)
# Label-encode every remaining object-dtype column except the target,
# then cast the imputed boolean columns to 0/1 integers.
object_cols = all_data.columns[all_data.dtypes == object]
for col in object_cols:
    if col == 'Transported':
        continue
    all_data[col] = LabelEncoder().fit_transform(all_data[col])
all_data['CryoSleep'] = all_data['CryoSleep'].astype('int')
all_data['VIP'] = all_data['VIP'].astype('int')
# Re-split the combined frame: the first len(train) rows are the labelled
# training data; the remainder (Transported is all-NaN there) becomes the
# test feature set.
train, X_test = all_data.iloc[ :train.shape[0]], all_data.iloc[train.shape[0]:].drop(['Transported'], axis=1)
X_train, y_train = train.drop(['Transported'], axis=1), train['Transported']
# Initialise the pycaret classification experiment.
# - train_size=0.99 keeps nearly all labelled rows for cross-validation
#   (the internal hold-out is only 87 rows — see the setup summary table).
# - silent=True skips the interactive dtype-confirmation prompt.
# - Stratified 5-fold CV with shuffling; low-variance features dropped,
#   multicollinear features removed, and numeric features robust-scaled.
s = setup(data=train,
session_id=7010,
target='Transported',
train_size=0.99,
fold_strategy='stratifiedkfold',
fold=5,
fold_shuffle=True,
silent=True,
ignore_low_variance=True,
remove_multicollinearity = True,
normalize = True,
normalize_method = 'robust',)
Description | Value | |
---|---|---|
0 | session_id | 7010 |
1 | Target | Transported |
2 | Target Type | Binary |
3 | Label Encoded | False: 0, True: 1 |
4 | Original Data | (8693, 22) |
5 | Missing Values | False |
6 | Numeric Features | 13 |
7 | Categorical Features | 8 |
8 | Ordinal Features | False |
9 | High Cardinality Features | False |
10 | High Cardinality Method | None |
11 | Transformed Train Set | (8606, 45) |
12 | Transformed Test Set | (87, 45) |
13 | Shuffle Train-Test | True |
14 | Stratify Train-Test | False |
15 | Fold Generator | StratifiedKFold |
16 | Fold Number | 5 |
17 | CPU Jobs | -1 |
18 | Use GPU | False |
19 | Log Experiment | False |
20 | Experiment Name | clf-default-name |
21 | USI | 3767 |
22 | Imputation Type | simple |
23 | Iterative Imputation Iteration | None |
24 | Numeric Imputer | mean |
25 | Iterative Imputation Numeric Model | None |
26 | Categorical Imputer | constant |
27 | Iterative Imputation Categorical Model | None |
28 | Unknown Categoricals Handling | least_frequent |
29 | Normalize | True |
30 | Normalize Method | robust |
31 | Transformation | False |
32 | Transformation Method | None |
33 | PCA | False |
34 | PCA Method | None |
35 | PCA Components | None |
36 | Ignore Low Variance | True |
37 | Combine Rare Levels | False |
38 | Rare Level Threshold | None |
39 | Numeric Binning | False |
40 | Remove Outliers | False |
41 | Outliers Threshold | None |
42 | Remove Multicollinearity | True |
43 | Multicollinearity Threshold | 0.9 |
44 | Remove Perfect Collinearity | True |
45 | Clustering | False |
46 | Clustering Iteration | None |
47 | Polynomial Features | False |
48 | Polynomial Degree | None |
49 | Trignometry Features | False |
50 | Polynomial Threshold | None |
51 | Group Features | False |
52 | Feature Selection | False |
53 | Feature Selection Method | classic |
54 | Features Selection Threshold | None |
55 | Feature Interaction | False |
56 | Feature Ratio | False |
57 | Interaction Threshold | None |
58 | Fix Imbalance | False |
59 | Fix Imbalance Method | SMOTE |
# Cross-validate every available classifier and keep the best four models.
top4 = compare_models(n_select=4)
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) | |
---|---|---|---|---|---|---|---|---|---|
lightgbm | Light Gradient Boosting Machine | 0.8076 | 0.8998 | 0.8141 | 0.8058 | 0.8098 | 0.6151 | 0.6154 | 0.388 |
gbc | Gradient Boosting Classifier | 0.8012 | 0.8949 | 0.8360 | 0.7835 | 0.8088 | 0.6022 | 0.6037 | 2.176 |
rf | Random Forest Classifier | 0.7983 | 0.8816 | 0.7732 | 0.8162 | 0.7940 | 0.5967 | 0.5977 | 1.646 |
ada | Ada Boost Classifier | 0.7963 | 0.8777 | 0.8418 | 0.7736 | 0.8061 | 0.5924 | 0.5950 | 0.504 |
lr | Logistic Regression | 0.7936 | 0.8800 | 0.8293 | 0.7759 | 0.8017 | 0.5871 | 0.5885 | 1.712 |
et | Extra Trees Classifier | 0.7907 | 0.8639 | 0.7610 | 0.8114 | 0.7852 | 0.5816 | 0.5829 | 1.372 |
ridge | Ridge Classifier | 0.7898 | 0.0000 | 0.8506 | 0.7603 | 0.8028 | 0.5793 | 0.5836 | 0.066 |
lda | Linear Discriminant Analysis | 0.7898 | 0.8739 | 0.8506 | 0.7603 | 0.8028 | 0.5793 | 0.5836 | 0.098 |
knn | K Neighbors Classifier | 0.7770 | 0.8501 | 0.7658 | 0.7856 | 0.7755 | 0.5541 | 0.5543 | 0.662 |
nb | Naive Bayes | 0.7727 | 0.8471 | 0.8945 | 0.7211 | 0.7984 | 0.5447 | 0.5616 | 0.086 |
dt | Decision Tree Classifier | 0.7516 | 0.7515 | 0.7621 | 0.7486 | 0.7553 | 0.5031 | 0.5032 | 0.154 |
svm | SVM - Linear Kernel | 0.7237 | 0.0000 | 0.7901 | 0.7166 | 0.7414 | 0.4468 | 0.4642 | 0.206 |
qda | Quadratic Discriminant Analysis | 0.5085 | 0.5090 | 0.4189 | 0.5315 | 0.4490 | 0.0179 | 0.0222 | 0.072 |
dummy | Dummy Classifier | 0.5031 | 0.5000 | 1.0000 | 0.5031 | 0.6694 | 0.0000 | 0.0000 | 0.042 |
print(top4[0])
LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0, importance_type='split', learning_rate=0.1, max_depth=-1, min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0, n_estimators=100, n_jobs=-1, num_leaves=31, objective=None, random_state=7010, reg_alpha=0.0, reg_lambda=0.0, silent='warn', subsample=1.0, subsample_for_bin=200000, subsample_freq=0)
# 추가 필요 라이브러리 설치
!pip install scikit-optimize
!pip install tune-sklearn ray[tune]
!pip install hpbandster ConfigSpace
!pip install optuna
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting scikit-optimize
Downloading scikit_optimize-0.9.0-py2.py3-none-any.whl (100 kB)
|████████████████████████████████| 100 kB 3.1 MB/s
Requirement already satisfied: scipy>=0.19.1 in /usr/local/lib/python3.7/dist-packages (from scikit-optimize) (1.5.4)
Collecting pyaml>=16.9
Downloading pyaml-21.10.1-py2.py3-none-any.whl (24 kB)
Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.7/dist-packages (from scikit-optimize) (1.19.5)
Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-optimize) (1.1.0)
Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.7/dist-packages (from scikit-optimize) (0.23.2)
Requirement already satisfied: PyYAML in /usr/local/lib/python3.7/dist-packages (from pyaml>=16.9->scikit-optimize) (5.4.1)
Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.20.0->scikit-optimize) (3.1.0)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-21.10.1 scikit-optimize-0.9.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tune-sklearn
Downloading tune_sklearn-0.4.3-py3-none-any.whl (40 kB)
|████████████████████████████████| 40 kB 3.9 MB/s
Collecting ray[tune]
Downloading ray-1.13.0-cp37-cp37m-manylinux2014_x86_64.whl (54.5 MB)
|████████████████████████████████| 54.5 MB 70 kB/s
Requirement already satisfied: scikit-learn in /usr/local/lib/python3.7/dist-packages (from tune-sklearn) (0.23.2)
Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from tune-sklearn) (1.5.4)
Requirement already satisfied: numpy>=1.16 in /usr/local/lib/python3.7/dist-packages (from tune-sklearn) (1.19.5)
Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from ray[tune]) (1.0.4)
Collecting grpcio<=1.43.0,>=1.28.1
Downloading grpcio-1.43.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.1 MB)
|████████████████████████████████| 4.1 MB 31.6 MB/s
Requirement already satisfied: jsonschema in /usr/local/lib/python3.7/dist-packages (from ray[tune]) (4.3.3)
Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from ray[tune]) (5.4.1)
Collecting aiosignal
Downloading aiosignal-1.2.0-py3-none-any.whl (8.2 kB)
Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from ray[tune]) (3.7.1)
Requirement already satisfied: attrs in /usr/local/lib/python3.7/dist-packages (from ray[tune]) (21.4.0)
Collecting frozenlist
Downloading frozenlist-1.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)
|████████████████████████████████| 144 kB 42.2 MB/s
Collecting virtualenv
Downloading virtualenv-20.15.1-py2.py3-none-any.whl (10.1 MB)
|████████████████████████████████| 10.1 MB 49.4 MB/s
Requirement already satisfied: click<=8.0.4,>=7.0 in /usr/local/lib/python3.7/dist-packages (from ray[tune]) (7.1.2)
Requirement already satisfied: protobuf<4.0.0,>=3.15.3 in /usr/local/lib/python3.7/dist-packages (from ray[tune]) (3.17.3)
Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from ray[tune]) (2.28.0)
Collecting tensorboardX>=1.9
Downloading tensorboardX-2.5.1-py2.py3-none-any.whl (125 kB)
|████████████████████████████████| 125 kB 35.5 MB/s
Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from ray[tune]) (1.3.5)
Requirement already satisfied: tabulate in /usr/local/lib/python3.7/dist-packages (from ray[tune]) (0.8.9)
Requirement already satisfied: six>=1.5.2 in /usr/local/lib/python3.7/dist-packages (from grpcio<=1.43.0,>=1.28.1->ray[tune]) (1.15.0)
Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from jsonschema->ray[tune]) (4.11.4)
Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from jsonschema->ray[tune]) (4.1.1)
Requirement already satisfied: importlib-resources>=1.4.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema->ray[tune]) (5.7.1)
Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema->ray[tune]) (0.18.1)
Requirement already satisfied: zipp>=3.1.0 in /usr/local/lib/python3.7/dist-packages (from importlib-resources>=1.4.0->jsonschema->ray[tune]) (3.8.0)
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->ray[tune]) (2.8.2)
Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->ray[tune]) (2022.1)
Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.7/dist-packages (from requests->ray[tune]) (2.0.12)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->ray[tune]) (2.10)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->ray[tune]) (1.24.3)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->ray[tune]) (2022.6.15)
Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn->tune-sklearn) (1.1.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn->tune-sklearn) (3.1.0)
Collecting platformdirs<3,>=2
Downloading platformdirs-2.5.2-py3-none-any.whl (14 kB)
Collecting distlib<1,>=0.3.1
Downloading distlib-0.3.4-py2.py3-none-any.whl (461 kB)
|████████████████████████████████| 461 kB 23.7 MB/s
Installing collected packages: platformdirs, frozenlist, distlib, virtualenv, grpcio, aiosignal, tensorboardX, ray, tune-sklearn
Attempting uninstall: grpcio
Found existing installation: grpcio 1.46.3
Uninstalling grpcio-1.46.3:
Successfully uninstalled grpcio-1.46.3
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.8.2+zzzcolab20220527125636 requires numpy>=1.20, but you have numpy 1.19.5 which is incompatible.
Successfully installed aiosignal-1.2.0 distlib-0.3.4 frozenlist-1.3.0 grpcio-1.43.0 platformdirs-2.5.2 ray-1.13.0 tensorboardX-2.5.1 tune-sklearn-0.4.3 virtualenv-20.15.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting hpbandster
Downloading hpbandster-0.7.4.tar.gz (51 kB)
|████████████████████████████████| 51 kB 115 kB/s
Collecting ConfigSpace
Downloading ConfigSpace-0.5.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.7 MB)
|████████████████████████████████| 4.7 MB 7.1 MB/s
Collecting Pyro4
Downloading Pyro4-4.82-py2.py3-none-any.whl (89 kB)
|████████████████████████████████| 89 kB 9.5 MB/s
Collecting serpent
Downloading serpent-1.41-py3-none-any.whl (9.6 kB)
Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from hpbandster) (1.19.5)
Requirement already satisfied: statsmodels in /usr/local/lib/python3.7/dist-packages (from hpbandster) (0.10.2)
Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from hpbandster) (1.5.4)
Collecting netifaces
Downloading netifaces-0.11.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (32 kB)
Requirement already satisfied: pyparsing in /usr/local/lib/python3.7/dist-packages (from ConfigSpace) (3.0.9)
Requirement already satisfied: cython in /usr/local/lib/python3.7/dist-packages (from ConfigSpace) (0.29.30)
Requirement already satisfied: patsy>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from statsmodels->hpbandster) (0.5.2)
Requirement already satisfied: pandas>=0.19 in /usr/local/lib/python3.7/dist-packages (from statsmodels->hpbandster) (1.3.5)
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.19->statsmodels->hpbandster) (2.8.2)
Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.19->statsmodels->hpbandster) (2022.1)
Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from patsy>=0.4.0->statsmodels->hpbandster) (1.15.0)
Building wheels for collected packages: hpbandster
Building wheel for hpbandster (setup.py) ... done
Created wheel for hpbandster: filename=hpbandster-0.7.4-py3-none-any.whl size=80006 sha256=3b7de96533634b477549b4b92e1fe0ad5049b291461abca12d965aecb2e04d4b
Stored in directory: /root/.cache/pip/wheels/d9/88/fc/61ab6b9f386a386839668631c39a6dc3c2fb0ec7000d552faa
Successfully built hpbandster
Installing collected packages: serpent, Pyro4, netifaces, ConfigSpace, hpbandster
Successfully installed ConfigSpace-0.5.0 Pyro4-4.82 hpbandster-0.7.4 netifaces-0.11.0 serpent-1.41
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting optuna
Downloading optuna-2.10.1-py3-none-any.whl (308 kB)
|████████████████████████████████| 308 kB 4.3 MB/s
Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from optuna) (4.64.0)
Requirement already satisfied: alembic in /usr/local/lib/python3.7/dist-packages (from optuna) (1.8.0)
Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from optuna) (1.19.5)
Requirement already satisfied: sqlalchemy>=1.1.0 in /usr/local/lib/python3.7/dist-packages (from optuna) (1.4.37)
Collecting colorlog
Downloading colorlog-6.6.0-py2.py3-none-any.whl (11 kB)
Collecting cliff
Downloading cliff-3.10.1-py3-none-any.whl (81 kB)
|████████████████████████████████| 81 kB 9.3 MB/s
Requirement already satisfied: scipy!=1.4.0 in /usr/local/lib/python3.7/dist-packages (from optuna) (1.5.4)
Requirement already satisfied: PyYAML in /usr/local/lib/python3.7/dist-packages (from optuna) (5.4.1)
Collecting cmaes>=0.8.2
Downloading cmaes-0.8.2-py3-none-any.whl (15 kB)
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from optuna) (21.3)
Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->optuna) (3.0.9)
Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from sqlalchemy>=1.1.0->optuna) (4.11.4)
Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.7/dist-packages (from sqlalchemy>=1.1.0->optuna) (1.1.2)
Requirement already satisfied: Mako in /usr/local/lib/python3.7/dist-packages (from alembic->optuna) (1.2.0)
Requirement already satisfied: importlib-resources in /usr/local/lib/python3.7/dist-packages (from alembic->optuna) (5.7.1)
Collecting stevedore>=2.0.1
Downloading stevedore-3.5.0-py3-none-any.whl (49 kB)
|████████████████████████████████| 49 kB 6.9 MB/s
Collecting cmd2>=1.0.0
Downloading cmd2-2.4.1-py3-none-any.whl (146 kB)
|████████████████████████████████| 146 kB 59.8 MB/s
Collecting autopage>=0.4.0
Downloading autopage-0.5.1-py3-none-any.whl (29 kB)
Collecting pbr!=2.1.0,>=2.0.0
Downloading pbr-5.9.0-py2.py3-none-any.whl (112 kB)
|████████████████████████████████| 112 kB 71.8 MB/s
Requirement already satisfied: PrettyTable>=0.7.2 in /usr/local/lib/python3.7/dist-packages (from cliff->optuna) (3.3.0)
Requirement already satisfied: attrs>=16.3.0 in /usr/local/lib/python3.7/dist-packages (from cmd2>=1.0.0->cliff->optuna) (21.4.0)
Collecting pyperclip>=1.6
Downloading pyperclip-1.8.2.tar.gz (20 kB)
Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from cmd2>=1.0.0->cliff->optuna) (4.1.1)
Requirement already satisfied: wcwidth>=0.1.7 in /usr/local/lib/python3.7/dist-packages (from cmd2>=1.0.0->cliff->optuna) (0.2.5)
Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->sqlalchemy>=1.1.0->optuna) (3.8.0)
Requirement already satisfied: MarkupSafe>=0.9.2 in /usr/local/lib/python3.7/dist-packages (from Mako->alembic->optuna) (2.0.1)
Building wheels for collected packages: pyperclip
Building wheel for pyperclip (setup.py) ... done
Created wheel for pyperclip: filename=pyperclip-1.8.2-py3-none-any.whl size=11137 sha256=5baaa891de7cb03647afb94cc41fbd764be0a0fffd64166bbf35563f81663bfa
Stored in directory: /root/.cache/pip/wheels/9f/18/84/8f69f8b08169c7bae2dde6bd7daf0c19fca8c8e500ee620a28
Successfully built pyperclip
Installing collected packages: pyperclip, pbr, stevedore, cmd2, autopage, colorlog, cmaes, cliff, optuna
Successfully installed autopage-0.5.1 cliff-3.10.1 cmaes-0.8.2 cmd2-2.4.1 colorlog-6.6.0 optuna-2.10.1 pbr-5.9.0 pyperclip-1.8.2 stevedore-3.5.0
import optuna
# For reference only: hyperparameters of a previously tuned CatBoost model.
# catboost_best = create_model('catboost', nan_mode= 'Min',
# eval_metric='Logloss',
# iterations=1000,
# sampling_frequency='PerTree',
# leaf_estimation_method='Newton',
# grow_policy='SymmetricTree',
# penalties_coefficient=1,
# boosting_type='Plain',
# model_shrink_mode='Constant',
# feature_border_type='GreedyLogSum',
# l2_leaf_reg=3,
# random_strength=1,
# rsm=1,
# boost_from_average=False,
# model_size_reg=0.5,
# subsample=0.800000011920929,
# use_best_model=False,
# class_names=[0, 1],
# depth=6,
# posterior_sampling=False,
# border_count=254,
# classes_count=0,
# auto_class_weights='None',
# sparse_features_conflict_fraction=0,
# leaf_estimation_backtracking='AnyImprovement',
# best_model_min_trees=1,
# model_shrink_rate=0,
# min_data_in_leaf=1,
# loss_function='Logloss',
# learning_rate=0.02582800015807152,
# score_function='Cosine',
# task_type='CPU',
# leaf_estimation_iterations=10,
# bootstrap_type='MVS',
# max_leaves=64)
# Create a specific well-performing model (LightGBM) via PyCaret.
#
# NOTE(review): the original call passed the CatBoost hyperparameters from the
# reference block above (nan_mode, grow_policy, leaf_estimation_method,
# model_shrink_mode, bootstrap_type='MVS', score_function, ...) directly to the
# LightGBM estimator. Those are CatBoost-specific and are not valid LightGBM
# parameters, so LightGBM would warn about / reject them. Only the settings
# that have LightGBM equivalents are kept here, under their canonical names.
lightgbm_best = create_model(
    'lightgbm',
    n_estimators=1000,                   # was `iterations` (CatBoost name)
    learning_rate=0.02582800015807152,   # small step size, paired with many trees
    num_leaves=64,                       # was `max_leaves` (CatBoost alias works, canonical name preferred)
    min_child_samples=1,                 # was `min_data_in_leaf` (LightGBM alias)
    subsample=0.800000011920929,         # row-sampling fraction per tree
    reg_lambda=3,                        # was `l2_leaf_reg` (CatBoost name); L2 regularization
)
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|
Fold | |||||||
0 | 0.7938 | 0.8938 | 0.8048 | 0.7894 | 0.7970 | 0.5876 | 0.5877 |
1 | 0.8100 | 0.9013 | 0.8418 | 0.7933 | 0.8168 | 0.6198 | 0.6210 |
2 | 0.8094 | 0.8926 | 0.8303 | 0.7989 | 0.8143 | 0.6187 | 0.6192 |
3 | 0.7955 | 0.8912 | 0.8464 | 0.7700 | 0.8064 | 0.5907 | 0.5936 |
4 | 0.8181 | 0.9057 | 0.8395 | 0.8069 | 0.8229 | 0.6361 | 0.6367 |
Mean | 0.8054 | 0.8969 | 0.8326 | 0.7917 | 0.8115 | 0.6106 | 0.6117 |
Std | 0.0093 | 0.0056 | 0.0148 | 0.0124 | 0.0090 | 0.0186 | 0.0183 |
# Fine-tuning of the model is possible as shown below.
# catboost = tune_model(create_model('catboost'), choose_better = True, n_iter = 20)
# catboost2 = tune_model(create_model('catboost'), optimize='Accuracy',
# search_library='scikit-optimize', search_algorithm='bayesian',
# choose_better = True, n_iter = 20)
# catboost3 = tune_model(create_model('catboost'), optimize='Accuracy',
# search_library='tune-sklearn', search_algorithm='bayesian',
# choose_better = True, n_iter = 20)
# catboost4 = tune_model(create_model('catboost'), optimize='Accuracy',
# search_library='tune-sklearn', search_algorithm='hyperopt',
# choose_better = True, n_iter = 20)
# catboost5 = tune_model(create_model('catboost'), optimize='Accuracy',
# search_library='tune-sklearn', search_algorithm='optuna',
# choose_better = True, n_iter = 20)
# catboost6 = tune_model(create_model('catboost'), optimize='Accuracy',
# search_library='optuna', search_algorithm='tpe',
# choose_better = True, n_iter = 20)
# tuned_top4 = [tune_model(i) for i in top4]
# blender_top4 = blend_models(estimator_list=tuned_top4)
# Run inference with the chosen model and build the Kaggle submission file.
df_pred = predict_model(lightgbm_best, X_test)
# Keep the prediction column as a one-column frame (same shape as before).
# NOTE(review): newer PyCaret versions name this column 'prediction_label';
# 'Label' matches the version installed in this notebook — confirm if upgrading.
y_pred = df_pred[['Label']]
submission['Transported'] = y_pred
submission.to_csv('submission.csv', index=False)
submission  # final expression: displays the submission frame in the notebook
PassengerId | Transported | |
---|---|---|
0 | 0013_01 | True |
1 | 0018_01 | False |
2 | 0019_01 | True |
3 | 0021_01 | True |
4 | 0023_01 | False |
... | ... | ... |
4272 | 9266_02 | True |
4273 | 9269_01 | False |
4274 | 9271_01 | True |
4275 | 9273_01 | True |
4276 | 9277_01 | True |
4277 rows × 2 columns