import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
# Apply the 'ggplot' style sheet so figures resemble R's ggplot library.
# (Original note: requires version 1.41 or later - presumably matplotlib
# 1.4.1; verify.)
plt.style.use('ggplot')

# Report the installed pandas, seaborn and matplotlib versions, one per line.
for lib in (pd, sns, mpl):
    print(lib.__version__)

# Load the "iris" example dataset through seaborn and echo it as the cell result.
iris = sns.load_dataset("iris")
iris

2.1.4
0.12.2
3.8.0

# Histogram of petal width, drawn through the current Axes object.
hist_ax = plt.gca()
hist_ax.set_title("IRIS Petal Width")  # title
hist_ax.set_xlabel("Width")            # x-axis label
hist_ax.set_ylabel("Count")            # y-axis label
hist_ax.hist(iris['petal_width'])

(array([41.,  8.,  1.,  7.,  8., 33.,  6., 23.,  9., 14.]),
 array([0.1 , 0.34, 0.58, 0.82, 1.06, 1.3 , 1.54, 1.78, 2.02, 2.26, 2.5 ]),
 <BarContainer object of 10 artists>)

# Same petal-width histogram on an explicitly sized figure with larger fonts.
sized_fig = plt.figure(figsize=(10, 6))
sized_ax = sized_fig.gca()
# y controls the gap between the plot area and the title.
sized_ax.set_title("IRIS Petal Width", fontsize=25, y=1.01)
sized_ax.set_xlabel("Width", fontsize=15)  # x-axis label
sized_ax.set_ylabel("Count", fontsize=15)  # y-axis label
sized_ax.hist(iris['petal_width'])

(array([41.,  8.,  1.,  7.,  8., 33.,  6., 23.,  9., 14.]),
 array([0.1 , 0.34, 0.58, 0.82, 1.06, 1.3 , 1.54, 1.78, 2.02, 2.26, 2.5 ]),
 <BarContainer object of 10 artists>)

# Show the column labels of the iris DataFrame.
iris.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')

# Draw a 2x2 grid of histograms, one panel per numeric iris feature.
fig, ax = plt.subplots(2, 2, figsize=(12, 8))

# One entry per panel, in row-major grid order:
# (column to plot, bar colour, x-axis label, panel title).
# The x label names the measured quantity, so the *_length columns are
# labelled 'Length' and the *_width columns 'Width'.
hist_panels = [
    ('petal_width', 'deepskyblue', 'Width', "IRIS Petal Width"),
    ('petal_length', 'deepskyblue', 'Length', "IRIS Petal_length"),
    ('sepal_width', 'dodgerblue', 'Width', "IRIS sepal_width"),
    ('sepal_length', 'dodgerblue', 'Length', "IRIS sepal_length"),
]

# ax is a 2x2 array of Axes; .flat walks it as ax[0][0], ax[0][1], ax[1][0], ax[1][1].
for panel, (column, bar_color, x_label, heading) in zip(ax.flat, hist_panels):
    panel.hist(iris[column], color=bar_color)
    panel.set_ylabel('Count', fontsize=15)
    panel.set_xlabel(x_label, fontsize=15)
    panel.set_title(heading, fontsize=14, y=1.01)

plt.tight_layout()  # automatically adjusts spacing so the subplots do not overlap

# Show the column labels of the iris DataFrame again before choosing scatter axes.
iris.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')

# Scatter plot of sepal length (x) against sepal width (y) on a square figure.
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(x=iris.sepal_length, y=iris.sepal_width, color="sandybrown")

<matplotlib.collections.PathCollection at 0x24f66a94b50>

# Scatter plot of petal length (x) against petal width (y) on a square figure.
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(x=iris.petal_length, y=iris.petal_width, color="sandybrown")

<matplotlib.collections.PathCollection at 0x24f66b52fd0>

# Keep the first four column labels (everything before 'species') as a plain list.
cols = iris.columns[:4].tolist()
cols

['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

# Distinct values found in the species column.
iris['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

# Mean of each of the four features over the virginica rows
[iris.loc[iris.species == 'virginica', feature].mean() for feature in cols]

[6.587999999999998, 2.974, 5.5520000000000005, 2.0260000000000002]

# Mean of each of the four features over the setosa rows
[iris.loc[iris.species == 'setosa', feature].mean() for feature in cols]

[5.006, 3.428, 1.4620000000000002, 0.24599999999999997]

# Mean of each of the four features over the versicolor rows
[iris.loc[iris.species == 'versicolor', feature].mean() for feature in cols]

[5.936, 2.7700000000000005, 4.26, 1.3259999999999998]

# x-axis labels: the four feature names
x = cols


def _feature_means(species_name):
    """Return the mean of every column in ``cols`` over one species' rows.

    Args:
        species_name: Value of the ``species`` column to select.

    Returns:
        A list with one mean per entry of ``cols``, in the same order.
    """
    # Filter the rows once instead of once per column.
    rows = iris[iris.species == species_name]
    return [rows[col].mean() for col in cols]


# Per-feature means for each species, each computed exactly once.
setosa = _feature_means('setosa')
versi = _feature_means('versicolor')
virgi = _feature_means('virginica')

# Bar charts of the per-feature means, one panel per species, laid out on a
# 2x2 grid (the fourth cell is left empty).
bar_fig = plt.figure(figsize=(15, 10))

setosa_ax = bar_fig.add_subplot(2, 2, 1)
setosa_ax.bar(x, setosa)
setosa_ax.set_title("setosa")

versi_ax = bar_fig.add_subplot(2, 2, 2)
versi_ax.bar(x, versi)
versi_ax.set_title("versicolor")

virgi_ax = bar_fig.add_subplot(2, 2, 3)
virgi_ax.bar(x, virgi)
virgi_ax.set_title("virginica")

Text(0.5, 1.0, 'virginica')

	sepal_length	sepal_width	petal_length	petal_width	species
0	5.1	3.5	1.4	0.2	setosa
1	4.9	3.0	1.4	0.2	setosa
2	4.7	3.2	1.3	0.2	setosa
3	4.6	3.1	1.5	0.2	setosa
4	5.0	3.6	1.4	0.2	setosa
...	...	...	...	...	...
145	6.7	3.0	5.2	2.3	virginica
146	6.3	2.5	5.0	1.9	virginica
147	6.5	3.0	5.2	2.0	virginica
148	6.2	3.4	5.4	2.3	virginica
149	5.9	3.0	5.1	1.8	virginica

Matplotlib 라이브러리로 IRIS 데이터셋 실습해 보기

학습 내용

목차

01. 데이터 준비

02. 4개의 피처 값 확인 - 히스토그램

그래프의 크기 설정 및 세부 설정

03. Scatter Plot (산점도) - 두 변수간 상관관계 확인

실습 - petal_width와 petal_length의 관계를 확인해 보자.

04. 막대 그래프

실습해 보기 1

REF