In [4]:
import matplotlib.pyplot as plt
import numpy as np
In [5]:
%matplotlib inline

import seaborn as sns
In [8]:
# Sample grid: 100 evenly spaced points covering the closed interval [0, 14].
x = np.linspace(0, 14, num=100)

# Three sine waves that differ only in amplitude and phase shift:
#   y1 = sin(x), y2 = 2*sin(x + 0.5), y3 = 4*sin(x + 1.0)
y1, y2, y3 = (
    amplitude * np.sin(x + phase)
    for amplitude, phase in ((1, 0.0), (2, 0.5), (4, 1.0))
)

# Open a new figure and draw the first sine wave on it.
plt.figure(figsize=(10,6))   # figure size
# Last expression of the cell: the list of Line2D objects returned by plt.plot
# is what shows up as the cell output.
plt.plot(x, y1)
Out[8]:
[<matplotlib.lines.Line2D at 0x2515e768898>]
In [10]:
plt.plot(x, y1, x,y2, x, y3)   # three sine curves drawn by a single plot call
Out[10]:
[<matplotlib.lines.Line2D at 0x2515e67ef60>,
 <matplotlib.lines.Line2D at 0x2515e68c160>,
 <matplotlib.lines.Line2D at 0x2515e68c978>]

새로운 스타일 적용

In [11]:
# Switch seaborn's plot style to "whitegrid".
sns.set_style("whitegrid")
In [12]:
plt.plot(x, y1, x,y2, x, y3)   # the same three sine curves, redrawn after the style change
Out[12]:
[<matplotlib.lines.Line2D at 0x2515e778358>,
 <matplotlib.lines.Line2D at 0x2515e778550>,
 <matplotlib.lines.Line2D at 0x2515e778da0>]

tips 데이터를 살펴보고 인사이트 얻기

In [16]:
# Apply the "whitegrid" style again (same call as in the earlier style cell).
sns.set_style("whitegrid")
In [18]:
tips = sns.load_dataset("tips")  # requires an active internet connection
# Last expression of the cell: render the DataFrame as the cell output.
tips
Out[18]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
5 25.29 4.71 Male No Sun Dinner 4
6 8.77 2.00 Male No Sun Dinner 2
7 26.88 3.12 Male No Sun Dinner 4
8 15.04 1.96 Male No Sun Dinner 2
9 14.78 3.23 Male No Sun Dinner 2
10 10.27 1.71 Male No Sun Dinner 2
11 35.26 5.00 Female No Sun Dinner 4
12 15.42 1.57 Male No Sun Dinner 2
13 18.43 3.00 Male No Sun Dinner 4
14 14.83 3.02 Female No Sun Dinner 2
15 21.58 3.92 Male No Sun Dinner 2
16 10.33 1.67 Female No Sun Dinner 3
17 16.29 3.71 Male No Sun Dinner 3
18 16.97 3.50 Female No Sun Dinner 3
19 20.65 3.35 Male No Sat Dinner 3
20 17.92 4.08 Male No Sat Dinner 2
21 20.29 2.75 Female No Sat Dinner 2
22 15.77 2.23 Female No Sat Dinner 2
23 39.42 7.58 Male No Sat Dinner 4
24 19.82 3.18 Male No Sat Dinner 2
25 17.81 2.34 Male No Sat Dinner 4
26 13.37 2.00 Male No Sat Dinner 2
27 12.69 2.00 Male No Sat Dinner 2
28 21.70 4.30 Male No Sat Dinner 2
29 19.65 3.00 Female No Sat Dinner 2
... ... ... ... ... ... ... ...
214 28.17 6.50 Female Yes Sat Dinner 3
215 12.90 1.10 Female Yes Sat Dinner 2
216 28.15 3.00 Male Yes Sat Dinner 5
217 11.59 1.50 Male Yes Sat Dinner 2
218 7.74 1.44 Male Yes Sat Dinner 2
219 30.14 3.09 Female Yes Sat Dinner 4
220 12.16 2.20 Male Yes Fri Lunch 2
221 13.42 3.48 Female Yes Fri Lunch 2
222 8.58 1.92 Male Yes Fri Lunch 1
223 15.98 3.00 Female No Fri Lunch 3
224 13.42 1.58 Male Yes Fri Lunch 2
225 16.27 2.50 Female Yes Fri Lunch 2
226 10.09 2.00 Female Yes Fri Lunch 2
227 20.45 3.00 Male No Sat Dinner 4
228 13.28 2.72 Male No Sat Dinner 2
229 22.12 2.88 Female Yes Sat Dinner 2
230 24.01 2.00 Male Yes Sat Dinner 4
231 15.69 3.00 Male Yes Sat Dinner 3
232 11.61 3.39 Male No Sat Dinner 2
233 10.77 1.47 Male No Sat Dinner 2
234 15.53 3.00 Male Yes Sat Dinner 2
235 10.07 1.25 Male No Sat Dinner 2
236 12.60 1.00 Male Yes Sat Dinner 2
237 32.83 1.17 Male Yes Sat Dinner 2
238 35.83 4.67 Female No Sat Dinner 3
239 29.03 5.92 Male No Sat Dinner 3
240 27.18 2.00 Female Yes Sat Dinner 2
241 22.67 2.00 Male Yes Sat Dinner 2
242 17.82 1.75 Male No Sat Dinner 2
243 18.78 3.00 Female No Thur Dinner 2

244 rows × 7 columns

In [36]:
tips.head()  ## peek at just the first few rows of the data
Out[36]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
In [23]:
import pandas as pd
In [ ]:
# Reference template for building a one-column frequency table with
# pd.crosstab. It is kept as a comment on purpose: as live code the fragment
# had no opening `pd.crosstab(` (a syntax error) and it refers to
# `titanic_train`, which is not defined anywhere in this notebook.
#
# pd.crosstab(index=titanic_train["Survived"],  # Make a crosstab
#             columns="count")                  # Name the count column
In [27]:
# Frequency table: number of rows in `tips` for each value of the "time" column.
my_tab = pd.crosstab(index=tips["time"],  # Make a crosstab
                              columns="count")      # Name the count column
my_tab  # last expression of the cell: render the table as the cell output
Out[27]:
col_0 count
time
Lunch 68
Dinner 176

Boxplot을 이용한 시각화 하기

요일별 식사금액은 얼마나 될까?

In [34]:
# Box plot of the total bill for each day of the week.
plt.figure(figsize=(8, 6))
sns.boxplot(data=tips, x="day", y="total_bill")
plt.show()

실습1. 요일별 Tip은 얼마나 될까?

In [35]:
plt.figure(figsize=(8,6))  # figure size
_____      # exercise: fill in the blank (placeholder name; the cell fails until it is replaced)
plt.show()

요일별 식사 금액을 흡연자와 비흡연자로 나누어 비교해 보자.

In [30]:
# Box plot of the total bill per day, with each day split by the "smoker"
# column (via `hue`) and coloured with the "Set3" palette.
plt.figure(figsize=(8, 6))
sns.boxplot(data=tips, x="day", y="total_bill", hue="smoker", palette="Set3")
plt.show()

Regression(회귀선)을 그어서 대략적인 예측을 수행해보자.

식사금액과 팁의 상관관계

In [33]:
# Scatter plot of tip against total bill with a fitted regression line.
sns.set_style("darkgrid")
# `height` is the current name of the facet-size argument: seaborn 0.9 renamed
# `size` to `height`, and later releases no longer accept `size=` at all.
sns.lmplot(x="total_bill", y="tip", data=tips, height=7)
plt.show()

그러면 담배를 피우는 사람과 안 피우는 사람은 어떻게 다를까?

In [37]:
# Same tip-vs-bill regression plot, with a separate fit per "smoker" value.
# `height` is the current name of the facet-size argument: seaborn 0.9 renamed
# `size` to `height`, and later releases no longer accept `size=` at all.
sns.lmplot(x="total_bill", y="tip", hue="smoker", data=tips, palette="Set1", height=7)
plt.show()

항공 데이터 이용

컬럼 구성: 연도(year), 월(month), 승객 수(passengers)
...
In [49]:
# Load the flights dataset (columns: year, month, passengers).
fg = sns.load_dataset("flights")
# Only the last expression of a cell is rendered, so the frame is displayed
# once here; an intermediate bare expression would be evaluated and discarded.
fg
Out[49]:
year month passengers
0 1949 January 112
1 1949 February 118
2 1949 March 132
3 1949 April 129
4 1949 May 121
5 1949 June 135
6 1949 July 148
7 1949 August 148
8 1949 September 136
9 1949 October 119
10 1949 November 104
11 1949 December 118
12 1950 January 115
13 1950 February 126
14 1950 March 141
15 1950 April 135
16 1950 May 125
17 1950 June 149
18 1950 July 170
19 1950 August 170
20 1950 September 158
21 1950 October 133
22 1950 November 114
23 1950 December 140
24 1951 January 145
25 1951 February 150
26 1951 March 178
27 1951 April 163
28 1951 May 172
29 1951 June 178
... ... ... ...
114 1958 July 491
115 1958 August 505
116 1958 September 404
117 1958 October 359
118 1958 November 310
119 1958 December 337
120 1959 January 360
121 1959 February 342
122 1959 March 406
123 1959 April 396
124 1959 May 420
125 1959 June 472
126 1959 July 548
127 1959 August 559
128 1959 September 463
129 1959 October 407
130 1959 November 362
131 1959 December 405
132 1960 January 417
133 1960 February 391
134 1960 March 419
135 1960 April 461
136 1960 May 472
137 1960 June 535
138 1960 July 622
139 1960 August 606
140 1960 September 508
141 1960 October 461
142 1960 November 390
143 1960 December 432

144 rows × 3 columns

In [50]:
type(fg)  # check what kind of object load_dataset returned (a pandas DataFrame)
Out[50]:
pandas.core.frame.DataFrame
In [51]:
# Reshape the long (year, month, passengers) table into a grid with one row
# per month and one column per year, holding the passenger counts.
# Keyword arguments are used because positional arguments to DataFrame.pivot
# were deprecated in pandas 1.x and are rejected from pandas 2.0 onwards.
fgp = fg.pivot(index="month", columns="year", values="passengers")
fgp
Out[51]:
year 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960
month
January 112 115 145 171 196 204 242 284 315 340 360 417
February 118 126 150 180 196 188 233 277 301 318 342 391
March 132 141 178 193 236 235 267 317 356 362 406 419
April 129 135 163 181 235 227 269 313 348 348 396 461
May 121 125 172 183 229 234 270 318 355 363 420 472
June 135 149 178 218 243 264 315 374 422 435 472 535
July 148 170 199 230 264 302 364 413 465 491 548 622
August 148 170 199 242 272 293 347 405 467 505 559 606
September 136 158 184 209 237 259 312 355 404 404 463 508
October 119 133 162 191 211 229 274 306 347 359 407 461
November 104 114 146 172 180 203 237 271 305 310 362 390
December 118 140 166 194 201 229 278 306 336 337 405 432
In [52]:
# Heatmap of the month-by-year passenger grid; every cell is annotated with
# its value formatted as an integer (fmt="d").
plt.figure(figsize=(10, 8))
sns.heatmap(data=fgp, annot=True, fmt="d")
plt.show()

iris 데이터를 살펴보기

In [53]:
sns.set(style="ticks")  # switch seaborn to the "ticks" style
iris = sns.load_dataset("iris")  # four sepal/petal measurements plus a species label
iris  # last expression of the cell: render the DataFrame as the cell output
Out[53]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
5 5.4 3.9 1.7 0.4 setosa
6 4.6 3.4 1.4 0.3 setosa
7 5.0 3.4 1.5 0.2 setosa
8 4.4 2.9 1.4 0.2 setosa
9 4.9 3.1 1.5 0.1 setosa
10 5.4 3.7 1.5 0.2 setosa
11 4.8 3.4 1.6 0.2 setosa
12 4.8 3.0 1.4 0.1 setosa
13 4.3 3.0 1.1 0.1 setosa
14 5.8 4.0 1.2 0.2 setosa
15 5.7 4.4 1.5 0.4 setosa
16 5.4 3.9 1.3 0.4 setosa
17 5.1 3.5 1.4 0.3 setosa
18 5.7 3.8 1.7 0.3 setosa
19 5.1 3.8 1.5 0.3 setosa
20 5.4 3.4 1.7 0.2 setosa
21 5.1 3.7 1.5 0.4 setosa
22 4.6 3.6 1.0 0.2 setosa
23 5.1 3.3 1.7 0.5 setosa
24 4.8 3.4 1.9 0.2 setosa
25 5.0 3.0 1.6 0.2 setosa
26 5.0 3.4 1.6 0.4 setosa
27 5.2 3.5 1.5 0.2 setosa
28 5.2 3.4 1.4 0.2 setosa
29 4.7 3.2 1.6 0.2 setosa
... ... ... ... ... ...
120 6.9 3.2 5.7 2.3 virginica
121 5.6 2.8 4.9 2.0 virginica
122 7.7 2.8 6.7 2.0 virginica
123 6.3 2.7 4.9 1.8 virginica
124 6.7 3.3 5.7 2.1 virginica
125 7.2 3.2 6.0 1.8 virginica
126 6.2 2.8 4.8 1.8 virginica
127 6.1 3.0 4.9 1.8 virginica
128 6.4 2.8 5.6 2.1 virginica
129 7.2 3.0 5.8 1.6 virginica
130 7.4 2.8 6.1 1.9 virginica
131 7.9 3.8 6.4 2.0 virginica
132 6.4 2.8 5.6 2.2 virginica
133 6.3 2.8 5.1 1.5 virginica
134 6.1 2.6 5.6 1.4 virginica
135 7.7 3.0 6.1 2.3 virginica
136 6.3 3.4 5.6 2.4 virginica
137 6.4 3.1 5.5 1.8 virginica
138 6.0 3.0 4.8 1.8 virginica
139 6.9 3.1 5.4 2.1 virginica
140 6.7 3.1 5.6 2.4 virginica
141 6.9 3.1 5.1 2.3 virginica
142 5.8 2.7 5.1 1.9 virginica
143 6.8 3.2 5.9 2.3 virginica
144 6.7 3.3 5.7 2.5 virginica
145 6.7 3.0 5.2 2.3 virginica
146 6.3 2.5 5.0 1.9 virginica
147 6.5 3.0 5.2 2.0 virginica
148 6.2 3.4 5.4 2.3 virginica
149 5.9 3.0 5.1 1.8 virginica

150 rows × 5 columns

In [56]:
# Pair plot of the iris data; the returned PairGrid is the cell output.
sns.pairplot(iris)
Out[56]:
<seaborn.axisgrid.PairGrid at 0x25160bf29b0>
In [54]:
# Same pair plot, with points coloured by the "species" column.
sns.pairplot(iris, hue="species")
plt.show()