import urllib.request as req
# Download the UCI mushroom data set and save it under a local file name
# (a relative path, so it lands in the current working directory).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
local = "mushroom.csv"
req.urlretrieve(url, filename=local)
print("다운로드 완료")

다운로드 완료


### 모델 만들기 
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split


# Read the data.
# header=None: the first line of the file is treated as data, not as column
# names, so pandas labels the columns with integers.
mush = pd.read_csv(filepath_or_buffer="mushroom.csv", header=None)
mush


from sklearn import preprocessing

# Fit a label encoder on the first column and store the resulting integer
# codes in a new 'label' column (this is the prediction target used later).
encoder_le = preprocessing.LabelEncoder().fit(mush.iloc[:, 0])
mush['label'] = encoder_le.transform(mush.iloc[:, 0])
mush


# Encode every feature column (positions 1..22) as integers and store the
# result in new columns named col1..col22.
#
# Each column gets its own LabelEncoder, kept in `feature_encoders` (keyed by
# column position). Refitting the shared `encoder_le` here would overwrite the
# mapping it learned for the label column and would keep only the mapping of
# the last feature column, making it impossible to decode predictions or to
# encode new samples consistently.
feature_encoders = {}
for i in range(1, 23):
    feature_encoders[i] = preprocessing.LabelEncoder()
    mush['col' + str(i)] = feature_encoders[i].fit_transform(mush.iloc[:, i])

mush


# Labels of the original (symbol-valued) columns: the integers 0 through 22.
col_all = [*range(23)]
col_all

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22]


# Remove the original symbol columns now that encoded copies exist.
# DataFrame.drop returns a new frame instead of modifying the caller, so the
# result must be assigned back; otherwise `mush` keeps the old columns.
mush = mush.drop(columns=col_all)
mush


# Target: the encoded 'label' column.
y = mush.loc[:, 'label']
# Features: all rows, columns col1 through col7 (label-based slice, inclusive).
X = mush.loc[:, "col1":"col7"]

print(f"{X.shape} {y.shape}")

(8124, 7) (8124,)


### Split into training and test data
# random_state=0 fixes the shuffle so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

print(f"{X_train.shape} {y_train.shape}")
print(f"{X_test.shape} {y_test.shape}")

(6093, 7) (6093,)
(2031, 7) (2031,)


from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier


### Choose and train the model
# Seed the forest so that training is reproducible: the train/test split is
# already seeded with random_state=0, but an unseeded RandomForestClassifier
# would still give different predictions and accuracy on every run.
model = RandomForestClassifier(random_state=0)
model.fit(X_train, y_train)

RandomForestClassifier()


### Predict on data the model has not seen
# Run the trained model on the held-out test features.
predict = model.predict(X=X_test)
predict

array([1, 0, 0, ..., 1, 0, 0])


# Sanity check: there should be one prediction per test label.
print(f"{len(predict)} {len(y_test)}")

2031 2031


# Show the true test labels as a NumPy array for comparison with `predict`.
y_test.to_numpy()

array([1, 0, 0, ..., 1, 0, 0])


# How many predictions were correct?
# The element-wise comparison yields booleans; their mean is the fraction of
# test samples whose prediction equals the true label (the accuracy).
import numpy as np
np.equal(predict, y_test.values).mean()

0.9945839487936977

공공 데이터를 활용한 머신러닝 실습

1-1 머신러닝 모델 만들기

1-2 데이터 준비

기호를 숫자로 변경

2열부터 나머지 열을 숫자로 변경

기존의 정보 열을 삭제

모델을 위해 사용하는 데이터

1-3 모델 선택 및 학습

1-4 정확도 확인

Summary

	0	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	19	20	21	22
0	p	x	s	n	t	p	f	c	n	k	e	e	s	s	w	w	p	w	o	p	k	s	u
1	e	x	s	y	t	a	f	c	b	k	e	c	s	s	w	w	p	w	o	p	n	n	g
2	e	b	s	w	t	l	f	c	b	n	e	c	s	s	w	w	p	w	o	p	n	n	m
3	p	x	y	w	t	p	f	c	n	n	e	e	s	s	w	w	p	w	o	p	k	s	u
4	e	x	s	g	f	n	f	w	b	k	t	e	s	s	w	w	p	w	o	e	n	a	g
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
8119	e	k	s	n	f	n	a	c	b	y	e	?	s	s	o	o	p	o	o	p	b	c	l
8120	e	x	s	n	f	n	a	c	b	y	e	?	s	s	o	o	p	n	o	p	b	v	l
8121	e	f	s	n	f	n	a	c	b	n	e	?	s	s	o	o	p	o	o	p	b	c	l
8122	p	k	y	n	f	y	f	c	n	b	t	?	s	k	w	w	p	w	o	e	w	v	l
8123	e	x	s	n	f	n	a	c	b	y	e	?	s	s	o	o	p	o	o	p	o	c	l