import warnings
warnings.filterwarnings(action='ignore')
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import mglearn
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import numpy as np
from mglearn.datasets import make_blobs
### Creating the data
X, y = make_blobs(n_samples=(400, 50),   # deliberately imbalanced: 400 vs. 50 samples
                  centers=2,
                  cluster_std=[7.0, 2],  # standard deviation of each cluster
                  random_state=42)
print(X.shape, y.shape)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
(450, 2) (450,)
X_train[0:10], y_train[0:10]
(array([[-0.18299954,  3.77488037],
        [-2.73847051,  5.21031273],
        [ 4.14376924,  4.97596054],
        [ 5.21160962,  2.64208326],
        [ 1.78996928, 14.3168401 ],
        [ 1.09604925, 12.61078778],
        [-2.16954623,  3.19763531],
        [-8.04251881, 12.31456463],
        [-0.48077362, 23.54209172],
        [-8.287678  ,  6.76458524]]),
 array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0]))
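The make_blobs call above deliberately requests an imbalanced dataset (400 samples of class 0, 50 of class 1). As a quick sketch (not part of the original notebook), np.bincount can confirm the class balance on each side of the split:
# Count samples per class; the full set should be exactly [400, 50]
print("full set :", np.bincount(y))
print("train set:", np.bincount(y_train))
print("test set :", np.bincount(y_test))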
plt.scatter(X[:, 0], X[:, 1],
            c=y,
            cmap=plt.cm.autumn, s=60, edgecolors='k')
<matplotlib.collections.PathCollection at 0x7fa72ba8daf0>
mglearn.plots.plot_decision_threshold()
svc = SVC(gamma=.05).fit(X_train, y_train)
pred = svc.predict(X_test)
print(classification_report(y_test, pred))
              precision    recall  f1-score   support

           0       0.99      0.93      0.96       107
           1       0.42      0.83      0.56         6

    accuracy                           0.93       113
   macro avg       0.70      0.88      0.76       113
weighted avg       0.96      0.93      0.94       113
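The report shows high recall but low precision for the rare class 1. As a sketch (not in the original notebook), printing the underlying confusion matrix shows the raw counts these scores are computed from:
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes:
# [[TN, FP],
#  [FN, TP]] with class 1 as the positive class
print(confusion_matrix(y_test, pred))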
decision_values = svc.decision_function(X_test)  # signed distance to the decision boundary
print(decision_values[0:10])
np.min(decision_values), np.max(decision_values)
[-1.0167542   0.72583536 -1.17766946 -1.00425497 -1.0002495  -0.99977182
 -1.07285711 -1.2206812  -1.24018502 -1.30361098]
(-1.509707253620952, 1.6245457437087478)
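A sketch (not part of the original notebook) to visualize what moving the threshold does geometrically: drawing the decision function's level curves at 0 and at -0.8 shows the boundary shifting so that more points fall on the positive side.
# Evaluate the decision function on a grid covering the data
xx, yy = np.meshgrid(np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 200),
                     np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 200))
zz = svc.decision_function(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.autumn, s=60, edgecolors='k')
# Solid line: default threshold 0; dashed line: lowered threshold -0.8
plt.contour(xx, yy, zz, levels=[-0.8, 0], linestyles=['--', '-'], colors='k')
plt.show()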
decision_0 = svc.decision_function(X_test) > 0      # threshold at 0 (the default)
decision_m08 = svc.decision_function(X_test) > -.8  # threshold lowered to -0.8
# Lowering the threshold increases TP: more of the actual positives are caught.
print("threshold 0    : count of 1 (positive):", decision_0.sum())
print("threshold -0.8 : count of 1 (positive):", decision_m08.sum())
threshold 0    : count of 1 (positive): 12
threshold -0.8 : count of 1 (positive): 18
print("임계값 0 일때 : 0(음성) 개수 :", len(decision_0) - decision_0.sum())
print("임계값 -0.8 일때 : 0(음성) 개수 :", len(decision_m08) - decision_m08.sum() )
threshold 0    : count of 0 (negative): 101
threshold -0.8 : count of 0 (negative): 95
y_pred_0 = svc.decision_function(X_test) > 0
y_pred_m08 = svc.decision_function(X_test) > -.8
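As a quick sanity check (a sketch, not in the original notebook): for a binary SVC, predict() returns class 1 exactly where the decision function exceeds 0, so thresholding at 0 should reproduce the default predictions.
# Compare the threshold-0 predictions to predict(); expected output: True
print(np.all(y_pred_0 == (svc.predict(X_test) == 1)))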
# threshold 0
print(classification_report(y_test, y_pred_0))
              precision    recall  f1-score   support

           0       0.99      0.93      0.96       107
           1       0.42      0.83      0.56         6

    accuracy                           0.93       113
   macro avg       0.70      0.88      0.76       113
weighted avg       0.96      0.93      0.94       113
# threshold -0.8
print(classification_report(y_test, y_pred_m08))
              precision    recall  f1-score   support

           0       1.00      0.89      0.94       107
           1       0.33      1.00      0.50         6

    accuracy                           0.89       113
   macro avg       0.67      0.94      0.72       113
weighted avg       0.96      0.89      0.92       113
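Rather than hand-picking thresholds one at a time, every threshold can be swept at once. A sketch of this idea (not part of the original notebook) using sklearn's precision_recall_curve:
from sklearn.metrics import precision_recall_curve

# Trace precision and recall over all thresholds of the decision function
precision, recall, thresholds = precision_recall_curve(
    y_test, svc.decision_function(X_test))
plt.plot(recall, precision, label="SVC (gamma=0.05)")
plt.xlabel("recall")
plt.ylabel("precision")
plt.legend()
plt.show()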
Recall:
TP / (TP + FN): of the samples that are actually positive, how many did we correctly identify?
It is also called sensitivity, hit rate, or the true positive rate (TPR).
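As a worked check (a sketch, not in the original notebook), the class-1 recall values in the two reports above can be recomputed directly from the thresholded predictions:
from sklearn.metrics import recall_score

# At threshold 0, 5 of the 6 true positives are caught (recall 0.83);
# at threshold -0.8, all 6 are caught (recall 1.00)
print("recall at threshold 0   :", recall_score(y_test, y_pred_0))
print("recall at threshold -0.8:", recall_score(y_test, y_pred_m08))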
Created for educational purposes; prior permission is required for distribution or reproduction.
Copyright 2022 LIM Co. All rights reserved.