Exploration 2: Classifying Iris
1. Classifying Iris¶
In [1]:
from sklearn.datasets import load_iris
iris = load_iris()
print(type(dir(iris)))
# dir() lists the attributes and methods an object has
<class 'list'>
1-1. Inspecting the Structure of the Iris Dataset
In [2]:
iris.keys()
Out[2]:
dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])
In [3]:
iris_data = iris.data
print(iris_data.shape)
# shape gives the array's dimensions
(150, 4)
In [4]:
iris_data[0]
# print one sample as an example
Out[4]:
array([5.1, 3.5, 1.4, 0.2])
In order, these values are sepal length, sepal width, petal length, and petal width.
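The position-to-name mapping can be checked directly by zipping `feature_names` with a row; a small sketch:

```python
from sklearn.datasets import load_iris

iris = load_iris()
# Pair each feature name with the first sample's values
first_sample = dict(zip(iris.feature_names, iris.data[0]))
for name, value in first_sample.items():
    print(name, value)
```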
In [5]:
iris_label = iris.target
print(iris_label.shape)
iris_label
(150,)
Out[5]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
In [6]:
iris.target_names
Out[6]:
array(['setosa', 'versicolor', 'virginica'], dtype='<U10')
Each of the 150 label values maps to one of these three species names.
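Because the labels are the integers 0 to 2, they can index straight into `target_names`; a quick sketch:

```python
from sklearn.datasets import load_iris

iris = load_iris()
# NumPy fancy indexing turns each integer label into its species name
species = iris.target_names[iris.target]
print(species[0], species[50], species[100])
```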
In [7]:
iris.filename
# path where the iris dataset is stored
Out[7]:
'/home/ssac24/anaconda3/envs/aiffel/lib/python3.7/site-packages/sklearn/datasets/data/iris.csv'
1-2. Viewing the Data with pandas¶
In [8]:
import pandas as pd
print(pd.__version__)
1.1.5
In [10]:
iris_df = pd.DataFrame(data=iris_data, columns=iris.feature_names)
iris_df["label"] = iris.target
iris_df
Out[10]:
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | label |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 |
| ... | ... | ... | ... | ... | ... |
| 145 | 6.7 | 3.0 | 5.2 | 2.3 | 2 |
| 146 | 6.3 | 2.5 | 5.0 | 1.9 | 2 |
| 147 | 6.5 | 3.0 | 5.2 | 2.0 | 2 |
| 148 | 6.2 | 3.4 | 5.4 | 2.3 | 2 |
| 149 | 5.9 | 3.0 | 5.1 | 1.8 | 2 |

150 rows × 5 columns
Features (the "question sheet"): the data fed into the model. Conventionally stored in a variable named X. Here, the four cm measurements above.
Labels (the "answer sheet"): the data the model must predict, also called the target. Conventionally stored in a variable named y.
1-3. Splitting the Dataset¶
In [13]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(iris_data,
iris_label,
test_size=0.2,
random_state=7)
print('X_train size: ', len(X_train), ', X_test size: ', len(X_test))
# test_size sets the fraction of the data held out as the test set
# random_state seeds the shuffle so the same split can be reproduced
X_train size:  120 , X_test size:  30
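A quick check (not in the original notebook) that a fixed random_state reproduces the same split:

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris()
# Two calls with the same seed produce identical splits
X_tr1, X_te1, _, _ = train_test_split(iris.data, iris.target, test_size=0.2, random_state=7)
X_tr2, X_te2, _, _ = train_test_split(iris.data, iris.target, test_size=0.2, random_state=7)
print(np.array_equal(X_te1, X_te2))  # True
print(len(X_tr1), len(X_te1))        # 120 30
```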
1-4. Trying a Decision Tree¶
In [15]:
from sklearn.tree import DecisionTreeClassifier
decision_tree = DecisionTreeClassifier(random_state=32)
print(decision_tree._estimator_type)
classifier
In [18]:
decision_tree.fit(X_train, y_train)
# fit trains the model on the training data
Out[18]:
DecisionTreeClassifier(random_state=32)
1-5. Testing¶
In [20]:
y_pred = decision_tree.predict(X_test)
y_pred
Out[20]:
array([2, 1, 0, 1, 2, 0, 1, 1, 0, 1, 2, 1, 0, 2, 0, 2, 2, 2, 0, 0, 1, 2, 1, 1, 2, 2, 1, 1, 2, 2])
In [21]:
y_test
Out[21]:
array([2, 1, 0, 1, 2, 0, 1, 1, 0, 1, 1, 1, 0, 2, 0, 1, 2, 2, 0, 0, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2])
In [22]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)
accuracy
# a metric that measures accuracy
Out[22]:
0.9
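accuracy_score here is simply the fraction of matching entries; recomputing the 0.9 by hand from the two arrays printed above:

```python
import numpy as np

y_test = np.array([2, 1, 0, 1, 2, 0, 1, 1, 0, 1, 1, 1, 0, 2, 0, 1, 2, 2, 0, 0,
                   1, 2, 1, 2, 2, 2, 1, 1, 2, 2])
y_pred = np.array([2, 1, 0, 1, 2, 0, 1, 1, 0, 1, 2, 1, 0, 2, 0, 2, 2, 2, 0, 0,
                   1, 2, 1, 1, 2, 2, 1, 1, 2, 2])
# 27 of the 30 entries match, so the accuracy is 0.9
acc = (y_test == y_pred).mean()
print(acc)  # 0.9
```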
Summary¶
In [24]:
# (1) import the required modules
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
# (2) prepare the data
iris = load_iris()
iris_data = iris.data
iris_label = iris.target
# (3) split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(iris_data,
iris_label,
test_size=0.2,
random_state=7)
# (4) train the model and predict
decision_tree = DecisionTreeClassifier(random_state=32)
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)
print(classification_report(y_test, y_pred))
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.91      0.83      0.87        12
           2       0.83      0.91      0.87        11

    accuracy                           0.90        30
   macro avg       0.91      0.91      0.91        30
weighted avg       0.90      0.90      0.90        30
1-6. Random Forest¶
In [26]:
from sklearn.ensemble import RandomForestClassifier
X_train, X_test, y_train, y_test = train_test_split(iris_data,
iris_label,
test_size=0.2,
random_state=25)
random_forest = RandomForestClassifier(random_state=32)
random_forest.fit(X_train, y_train)
y_pred = random_forest.predict(X_test)
print(classification_report(y_test, y_pred))
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       0.92      0.92      0.92        13
           2       0.88      0.88      0.88         8

    accuracy                           0.93        30
   macro avg       0.93      0.93      0.93        30
weighted avg       0.93      0.93      0.93        30
Built-in Classification Models¶
- SVM (Support Vector Machine) https://excelsior-cjh.tistory.com/66?category=918734
In [28]:
from sklearn import svm
svm_model = svm.SVC()
print(svm_model._estimator_type)
svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)
print(classification_report(y_test, y_pred))
classifier
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       1.00      0.92      0.96        13
           2       0.89      1.00      0.94         8

    accuracy                           0.97        30
   macro avg       0.96      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30
- Stochastic Gradient Descent Classifier (SGDClassifier) https://scikit-learn.org/stable/modules/sgd.html
In [30]:
from sklearn.linear_model import SGDClassifier
sgd_model = SGDClassifier()
print(sgd_model._estimator_type)
sgd_model.fit(X_train, y_train)
y_pred = sgd_model.predict(X_test)
print(classification_report(y_test, y_pred))
classifier
              precision    recall  f1-score   support

           0       1.00      0.89      0.94         9
           1       0.85      0.85      0.85        13
           2       0.78      0.88      0.82         8

    accuracy                           0.87        30
   macro avg       0.87      0.87      0.87        30
weighted avg       0.87      0.87      0.87        30
- Logistic Regression http://hleecaster.com/ml-logistic-regression-concept/
In [33]:
from sklearn.linear_model import LogisticRegression
logistic_model = LogisticRegression(max_iter=200)
# max_iter defaults to 100; raising it gives the solver more iterations to reach convergence
print(logistic_model._estimator_type)
logistic_model.fit(X_train, y_train)
y_pred = logistic_model.predict(X_test)
print(classification_report(y_test, y_pred))
classifier
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       1.00      0.92      0.96        13
           2       0.89      1.00      0.94         8

    accuracy                           0.97        30
   macro avg       0.96      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30
2. A Closer Look at Accuracy¶
In [35]:
from sklearn.datasets import load_digits
digits = load_digits()
# load the handwritten digits dataset
digits.keys()
Out[35]:
dict_keys(['data', 'target', 'frame', 'feature_names', 'target_names', 'images', 'DESCR'])
In [36]:
digits_data = digits.data
# number of samples and number of features
digits_data.shape
Out[36]:
(1797, 64)
In [37]:
digits_data[0]
# each sample is a flattened 8x8 pixel image (64 values)
Out[37]:
array([ 0., 0., 5., 13., 9., 1., 0., 0., 0., 0., 13., 15., 10., 15., 5., 0., 0., 3., 15., 2., 0., 11., 8., 0., 0., 4., 12., 0., 0., 8., 8., 0., 0., 5., 8., 0., 0., 9., 8., 0., 0., 4., 11., 0., 1., 12., 7., 0., 0., 2., 14., 5., 10., 12., 0., 0., 0., 0., 6., 13., 10., 0., 0., 0.])
In [38]:
import matplotlib.pyplot as plt
# library for displaying images
%matplotlib inline
plt.imshow(digits.data[0].reshape(8, 8), cmap='gray')
plt.axis('off')
plt.show()
In [40]:
# show several digits at once
for i in range(10):
plt.subplot(2, 5, i+1)
plt.imshow(digits.data[i].reshape(8, 8), cmap='gray')
plt.axis('off')
plt.show()
# structure of the target data
digits_label = digits.target
print(digits_label.shape)
digits_label[:20]
(1797,)
Out[40]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [41]:
# keep label 3 and map every other digit to 0
new_label = [3 if i == 3 else 0 for i in digits_label]
new_label[:20]
Out[41]:
[0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0]
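The same binarization can be written as a vectorized NumPy expression; an equivalent sketch:

```python
import numpy as np
from sklearn.datasets import load_digits

digits_label = load_digits().target
# Keep the 3s, zero out every other digit
new_label = np.where(digits_label == 3, 3, 0)
print(new_label[:20])
```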
2-1. Measuring Accuracy on the Digits Data with a Decision Tree¶
In [43]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
digits = load_digits()
digits_data = digits.data
digits_label = digits.target
# (3) split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(digits_data,
new_label,
test_size=0.2,
random_state=20)
# (4) train the model and predict
decision_tree = DecisionTreeClassifier(random_state=20)
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
accuracy
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       322
           3       0.92      0.92      0.92        38

    accuracy                           0.98       360
   macro avg       0.96      0.96      0.96       360
weighted avg       0.98      0.98      0.98       360
Out[43]:
0.9833333333333333
In [44]:
# build fake_pred, an all-zeros list with the same length as y_pred, and check its accuracy against the true labels y_test
fake_pred = [0] * len(y_pred)
accuracy = accuracy_score(y_test, fake_pred)
accuracy
Out[44]:
0.8944444444444445
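The high score of the all-zeros guess follows directly from the class balance: only 38 of the 360 test labels are 3, so guessing 0 everywhere is still right 322 times:

```python
# Baseline accuracy of always predicting the majority class
support_0, support_3 = 322, 38
baseline = support_0 / (support_0 + support_3)
print(baseline)
```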
So metrics other than accuracy are also used¶
In [53]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)
# from the top left, the entries are the counts of TP, FN, FP, TN; in this digits problem, class 0 plays the Positive role and class 3 the Negative role
Out[53]:
array([[319,   3],
       [  3,  35]])
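With class 0 treated as Positive, the four cells can be unpacked and precision/recall recomputed by hand, which matches the classification_report values:

```python
import numpy as np

cm = np.array([[319, 3],
               [3, 35]])
# Rows are the true class (0, then 3); columns are the prediction
tp, fn = cm[0]
fp, tn = cm[1]
precision_0 = tp / (tp + fp)
recall_0 = tp / (tp + fn)
print(round(precision_0, 2), round(recall_0, 2))  # 0.99 0.99
```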
In [54]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       322
           3       0.92      0.92      0.92        38

    accuracy                           0.98       360
   macro avg       0.96      0.96      0.96       360
weighted avg       0.98      0.98      0.98       360
In [55]:
confusion_matrix(y_test, fake_pred)
Out[55]:
array([[322,   0],
       [ 38,   0]])
In [57]:
print(classification_report(y_test, fake_pred))
# precision and recall for class 3 are both 0
              precision    recall  f1-score   support

           0       0.89      1.00      0.94       322
           3       0.00      0.00      0.00        38

    accuracy                           0.89       360
   macro avg       0.45      0.50      0.47       360
weighted avg       0.80      0.89      0.84       360
In [59]:
accuracy_score(y_test, y_pred), accuracy_score(y_test, fake_pred)
# yet both accuracy scores still look fine
Out[59]:
(0.9833333333333333, 0.8944444444444445)
3-1. Classifying Handwritten Digits¶
In [96]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression
digits = load_digits()
digits_data = digits.data
digits_label = digits.target
# assign the data
X_train, X_test, y_train, y_test = train_test_split(digits_data,
digits_label,
test_size=0.2,
random_state=40)
# print the target names
print(digits.target_names)
# decision tree
decision_tree = DecisionTreeClassifier(random_state=40)
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)
print(classification_report(y_test, y_pred))
# random forest
random_forest = RandomForestClassifier(random_state=40)
random_forest.fit(X_train, y_train)
y_pred = random_forest.predict(X_test)
print(classification_report(y_test, y_pred))
# SVM
svm_model = svm.SVC()
print(svm_model._estimator_type)
svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)
print(classification_report(y_test, y_pred))
# SGDClassifier (stochastic gradient descent)
sgd_model = SGDClassifier()
print(sgd_model._estimator_type)
sgd_model.fit(X_train, y_train)
y_pred = sgd_model.predict(X_test)
print(classification_report(y_test, y_pred))
# logistic regression
logistic_model = LogisticRegression(max_iter=10000)
print(logistic_model._estimator_type)
logistic_model.fit(X_train, y_train)
y_pred = logistic_model.predict(X_test)
print(classification_report(y_test, y_pred))
[0 1 2 3 4 5 6 7 8 9]
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        30
           1       0.77      0.72      0.75        47
           2       0.81      0.79      0.80        38
           3       0.74      0.68      0.71        34
           4       0.90      0.92      0.91        38
           5       0.83      0.86      0.84        28
           6       0.97      0.97      0.97        34
           7       0.81      0.86      0.83        35
           8       0.76      0.72      0.74        36
           9       0.73      0.80      0.76        40

    accuracy                           0.82       360
   macro avg       0.83      0.83      0.83       360
weighted avg       0.82      0.82      0.82       360

              precision    recall  f1-score   support

           0       0.97      1.00      0.98        30
           1       0.96      1.00      0.98        47
           2       1.00      0.97      0.99        38
           3       0.97      1.00      0.99        34
           4       0.97      0.97      0.97        38
           5       0.96      0.96      0.96        28
           6       1.00      0.97      0.99        34
           7       0.95      1.00      0.97        35
           8       0.97      0.94      0.96        36
           9       1.00      0.93      0.96        40

    accuracy                           0.97       360
   macro avg       0.98      0.98      0.97       360
weighted avg       0.98      0.97      0.97       360

classifier
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        30
           1       0.96      1.00      0.98        47
           2       1.00      1.00      1.00        38
           3       1.00      1.00      1.00        34
           4       1.00      1.00      1.00        38
           5       1.00      1.00      1.00        28
           6       1.00      1.00      1.00        34
           7       1.00      1.00      1.00        35
           8       0.97      0.94      0.96        36
           9       1.00      0.97      0.99        40

    accuracy                           0.99       360
   macro avg       0.99      0.99      0.99       360
weighted avg       0.99      0.99      0.99       360

classifier
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        30
           1       0.87      0.96      0.91        47
           2       1.00      1.00      1.00        38
           3       1.00      0.94      0.97        34
           4       1.00      1.00      1.00        38
           5       1.00      1.00      1.00        28
           6       0.97      1.00      0.99        34
           7       0.97      0.94      0.96        35
           8       0.97      0.86      0.91        36
           9       0.93      0.95      0.94        40

    accuracy                           0.96       360
   macro avg       0.97      0.97      0.97       360
weighted avg       0.97      0.96      0.96       360

classifier
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        30
           1       0.98      0.91      0.95        47
           2       0.97      1.00      0.99        38
           3       0.97      0.94      0.96        34
           4       1.00      0.97      0.99        38
           5       0.96      0.96      0.96        28
           6       0.97      1.00      0.99        34
           7       0.94      0.97      0.96        35
           8       0.94      0.92      0.93        36
           9       0.91      0.97      0.94        40

    accuracy                           0.96       360
   macro avg       0.97      0.97      0.97       360
weighted avg       0.96      0.96      0.96       360
The support vector machine came out most accurate¶
Reasoning: for digits, a prediction is only meaningful when true instances are actually recognized as true, so Recall was chosen as the evaluation metric.
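For reference, macro-averaged recall treats every digit class equally; a tiny illustration with made-up labels:

```python
from sklearn.metrics import recall_score

# Hypothetical labels, only to show the computation
y_true = [0, 0, 1, 1, 2, 2]
y_pred = [0, 0, 1, 2, 2, 2]
# Per-class recall: class 0 -> 1.0, class 1 -> 0.5, class 2 -> 1.0
macro_recall = recall_score(y_true, y_pred, average='macro')
print(macro_recall)
```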
3-2. Classifying Wine¶
In [105]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression
wine = load_wine()
wine_data = wine.data
wine_label = wine.target
X_train, X_test, y_train, y_test = train_test_split(wine_data,
wine_label,
test_size=0.2,
random_state=30)
# print the target names
print(wine.target_names)
# decision tree
decision_tree = DecisionTreeClassifier(random_state=30)
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)
print(classification_report(y_test, y_pred))
# random forest
random_forest = RandomForestClassifier(random_state=30)
random_forest.fit(X_train, y_train)
y_pred = random_forest.predict(X_test)
print(classification_report(y_test, y_pred))
# SVM
svm_model = svm.SVC()
print(svm_model._estimator_type)
svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)
print(classification_report(y_test, y_pred))
# SGDClassifier (stochastic gradient descent)
sgd_model = SGDClassifier()
print(sgd_model._estimator_type)
sgd_model.fit(X_train, y_train)
y_pred = sgd_model.predict(X_test)
print(classification_report(y_test, y_pred))
# logistic regression
logistic_model = LogisticRegression(max_iter=5000)
print(logistic_model._estimator_type)
logistic_model.fit(X_train, y_train)
y_pred = logistic_model.predict(X_test)
print(classification_report(y_test, y_pred))
['class_0' 'class_1' 'class_2']
              precision    recall  f1-score   support

           0       0.83      0.91      0.87        11
           1       0.81      0.81      0.81        16
           2       0.88      0.78      0.82         9

    accuracy                           0.83        36
   macro avg       0.84      0.83      0.84        36
weighted avg       0.83      0.83      0.83        36

              precision    recall  f1-score   support

           0       0.92      1.00      0.96        11
           1       1.00      0.94      0.97        16
           2       1.00      1.00      1.00         9

    accuracy                           0.97        36
   macro avg       0.97      0.98      0.97        36
weighted avg       0.97      0.97      0.97        36

classifier
              precision    recall  f1-score   support

           0       0.89      0.73      0.80        11
           1       0.79      0.69      0.73        16
           2       0.46      0.67      0.55         9

    accuracy                           0.69        36
   macro avg       0.71      0.69      0.69        36
weighted avg       0.74      0.69      0.71        36

classifier
              precision    recall  f1-score   support

           0       1.00      0.55      0.71        11
           1       1.00      0.12      0.22        16
           2       0.32      1.00      0.49         9

    accuracy                           0.47        36
   macro avg       0.77      0.56      0.47        36
weighted avg       0.83      0.47      0.44        36

classifier
              precision    recall  f1-score   support

           0       0.92      1.00      0.96        11
           1       1.00      0.94      0.97        16
           2       1.00      1.00      1.00         9

    accuracy                           0.97        36
   macro avg       0.97      0.98      0.97        36
weighted avg       0.97      0.97      0.97        36
Random forest is the right fit¶
Reasoning: correctly identifying which wine it is matters most. Random forest and logistic regression produced identical scores here, but since this is a three-way classification task, random forest was judged the better choice.
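A single split's ranking depends on the chosen random_state; as a cross-check (not part of the original notebook), k-fold cross-validation averages over several splits:

```python
from sklearn.datasets import load_wine
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

wine = load_wine()
# 5-fold CV: each fold serves once as the held-out test set
scores = cross_val_score(RandomForestClassifier(random_state=30),
                         wine.data, wine.target, cv=5)
print(scores.mean())
```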
3-3. Classifying Breast Cancer Diagnoses¶
In [103]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression
breast_cancer = load_breast_cancer()
breast_cancer_data = breast_cancer.data
breast_cancer_label = breast_cancer.target
X_train, X_test, y_train, y_test = train_test_split(breast_cancer_data,
breast_cancer_label,
test_size=0.2,
random_state=40)
# print the target names
print(breast_cancer.target_names)
# decision tree
decision_tree = DecisionTreeClassifier(random_state=40)
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)
print(classification_report(y_test, y_pred))
# random forest
random_forest = RandomForestClassifier(random_state=40)
random_forest.fit(X_train, y_train)
y_pred = random_forest.predict(X_test)
print(classification_report(y_test, y_pred))
# SVM
svm_model = svm.SVC()
print(svm_model._estimator_type)
svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)
print(classification_report(y_test, y_pred))
# SGDClassifier (stochastic gradient descent)
sgd_model = SGDClassifier()
print(sgd_model._estimator_type)
sgd_model.fit(X_train, y_train)
y_pred = sgd_model.predict(X_test)
print(classification_report(y_test, y_pred))
# logistic regression
logistic_model = LogisticRegression(max_iter=5000)
print(logistic_model._estimator_type)
logistic_model.fit(X_train, y_train)
y_pred = logistic_model.predict(X_test)
print(classification_report(y_test, y_pred))
['malignant' 'benign']
              precision    recall  f1-score   support

           0       0.93      0.97      0.95        39
           1       0.99      0.96      0.97        75

    accuracy                           0.96       114
   macro avg       0.96      0.97      0.96       114
weighted avg       0.97      0.96      0.97       114

              precision    recall  f1-score   support

           0       0.90      0.97      0.94        39
           1       0.99      0.95      0.97        75

    accuracy                           0.96       114
   macro avg       0.95      0.96      0.95       114
weighted avg       0.96      0.96      0.96       114

classifier
              precision    recall  f1-score   support

           0       0.92      0.87      0.89        39
           1       0.94      0.96      0.95        75

    accuracy                           0.93       114
   macro avg       0.93      0.92      0.92       114
weighted avg       0.93      0.93      0.93       114

classifier
              precision    recall  f1-score   support

           0       1.00      0.77      0.87        39
           1       0.89      1.00      0.94        75

    accuracy                           0.92       114
   macro avg       0.95      0.88      0.91       114
weighted avg       0.93      0.92      0.92       114

classifier
              precision    recall  f1-score   support

           0       0.95      0.97      0.96        39
           1       0.99      0.97      0.98        75

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114
Logistic regression fits best¶
For breast cancer, it is important to judge both positive and negative cases correctly, so accuracy was used as the criterion.
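When comparing models on a single number like this, classification_report can also return a dict instead of printed text, which makes the comparison programmatic; a small sketch with hypothetical labels:

```python
from sklearn.metrics import classification_report

# Hypothetical predictions, only to show output_dict=True
y_true = [0, 0, 1, 1, 1]
y_pred = [0, 1, 1, 1, 1]
report = classification_report(y_true, y_pred, output_dict=True)
print(report['accuracy'])     # 0.8
print(report['1']['recall'])  # 1.0
```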