# Import the core dependencies (scikit-learn estimators and helpers are imported further down, where they are first used)
from sklearn import datasets
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Apply the ggplot style to figures created after this point.
plt.style.use('ggplot')

# Load the iris dataset bundled with scikit-learn; the result is dict-like.
iris = datasets.load_iris()

# List the fields the dataset object exposes (iterating it yields its keys).
for name in iris:
    print(name)

# NOTE: the bare expressions below display a value only in an interactive
# session (REPL / notebook); run as a plain script they evaluate silently.
iris.data[:5]        # first five entries of the feature data
iris.data[0]         # first entry of the feature data
iris.feature_names   # names of the features
iris.target          # label array
iris.target[0]       # label of the first entry

# Print the class names, one per line.
for target_name in iris.target_names:
    print(target_name)

iris.data.shape      # dimensions of the feature data
# Short aliases for the feature data and the labels, used from here on.
x, y = iris.data, iris.target

# Tabular view of the features, with one column per feature name.
df = pd.DataFrame(data=x, columns=iris.feature_names)
df.head()  # preview; shown only in an interactive session

# Matrix of scatter plots over the feature columns, coloured by label.
sp = pd.plotting.scatter_matrix(
    df,
    c=y,
    figsize=[10, 10],
    s=150,
    marker='D',
)
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier (k = 5) trained on the complete dataset.
# fit() returns the estimator itself, so construction and training chain.
knnModel = KNeighborsClassifier(n_neighbors=5).fit(x, y)

# Four hand-written samples, each with four feature values.
test_data = [
    [4.8, 3.0, 1.5, 0.2],
    [4.7, 2.9, 1.4, 0.2],
    [4.9, 3.1, 1.5, 0.2],
    [4.5, 3.0, 1.6, 0.2],
]

# Predicted class label for each hand-written sample.
prediction = knnModel.predict(test_data)
prediction  # shown only in an interactive session
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing. The split is stratified on the
# labels and seeded (random_state=21) so that it is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=21,
    stratify=y,
)

# NOTE(review): the original author suspected that n_neighbors = 3 gives the
# maximum score, yet the model below uses 7 -- unverified; confirm which
# value is intended.
knnSplitTestModel = KNeighborsClassifier(n_neighbors=7).fit(x_train, y_train)

# Predicted labels for the held-out samples.
splitPrediction = knnSplitTestModel.predict(x_test)
splitPrediction  # shown only in an interactive session

# Score of the model on the held-out samples (likewise shown only interactively).
knnSplitTestModel.score(x_test, y_test)