Implementation of KNN algorithm in Python
The k-nearest neighbors algorithm (k-NN) is a non-parametric method used for
classification and regression. In both cases, the input consists of the k closest
training examples in the feature space. The k-nearest neighbor classification
performance can often be significantly improved through (supervised) metric learning.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import numpy as np
import matplotlib.pyplot as plt
# Load the iris dataset (150 samples, 4 features, 3 classes).
iris = load_iris()

# Create feature and target arrays
X = iris.data
y = iris.target

# Hold out 20% of the data as a test set; random_state pins the shuffle
# so the split (and the plot) is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Candidate values of k (number of neighbors) to evaluate.
neighbors = np.arange(1, 9)
train_acc = np.empty(len(neighbors))
test_acc = np.empty(len(neighbors))

# Fit one classifier per k and record accuracy on both splits,
# so we can see under-/over-fitting as k varies.
for i, k in enumerate(neighbors):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    # Compute training and test data accuracy
    train_acc[i] = knn.score(X_train, y_train)
    test_acc[i] = knn.score(X_test, y_test)

# Plot accuracy against k for both splits.
plt.plot(neighbors, test_acc, label='Testing dataset Accuracy')
plt.plot(neighbors, train_acc, label='Training dataset Accuracy')
plt.legend()
plt.xlabel('n_neighbors')
plt.ylabel('Accuracy')
plt.show()
Leave a Comment