# Source code for lostml.neighbors.knn
import numpy as np
from .base import _BaseNeighbors
from ..utils.distances import euclidean_distance, manhattan_distance
class KNN(_BaseNeighbors):
    """K-nearest-neighbors classifier using majority voting.

    Parameters
    ----------
    n_neighbors : int, default=5
        Number of neighbors consulted per prediction.
    metric : {'euclidean', 'manhattan'}, default='euclidean'
        Distance metric used to compare samples.
    """

    def __init__(self, n_neighbors=5, metric='euclidean'):
        super().__init__(n_neighbors)
        self.metric = metric

    def _get_distances(self, X):
        """Return the distance matrix between ``X`` and the training set.

        Raises
        ------
        ValueError
            If ``self.metric`` is not a supported metric name.
        """
        metric_funcs = {
            'euclidean': euclidean_distance,
            'manhattan': manhattan_distance,
        }
        try:
            distance_fn = metric_funcs[self.metric]
        except KeyError:
            # `from None` keeps the traceback clean, matching a plain raise.
            raise ValueError(f"Invalid metric: {self.metric}") from None
        return distance_fn(X, self.X_train)

    def _find_k_neighbors(self, distances):
        """Return the indices of the ``n_neighbors`` smallest distances.

        Parameters
        ----------
        distances : ndarray
            Shape (n_test, n_train), or (n_train,) for a single sample.

        Returns
        -------
        ndarray
            Neighbor indices, shape (n_test, n_neighbors) or (n_neighbors,).
        """
        # Sorting along the last axis covers both the 1-D single-sample
        # case and the 2-D batch case with one expression.
        return np.argsort(distances, axis=-1)[..., :self.n_neighbors]

    def predict(self, X):
        """Predict class labels for samples in ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples. A single 1-D sample is also accepted.

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted class labels (a scalar when a 1-D sample is given).
        """
        self._check_fitted()

        X = np.asarray(X)
        # Promote a lone sample to a one-row batch; remember to unwrap
        # the result at the end.
        was_single = X.ndim == 1
        if was_single:
            X = X[np.newaxis, :]

        # (n_test, n_train) distances -> (n_test, n_neighbors) indices,
        # then look up the corresponding training labels.
        neighbor_idx = self._find_k_neighbors(self._get_distances(X))
        neighbor_labels = self.y_train[neighbor_idx]

        # Majority vote per row; np.unique returns classes sorted, so a
        # tie resolves to the smallest label ("first argmax wins").
        predictions = np.array([
            classes[np.argmax(counts)]
            for classes, counts in (
                np.unique(row, return_counts=True) for row in neighbor_labels
            )
        ])

        return predictions[0] if was_single else predictions