Source code for lostml.neighbors.knn

import numpy as np
from .base import _BaseNeighbors
from ..utils.distances import euclidean_distance, manhattan_distance

class KNN(_BaseNeighbors):

    def __init__(self, n_neighbors=5, metric='euclidean'):
        super().__init__(n_neighbors)
        self.metric = metric

    def _get_distances(self, X):
        if self.metric == 'euclidean':
            return euclidean_distance(X, self.X_train)
        elif self.metric == 'manhattan':
            return manhattan_distance(X, self.X_train)
        else:
            raise ValueError(f"Invalid metric: {self.metric}")

    def _find_k_neighbors(self, distances):
        """
        Find the k nearest neighbors and return their indices.

        Parameters
        ----------
        distances : ndarray
            Shape (n_test, n_train), or (n_train,) for a single sample.

        Returns
        -------
        ndarray
            Indices of the k nearest neighbors.
            Shape (n_test, n_neighbors), or (n_neighbors,) for a single sample.
        """
        # Handle a single sample (1D distances)
        if distances.ndim == 1:
            return np.argsort(distances)[:self.n_neighbors]

        # Handle multiple samples (2D distances):
        # sort along axis=1 (per test sample), then take the first k columns
        return np.argsort(distances, axis=1)[:, :self.n_neighbors]
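A quick standalone illustration (not part of the module) of the argsort-based selection used in _find_k_neighbors: for a hypothetical 2 × 4 distance matrix with n_neighbors=2, each row yields the column indices of its two smallest distances.

    import numpy as np

    # Hypothetical distance matrix: 2 test samples x 4 training samples
    distances = np.array([[0.9, 0.1, 0.5, 0.3],
                          [0.2, 0.8, 0.4, 0.6]])

    # Same selection as _find_k_neighbors with n_neighbors=2:
    # sort each row, keep the indices of the two smallest distances
    idx = np.argsort(distances, axis=1)[:, :2]
    print(idx)  # [[1 3]
                #  [0 2]]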
    def predict(self, X):
        """
        Predict class labels for samples in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples.

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted class labels.
        """
        # Check that the estimator has been fitted
        self._check_fitted()

        # Convert to a numpy array
        X = np.asarray(X)

        # Handle a single sample
        if X.ndim == 1:
            X = X.reshape(1, -1)
            single_sample = True
        else:
            single_sample = False

        # Get distances (shape: n_test x n_train)
        distances = self._get_distances(X)

        # Find the indices of the k nearest neighbors (shape: n_test x n_neighbors)
        neighbors_indices = self._find_k_neighbors(distances)

        # Get the labels of the k neighbors (shape: n_test x n_neighbors)
        k_labels = self.y_train[neighbors_indices]

        # Vote for the most common class for each test sample
        predictions = []
        for labels in k_labels:
            # Count occurrences of each class
            unique_labels, counts = np.unique(labels, return_counts=True)
            # Take the class with the highest count (on a tie, the first one wins)
            most_common = unique_labels[np.argmax(counts)]
            predictions.append(most_common)
        predictions = np.array(predictions)

        # Return a scalar if a single sample was provided
        if single_sample:
            return predictions[0]
        return predictions
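A minimal usage sketch of the class above, assuming the fit() method inherited from _BaseNeighbors (not shown on this page) stores the training data as X_train and y_train:

    import numpy as np
    from lostml.neighbors.knn import KNN

    # Tiny two-class toy dataset
    X_train = np.array([[0.0, 0.0], [0.1, 0.2], [1.0, 1.0], [0.9, 1.1]])
    y_train = np.array([0, 0, 1, 1])

    knn = KNN(n_neighbors=3, metric='euclidean')
    knn.fit(X_train, y_train)          # fit() is assumed to come from _BaseNeighbors
    print(knn.predict([[0.05, 0.1]]))  # expected: [0]
    print(knn.predict([0.95, 1.0]))    # single 1D sample -> scalar, expected: 1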