diff --git a/MachineLearningLab/MachineLearningLab/src/Classification/KNNClassifier.cpp b/MachineLearningLab/MachineLearningLab/src/Classification/KNNClassifier.cpp
index 2d3cde4698203d7cd099e7031eb551de9105fd7d..b66d4fda620a232b2899b87a6c31a536f2737cf6 100644
--- a/MachineLearningLab/MachineLearningLab/src/Classification/KNNClassifier.cpp
+++ b/MachineLearningLab/MachineLearningLab/src/Classification/KNNClassifier.cpp
@@ -41,6 +41,53 @@ std::vector<double> KNNClassifier::predict(const std::vector<std::vector<double>
         throw std::runtime_error("Error: Empty training data.");
     }
 
+    // Clamp k to the number of training samples: taking begin() + k_ from the
+    // sorted list would run past end() whenever k_ exceeds the training-set size.
+    // NOTE(review): k_'s declaration is outside this hunk; if it is signed, a
+    // negative value wraps on the cast and is clamped as well - confirm intent.
+    std::size_t neighbourCount = static_cast<std::size_t>(k_);
+    if (neighbourCount > X_train_.size()) {
+        neighbourCount = X_train_.size();
+    }
+
+    for (std::size_t i = 0; i < X_test.size(); ++i) {
+        const std::vector<double>& query = X_test.at(i);
+
+        // (distance to the query, training label) for every training sample.
+        std::vector<std::pair<double, double>> distanceAndLabel;
+        distanceAndLabel.reserve(X_train_.size());
+        for (std::size_t j = 0; j < X_train_.size(); ++j) {
+            const double dist = SimilarityFunctions::euclideanDistance(query, X_train_.at(j));
+            distanceAndLabel.emplace_back(dist, y_train_.at(j));
+        }
+
+        // Pairs order by distance first, so the nearest samples end up in front.
+        std::sort(distanceAndLabel.begin(), distanceAndLabel.end());
+
+        // Tally the labels of the k nearest samples.
+        std::unordered_map<double, int> votes;
+        for (std::size_t n = 0; n < neighbourCount; ++n) {
+            ++votes[distanceAndLabel[n].second];
+        }
+
+        // Majority vote. highestCount must track the running maximum, otherwise
+        // the last label visited wins regardless of its count. Scanning the
+        // neighbours nearest-first with a strict '>' also settles ties in
+        // favour of the closest sample rather than on hash-map order.
+        int highestCount = 0;
+        double classifier = 0.0;
+        for (std::size_t n = 0; n < neighbourCount; ++n) {
+            const double label = distanceAndLabel[n].second;
+            const int count = votes[label];
+            if (count > highestCount) {
+                highestCount = count;
+                classifier = label;
+            }
+        }
+
+        y_pred.push_back(classifier);
+    }
+
     /* Implement the following:
         --- Loop through each test data point
         --- Calculate Euclidean distance between test data point and each training data point
diff --git a/MachineLearningLab/MachineLearningLab/src/Utils/SimilarityFunctions.cpp b/MachineLearningLab/MachineLearningLab/src/Utils/SimilarityFunctions.cpp
index 8ac06ad17d781b88b4a14af7e3d250007231b4bc..fae50534f1935f0bf60ee217c81a36b64df15d26 100644
--- a/MachineLearningLab/MachineLearningLab/src/Utils/SimilarityFunctions.cpp
+++ b/MachineLearningLab/MachineLearningLab/src/Utils/SimilarityFunctions.cpp
@@ -12,9 +12,14 @@ double SimilarityFunctions::hammingDistance(const std::vector<double>& v1, const
         throw std::invalid_argument("Vectors must be of equal length.");
     }
     double dist = 0.0;
-    
-    // Compute the Hamming Distance
-    //TODO
+
+    // Hamming distance: the number of positions whose values differ. Summing
+    // |v1[i] - v2[i]| instead would just reproduce manhattanDistance().
+    for (std::size_t i = 0; i < v1.size(); ++i) {
+        if (v1[i] != v2[i]) {
+            dist += 1.0;
+        }
+    }
 
     return dist;
 }
@@ -28,10 +33,30 @@ double SimilarityFunctions::jaccardDistance(const std::vector<double>& a, const
     double num = 0.0;
     double den = 0.0;
     double dist = 0.0;
-    
+
     // Compute the Jaccard Distance
     // TODO
 
+    // Jaccard works on sets, so collapse duplicate values first.
+    const std::set<double> setA(a.begin(), a.end());
+    const std::set<double> setB(b.begin(), b.end());
+
+    // num = |A intersect B|: members of A that also occur in B.
+    for (const double elem : setA) {
+        if (setB.count(elem) > 0) {
+            num += 1.0;
+        }
+    }
+
+    // den = |A union B| = |A| + |B| - |A intersect B|.
+    den = static_cast<double>(setA.size() + setB.size()) - num;
+
+    // Two empty inputs have an empty union; keep dist at 0 (identical sets)
+    // rather than evaluating 0/0, which yields NaN.
+    if (den > 0.0) {
+        dist = 1.0 - num / den;
+    }
+
     return dist;
 }
 
@@ -44,12 +69,28 @@ double SimilarityFunctions::cosineDistance(const std::vector<double>& a, const s
     double dotProduct = 0.0;
     double normA = 0.0;
     double normB = 0.0;
-    double cosinedist = 0.0;
-    
+    double cosinedist = 0.0;
+
     // Compute the cosine Distance
     // TODO
+    // Accumulate the dot product and both squared norms in a single pass.
+    for (std::size_t i = 0; i < a.size(); ++i) {
+        dotProduct += a[i] * b[i];
+        normA += a[i] * a[i];
+        normB += b[i] * b[i];
+    }
+    normA = std::sqrt(normA);
+    normB = std::sqrt(normB);
+
+    // A zero-length vector has no direction, so the cosine is undefined.
+    // Treat it as similarity 0 (distance 1) instead of dividing by zero.
+    if (normA == 0.0 || normB == 0.0) {
+        return 1.0;
+    }
+
+    // The distance is the complement of the cosine similarity.
+    cosinedist = 1.0 - dotProduct / (normA * normB);
 
-
     return cosinedist;
 }
 
@@ -60,10 +101,14 @@ double SimilarityFunctions::euclideanDistance(const std::vector<double>& a, cons
         throw std::invalid_argument("Vectors must be of equal length.");
     }
     double dist = 0.0;
-    
     // Compute the Euclidean Distance
     // TODO
-    
+    for (size_t i = 0; i < a.size(); ++i) {
+        double difference = a[i] - b[i];
+        dist += difference * difference;
+    }
+
+    dist = std::sqrt(dist);
     return dist;
 }
 
@@ -75,10 +120,14 @@ double SimilarityFunctions::manhattanDistance(const std::vector<double>& a, cons
         throw std::invalid_argument("Vectors must be of equal length.");
     }
     double dist = 0.0;
-    
+
     // Compute the Manhattan Distance
     // TODO
 
+    for (size_t i = 0; i < a.size(); ++i) {
+        dist += std::abs(a[i] - b[i]);
+    }
+
     return dist;
 }
 
@@ -88,10 +137,14 @@ double SimilarityFunctions::minkowskiDistance(const std::vector<double>& a, cons
         throw std::invalid_argument("Vectors must be of equal length.");
     }
     double dist = 0.0;
-    
+
     // Compute the Minkowski Distance
     // TODO
-    
+    for (size_t i = 0; i < a.size(); ++i) {
+        dist += std::pow(std::abs(a[i] - b[i]), p);
+    }
+
+    dist = std::pow(dist, 1.0 / p);
     return dist;
 }
 
diff --git a/MachineLearningLab/MachineLearningLab/src/Utils/SimilarityFunctions.h b/MachineLearningLab/MachineLearningLab/src/Utils/SimilarityFunctions.h
index e4639bf94264a78f7d55387cd392a307102c1597..ec6c3ca05b16571ca9ff7afdc7a0d31788ffa3aa 100644
--- a/MachineLearningLab/MachineLearningLab/src/Utils/SimilarityFunctions.h
+++ b/MachineLearningLab/MachineLearningLab/src/Utils/SimilarityFunctions.h
@@ -1,6 +1,7 @@
 #ifndef SIMILARITYFUNCTIONS_H
 #define SIMILARITYFUNCTIONS_H
 #include <vector>
+#include <set>
 
 /// SimilarityFunctions class definition
 ///