Skip to content
Snippets Groups Projects
Commit 87266e2b authored by Silas Meister's avatar Silas Meister
Browse files

Implement KNN Classifier

parent 91de7b2b
No related branches found
No related tags found
No related merge requests found
......@@ -41,6 +41,46 @@ std::vector<double> KNNClassifier::predict(const std::vector<std::vector<double>
throw std::runtime_error("Error: Empty training data.");
}
// Classify every test sample by majority vote among its nearest training samples.
for (std::size_t i = 0; i < X_test.size(); ++i) {
    // Bind by reference: copying the feature vectors per pair is pure overhead.
    const std::vector<double>& query = X_test.at(i);

    // Pair the Euclidean distance to each training sample with that sample's label.
    std::vector<std::pair<double, double>> distanceAndLabel;
    distanceAndLabel.reserve(X_train_.size());
    for (std::size_t j = 0; j < X_train_.size(); ++j) {
        const std::vector<double>& sample = X_train_.at(j);
        const double label = y_train_.at(j);
        const double dist = SimilarityFunctions::euclideanDistance(query, sample);
        distanceAndLabel.emplace_back(dist, label);
    }

    // Clamp the neighbour count to [0, number of training samples]; an unclamped
    // begin() + k_ would run past the end whenever k_ exceeds the training set size.
    using Diff = std::vector<std::pair<double, double>>::difference_type;
    const Diff available = static_cast<Diff>(distanceAndLabel.size());
    const Diff requested = static_cast<Diff>(k_);
    const Diff neighbours = std::max<Diff>(0, std::min(requested, available));
    const auto nearestEnd = distanceAndLabel.begin() + neighbours;

    // Only the closest neighbours have to be in order, so a partial sort suffices.
    std::partial_sort(distanceAndLabel.begin(), nearestEnd, distanceAndLabel.end());

    // Count how often each label occurs among the nearest neighbours.
    std::unordered_map<double, int> votes;
    for (auto it = distanceAndLabel.begin(); it != nearestEnd; ++it) {
        ++votes[it->second];
    }

    // Pick the label with the most votes. highestCount must track the running
    // maximum, otherwise any label seen later would overwrite the real winner.
    // Ties go to the smaller label so the result does not depend on hash order.
    int highestCount = 0;
    double classifier = 0.0;
    for (const auto& entry : votes) {
        const double label = entry.first;
        const int count = entry.second;
        const bool moreVotes = count > highestCount;
        const bool tieWithSmallerLabel = (count == highestCount) && (label < classifier);
        if (moreVotes || tieWithSmallerLabel) {
            highestCount = count;
            classifier = label;
        }
    }
    y_pred.push_back(classifier);
}
/* Implement the following:
--- Loop through each test data point
--- Calculate Euclidean distance between test data point and each training data point
......
......@@ -12,9 +12,10 @@ double SimilarityFunctions::hammingDistance(const std::vector<double>& v1, const
throw std::invalid_argument("Vectors must be of equal length.");
}
double dist = 0.0;
// Hamming distance: the number of positions at which the two vectors differ.
for (std::size_t i = 0; i < v1.size(); ++i) {
    // Each mismatch counts exactly once, no matter how far apart the values are
    // (summing |v1 - v2| instead would compute the Manhattan distance).
    if (v1.at(i) != v2.at(i)) {
        dist += 1.0;
    }
}
return dist;
}
......@@ -28,10 +29,29 @@ double SimilarityFunctions::jaccardDistance(const std::vector<double>& a, const
double num = 0.0;
double den = 0.0;
double dist = 0.0;
// Compute the Jaccard Distance
// TODO
// vector to set
std::set<double> setA(a.begin(), a.end());
std::set<double> setB(b.begin(), b.end());
// calculate the intersection
std::set<double> intersection;
for (double elem : setA) {
if (setB.count(elem) > 0) {
intersection.insert(elem);
}
}
// calculate the union
std::set<double> unionSet = setA;
unionSet.insert(setB.begin(), setB.end());
// Jaccard Distance
dist = 1.0 - static_cast<double>(intersection.size()) / unionSet.size();
return dist;
}
......@@ -44,12 +64,25 @@ double SimilarityFunctions::cosineDistance(const std::vector<double>& a, const s
double dotProduct = 0.0;
double normA = 0.0;
double normB = 0.0;
double cosinedist = 0.0;
double cosinedist = 0.0;
// Compute the cosine Distance
// TODO
// scalar product
for (size_t i = 0; i < a.size(); ++i) {
dotProduct += a[i] * b[i];
}
// calcul of the lenght
for (size_t i = 0; i < a.size(); ++i) {
normA += a[i] * a[i];
normB += b[i] * b[i];
}
normA = std::sqrt(normA);
normB = std::sqrt(normB);
cosinedist = dotProduct / (normA * normB);
return cosinedist;
}
......@@ -60,10 +93,14 @@ double SimilarityFunctions::euclideanDistance(const std::vector<double>& a, cons
throw std::invalid_argument("Vectors must be of equal length.");
}
double dist = 0.0;
// Compute the Euclidean Distance
// TODO
for (size_t i = 0; i < a.size(); ++i) {
double difference = a[i] - b[i];
dist += difference * difference;
}
dist = std::sqrt(dist);
return dist;
}
......@@ -75,10 +112,14 @@ double SimilarityFunctions::manhattanDistance(const std::vector<double>& a, cons
throw std::invalid_argument("Vectors must be of equal length.");
}
double dist = 0.0;
// Compute the Manhattan Distance
// TODO
for (size_t i = 0; i < a.size(); ++i) {
dist += std::abs(a[i] - b[i]);
}
return dist;
}
......@@ -88,10 +129,14 @@ double SimilarityFunctions::minkowskiDistance(const std::vector<double>& a, cons
throw std::invalid_argument("Vectors must be of equal length.");
}
double dist = 0.0;
// Compute the Minkowski Distance
// TODO
for (size_t i = 0; i < a.size(); ++i) {
dist += std::pow(std::abs(a[i] - b[i]), p);
}
dist = std::pow(dist, 1.0 / p);
return dist;
}
......
#ifndef SIMILARITYFUNCTIONS_H
#define SIMILARITYFUNCTIONS_H
#include <vector>
#include <set>
/// SimilarityFunctions class definition ///
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment