Longest Common Subsequence (LCSS) is another non-euclidean similarity measure for two timeseries, which may be useful on data with gaps. The Time-Warped Longest Common Subsequence Algorithm For Music Retrieval is included, because the implementation may be useful for others.
TimeSeriesUtils.hpp
#ifndef TSUTIL_H
#define TSUTIL_H
#include <vector>
#include <cstdint>
namespace TimeSeriesUtils {
double CalculateEuclideanDistance(double x, double y);
double CalculateDynamicTimeWarpedDistance(std::vector<double> t0, std::vector<double> t1);
int CalculateLongestCommonSubsequence(std::vector<double> t0, std::vector<double> t1);
int CalculateTimeWarpedLongestCommonSubsequence(std::vector<double> t0, std::vector<double> t1);
}
#endif
TimeSeriesUtils.cpp
#include <vector>
#include <algorithm>
#include "../include/TimeSeriesUtils.hpp"
namespace TimeSeriesUtils {
double CalculateEuclideanDistance(double x, double y) {
return std::sqrt(std::pow((x - y), 2));
}
double CalculateDynamicTimeWarpedDistance(std::vector<double> t0, std::vector<double> t1) {
size_t m = t0.size();
size_t n = t1.size();
// Allocate the Matrix to work on:
std::vector<std::vector<double>> cost(m, std::vector<double>(n));
cost[0][0] = CalculateEuclideanDistance(t0[0], t1[0]);
// Calculate the first row:
for (int i = 1; i < m; i++) {
cost[i][0] = cost[i - 1][0] + CalculateEuclideanDistance(t0[i], t1[0]);
}
// Calculate the first column:
for (int j = 1; j < n; j++) {
cost[0][j] = cost[0][j - 1] + CalculateEuclideanDistance(t0[0], t1[j]);
}
// Fill the matrix:
for (int i = 1; i < m; i++) {
for (int j = 1; j < n; j++) {
cost[i][j] = std::min(cost[i - 1][j], std::min(cost[i][j - 1], cost[i - 1][j - 1]))
+ CalculateEuclideanDistance(t0[i], t1[j]);
}
}
return cost[m-1][n-1];
}
int CalculateLongestCommonSubsequence(std::vector<double> t0, std::vector<double> t1) {
size_t m = t0.size() + 1;
size_t n = t1.size() + 1;
// Allocate the Matrix to work on:
std::vector<std::vector<int>> cost(m, std::vector<int>(n));
cost[0][0] = 0;
// Calculate the first row:
for (int i = 1; i < m; i++) {
cost[i][0] = 0;
}
// Calculate the first column:
for (int j = 1; j < n; j++) {
cost[0][j] = 0;
}
for(int i = 1; i < m; i++) {
for(int j = 1; j < n; j++) {
if(std::abs(t0[i-1] - t1[j-1]) <= 1e-10) {
cost[i][j] = 1 + cost[i-1][j-1];
} else {
cost[i][j] = std::max(cost[i][j-1], cost[i-1][j]);
}
}
}
return cost[m-1][n-1];
}
int CalculateTimeWarpedLongestCommonSubsequence(std::vector<double> t0, std::vector<double> t1) {
size_t m = t0.size() + 1;
size_t n = t1.size() + 1;
// Allocate the Matrix to work on:
std::vector<std::vector<int>> cost(m, std::vector<int>(n));
cost[0][0] = 0;
// Calculate the first row:
for (int i = 1; i < m; i++) {
cost[i][0] = 0;
}
// Calculate the first column:
for (int j = 1; j < n; j++) {
cost[0][j] = 0;
}
for(int i = 1; i < m; i++) {
for(int j = 1; j < n; j++) {
if(std::abs(t0[i-1] - t1[j-1]) <= 1e-10) {
cost[i][j] = 1 + std::max(cost[i][j-1], std::max(cost[i-1][j], cost[i-1][j-1]));
} else {
cost[i][j] = std::max(cost[i][j-1], cost[i-1][j]);
}
}
}
return cost[m-1][n-1];
}
}
usage
#include "../include/TimeSeriesUtils.hpp"
#include <vector>
#include <cstdint>
#include <catch.hpp>
namespace {
TEST_CASE("DynamicTimeWarpedDistanceTest")
{
std::vector<double> timeSeries0;
std::vector<double> timeSeries1;
timeSeries0.push_back(4);
timeSeries0.push_back(4);
timeSeries0.push_back(5);
timeSeries0.push_back(5);
timeSeries0.push_back(6);
timeSeries0.push_back(6);
timeSeries0.push_back(7);
timeSeries0.push_back(7);
timeSeries1.push_back(23);
timeSeries1.push_back(4);
timeSeries1.push_back(5);
timeSeries1.push_back(6);
timeSeries1.push_back(7);
double result = TimeSeriesUtils::CalculateDynamicTimeWarpedDistance(timeSeries0, timeSeries1);
REQUIRE(result == Approx(19).epsilon(0.01));
}
TEST_CASE("LongestCommonSubsequenceTest")
{
std::vector<double> timeSeries0;
std::vector<double> timeSeries1;
timeSeries0.push_back(4);
timeSeries0.push_back(4);
timeSeries0.push_back(5);
timeSeries0.push_back(5);
timeSeries0.push_back(6);
timeSeries0.push_back(6);
timeSeries0.push_back(7);
timeSeries0.push_back(7);
timeSeries1.push_back(4);
timeSeries1.push_back(5);
timeSeries1.push_back(6);
timeSeries1.push_back(7);
int result = TimeSeriesUtils::CalculateLongestCommonSubsequence(timeSeries0, timeSeries1);
REQUIRE(result == 4);
}
}
notes
You can obtain the sourcecode from my github page at: