% cuTimeWarp.bib -- biblatex bibliography database for the cuTimeWarp project.
@article{cuturi_soft-dtw_2018,
  author       = {Cuturi, Marco and Blondel, Mathieu},
  title        = {Soft-{DTW}: a Differentiable Loss Function for Time-Series},
  shorttitle   = {Soft-{DTW}},
  journaltitle = {{arXiv}:1703.01541 [stat]},
  date         = {2018-02-20},
  url          = {http://arxiv.org/abs/1703.01541},
  urldate      = {2021-01-16},
  eprinttype   = {arxiv},
  eprint       = {1703.01541},
  abstract     = {We propose in this paper a differentiable learning loss between time series, building upon the celebrated dynamic time warping ({DTW}) discrepancy. Unlike the Euclidean distance, {DTW} can compare time series of variable size and is robust to shifts or dilatations across the time dimension. To compute {DTW}, one typically solves a minimal-cost alignment problem between two time series using dynamic programming. Our work takes advantage of a smoothed formulation of {DTW}, called soft-{DTW}, that computes the soft-minimum of all alignment costs. We show in this paper that soft-{DTW} is a differentiable loss function, and that both its value and gradient can be computed with quadratic time/space complexity ({DTW} has quadratic time but linear space complexity). We show that this regularization is particularly well suited to average and cluster time series under the {DTW} geometry, a task for which our proposal significantly outperforms existing baselines. Next, we propose to tune the parameters of a machine that outputs time series by minimizing its fit with ground-truth labels in a soft-{DTW} sense.},
  keywords     = {Statistics - Machine Learning},
  file         = {arXiv Fulltext PDF:C\:\\Users\\jekyllo\\Zotero\\storage\\Q3AVE6KU\\Cuturi and Blondel - 2018 - Soft-DTW a Differentiable Loss Function for Time-.pdf:application/pdf;arXiv.org Snapshot:C\:\\Users\\jekyllo\\Zotero\\storage\\3TZ2UJC3\\1703.html:text/html},
}
@inproceedings{belviranli_peerwave_2015,
venue = {Newport Beach, California, {USA}},
title = {{PeerWave}: Exploiting Wavefront Parallelism on {GPUs} with Peer-{SM} Synchronization},
isbn = {978-1-4503-3559-1},
url = {https://dl.acm.org/doi/10.1145/2751205.2751243},
doi = {10.1145/2751205.2751243},
shorttitle = {{PeerWave}},
abstract = {Nested loops with regular iteration dependencies span a large class of applications ranging from string matching to linear system solvers. Wavefront parallelism is a well-known technique to enable concurrent processing of such applications and is widely being used on {GPUs} to benefit from their massively parallel computing capabilities. Wavefront parallelism on {GPUs} uses global barriers between processing of tiles to enforce data dependencies. However, such diagonal-wide synchronization causes load imbalance by forcing {SMs} to wait for the completion of the {SM} with longest computation. Moreover, diagonal processing causes loss of locality due to elements that border adjacent tiles. In this paper, we propose {PeerWave}, an alternative {GPU} wavefront parallelization technique that improves inter-{SM} load balance by using peer-wise synchronization between {SMs}. and eliminating global synchronization. Our approach also increases {GPU} L2 cache locality through row allocation of tiles to the {SMs}. We further improve {PeerWave} performance by using flexible hyper-tiles that reduce inter-{SM} wait time while maximizing intra-{SM} utilization. We develop an analytical model for determining the optimal tile size. Finally, we present a run-time and a {CUDA} based {API} to allow users to easily implement their applications using {PeerWave}. We evaluate {PeerWave} on the {NVIDIA} K40c {GPU} using 6 different applications and achieve speedups of up to 2X compared to the most recent hyperplane transformation based {GPU} implementation.},
eventtitle = {{ICS}'15: 2015 International Conference on Supercomputing},
pages = {25--35},
booktitle = {Proceedings of the 29th {ACM} on International Conference on Supercomputing},
publisher = {{ACM}},
author = {Belviranli, Mehmet E. and Deng, Peng and Bhuyan, Laxmi N. and Gupta, Rajiv and Zhu, Qi},
urldate = {2021-03-03},
date = {2015-06-08},
langid = {english},
file = {Belviranli et al. - 2015 - PeerWave Exploiting Wavefront Parallelism on GPUs.pdf:C\:\\Users\\jekyllo\\Zotero\\storage\\ZPUGYVBV\\Belviranli et al. - 2015 - PeerWave Exploiting Wavefront Parallelism on GPUs.pdf:application/pdf}
}
@inproceedings{xiao_parallelizing_2013,
  author     = {Xiao, L. and Zheng, Y. and Tang, W. and Yao, G. and Ruan, L.},
  title      = {Parallelizing Dynamic Time Warping Algorithm Using Prefix Computations on {GPU}},
  booktitle  = {2013 {IEEE} 10th International Conference on High Performance Computing and Communications 2013 {IEEE} International Conference on Embedded and Ubiquitous Computing},
  eventtitle = {2013 {IEEE} 10th International Conference on High Performance Computing and Communications 2013 {IEEE} International Conference on Embedded and Ubiquitous Computing},
  pages      = {294--299},
  date       = {2013-11},
  doi        = {10.1109/HPCC.and.EUC.2013.50},
  abstract   = {The dynamic time warping ({DTW}) algorithm has O(n2) time complexity, which indicates that it is hard to process large-scale time series within an acceptable time. Recently, many researchers have used graphics processing units ({GPUs}) to accelerate the algorithm. Owing to the data dependence of {DTW}, however, most of existing {GPU}-based {DTW} algorithms exploits task-level parallelism by simply replicating the serial algorithm on every multiprocessor of a {GPU}. The fundamental issue with such coarse-grained parallelism is that the solvable problem size is severely limited by the share memory and cache of a {GPU} multiprocessor. In this study, we introduce a specific transformation of data dependence by using prefix computations. Further, we propose an efficient {GPU}-parallel {DTW} algorithm to address the problem of instance sizes limitation. The efficiency of our algorithm is validated through experiments, which demonstrate improved performance over existing {GPU}-based task-level parallel {DTW} algorithms. Our experimental results indicate speedups up to 99 times faster on {NVIDIA} {GTX}480, compared to {CPU} implementations.},
  keywords   = {dynamic time warping, time series, Time series analysis, graphics processing units, Graphics processing units, Instruction sets, multiprocessing systems, {CUDA}, {GPU}, parallel algorithm, Arrays, data dependence, {GPU} based {DTW} algorithms, {GPU} multiprocessor, Heuristic algorithms, {NVIDIA} {GTX}480, Parallel algorithms, parallel {DTW} algorithms, parallel processing, parallelizing dynamic time warping algorithm, prefix computations, serial algorithm, time complexity},
}
@article{zhu_developing_2018,
  author       = {Zhu, H. and Gu, Z. and Zhao, H. and Chen, K. and Li, C. and He, L.},
  title        = {Developing a pattern discovery method in time series data and its {GPU} acceleration},
  journaltitle = {Big Data Mining and Analytics},
  volume       = {1},
  number       = {4},
  pages        = {266--283},
  date         = {2018-12},
  issn         = {2096-0654},
  doi          = {10.26599/BDMA.2018.9020021},
  note         = {Conference Name: Big Data Mining and Analytics},
  abstract     = {The Dynamic Time Warping ({DTW}) algorithm is widely used in finding the global alignment of time series. Many time series data mining and analytical problems can be solved by the {DTW} algorithm. However, using the {DTW} algorithm to find similar subsequences is computationally expensive or unable to perform accurate analysis. Hence, in the literature, the parallelisation technique is used to speed up the {DTW} algorithm. However, due to the nature of {DTW} algorithm, parallelizing this algorithm remains an open challenge. In this paper, we first propose a novel method that finds the similar local subsequence. Our algorithm first searches for the possible start positions of subsequence, and then finds the best-matching alignment from these positions. Moreover, we parallelize the proposed algorithm on {GPUs} using {CUDA} and further propose an optimization technique to improve the performance of our parallelization implementation on {GPU}. We conducted the extensive experiments to evaluate the proposed method. Experimental results demonstrate that the proposed algorithm is able to discover time series subsequences efficiently and that the proposed {GPU}-based parallelization technique can further speedup the processing.},
  keywords     = {data mining, Data mining, dynamic time warping, time series, Time series analysis, graphics processing units, Graphics processing units, Instruction sets, parallel architectures, {CUDA}, time series data mining, {GPGPU}, parallel algorithm, parallel algorithms, Heuristic algorithms, parallel processing, analytical problems, data analysis, {DTW} algorithm, Dynamic Time Warping algorithm, {GPU} acceleration, Microsoft Windows, parallelisation technique, pattern discovery, pattern discovery method, Pattern matching, time series data},
  file         = {IEEE Xplore Full Text PDF:C\:\\Users\\jekyllo\\Zotero\\storage\\RA77H8XQ\\Zhu et al. - 2018 - Developing a pattern discovery method in time seri.pdf:application/pdf;IEEE Xplore Abstract Record:C\:\\Users\\jekyllo\\Zotero\\storage\\UB2MY8VN\\8400444.html:text/html},
}
@software{maghoumi_pytorch-softdtw-cuda_2021,
title = {Maghoumi/pytorch-softdtw-cuda},
rights = {{MIT} License},
url = {https://github.com/Maghoumi/pytorch-softdtw-cuda},
abstract = {Fast {CUDA} implementation of (differentiable) soft dynamic time warping for {PyTorch} using Numba},
author = {Maghoumi, Mehran},
urldate = {2021-01-23},
date = {2021-01-21},
note = {original-date: 2020-05-02T23:28:24Z},
keywords = {cuda, deep-learning, dynamic-time-warping, pytorch, soft-dtw}
}
@article{shen_tc-dtw_2021,
  author       = {Shen, Daniel and Chi, Min},
  title        = {{TC}-{DTW}: Accelerating Multivariate Dynamic Time Warping Through Triangle Inequality and Point Clustering},
  shorttitle   = {{TC}-{DTW}},
  journaltitle = {{arXiv}:2101.07731 [cs]},
  date         = {2021-01-19},
  url          = {http://arxiv.org/abs/2101.07731},
  urldate      = {2021-02-14},
  eprinttype   = {arxiv},
  eprint       = {2101.07731},
  langid       = {english},
  abstract     = {Dynamic time warping ({DTW}) plays an important role in analytics on time series. Despite the large body of research on speeding up univariate {DTW}, the method for multivariate {DTW} has not been improved much in the last two decades. The most popular algorithm used today is still the one developed seventeen years ago. This paper presents a solution that, as far as we know, for the first time consistently outperforms the classic multivariate {DTW} algorithm across dataset sizes, series lengths, data dimensions, temporal window sizes, and machines. The new solution, named {TC}-{DTW}, introduces Triangle Inequality and Point Clustering into the algorithm design on lower bound calculations for multivariate {DTW}. In experiments on {DTW}-based nearest neighbor finding, the new solution avoids as much as 98\% (60\% average) {DTW} distance calculations and yields as much as 25× (7.5× average) speedups.},
  keywords     = {Computer Science - Artificial Intelligence, Computer Science - Databases, Computer Science - Machine Learning},
  file         = {Shen and Chi - 2021 - TC-DTW Accelerating Multivariate Dynamic Time War.pdf:C\:\\Users\\jekyllo\\Zotero\\storage\\YERGC7JA\\Shen and Chi - 2021 - TC-DTW Accelerating Multivariate Dynamic Time War.pdf:application/pdf},
}
@inproceedings{keogh_exact_2002,
venue = {Hong Kong, China},
title = {Exact indexing of dynamic time warping},
series = {{VLDB} '02},
abstract = {The problem of indexing time series has attracted much research interest in the database community. Most algorithms used to index time series utilize the Euclidean distance or some variation thereof. However is has been forcefully shown that the Euclidean distance is a very brittle distance measure. Dynamic Time Warping ({DTW}) is a much more robust distance measure for time series, allowing similar shapes to match even if they are out of phase in the time axis. Because of this flexibility, {DTW} is widely used in science, medicine, industry and finance. Unfortunately however, {DTW} does not obey the triangular inequality, and thus has resisted attempts at exact indexing. Instead, many researchers have introduced approximate indexing techniques, or abandoned the idea of indexing and concentrated on speeding up sequential search. In this work we introduce a novel technique for the exact indexing of {DTW}. We prove that our method guarantees no false dismissals and we demonstrate its vast superiority over all competing approaches in the largest and most comprehensive set of time series indexing experiments ever undertaken.},
pages = {406--417},
booktitle = {Proceedings of the 28th international conference on Very Large Data Bases},
publisher = {{VLDB} Endowment},
author = {Keogh, Eamonn},
urldate = {2021-02-14},
date = {2002-08-20},
file = {Full Text PDF:C\:\\Users\\jekyllo\\Zotero\\storage\\RFRE3B7R\\Keogh - 2002 - Exact indexing of dynamic time warping.pdf:application/pdf}
}
@article{rakthanmanon_addressing_2013,
title = {Addressing Big Data Time Series: Mining Trillions of Time Series Subsequences Under Dynamic Time Warping},
volume = {7},
issn = {1556-4681},
doi = {10.1145/2500489},
shorttitle = {Addressing Big Data Time Series},
abstract = {Most time series data mining algorithms use similarity search as a core subroutine, and thus the time taken for similarity search is the bottleneck for virtually all time series data mining algorithms, including classification, clustering, motif discovery, anomaly detection, and so on. The difficulty of scaling a search to large datasets explains to a great extent why most academic work on time series data mining has plateaued at considering a few millions of time series objects, while much of industry and science sits on billions of time series objects waiting to be explored. In this work we show that by using a combination of four novel ideas we can search and mine massive time series for the first time. We demonstrate the following unintuitive fact: in large datasets we can exactly search under Dynamic Time Warping ({DTW}) much more quickly than the current state-of-the-art Euclidean distance search algorithms. We demonstrate our work on the largest set of time series experiments ever attempted. In particular, the largest dataset we consider is larger than the combined size of all of the time series datasets considered in all data mining papers ever published. We explain how our ideas allow us to solve higher-level time series data mining problems such as motif discovery and clustering at scales that would otherwise be untenable. Moreover, we show how our ideas allow us to efficiently support the uniform scaling distance measure, a measure whose utility seems to be underappreciated, but which we demonstrate here. In addition to mining massive datasets with up to one trillion datapoints, we will show that our ideas also have implications for real-time monitoring of data streams, allowing us to handle much faster arrival rates and/or use cheaper and lower powered devices than are currently possible.},
pages = {10:1--10:31},
number = {3},
journaltitle = {{ACM} Transactions on Knowledge Discovery from Data},
shortjournal = {{ACM} Trans. Knowl. Discov. Data},
author = {Rakthanmanon, Thanawin and Campana, Bilson and Mueen, Abdullah and Batista, Gustavo and Westover, Brandon and Zhu, Qiang and Zakaria, Jesin and Keogh, Eamonn},
urldate = {2021-03-07},
date = {2013-09-01},
keywords = {lower bounds, similarity search, Time series},
file = {Full Text PDF:/home/alex/Zotero/storage/CIDL3E5F/Rakthanmanon et al. - 2013 - Addressing Big Data Time Series Mining Trillions .pdf:application/pdf}
}
@article{dau_ucr_2019,
  author       = {Dau, Hoang Anh and Bagnall, Anthony and Kamgar, Kaveh and Yeh, Chin-Chia Michael and Zhu, Yan and Gharghabi, Shaghayegh and Ratanamahatana, Chotirat Ann and Keogh, Eamonn},
  title        = {The {UCR} Time Series Archive},
  journaltitle = {{arXiv}:1810.07758 [cs, stat]},
  date         = {2019-09-08},
  url          = {http://arxiv.org/abs/1810.07758},
  urldate      = {2021-03-08},
  eprinttype   = {arxiv},
  eprint       = {1810.07758},
  abstract     = {The {UCR} Time Series Archive - introduced in 2002, has become an important resource in the time series data mining community, with at least one thousand published papers making use of at least one data set from the archive. The original incarnation of the archive had sixteen data sets but since that time, it has gone through periodic expansions. The last expansion took place in the summer of 2015 when the archive grew from 45 to 85 data sets. This paper introduces and will focus on the new data expansion from 85 to 128 data sets. Beyond expanding this valuable resource, this paper offers pragmatic advice to anyone who may wish to evaluate a new algorithm on the archive. Finally, this paper makes a novel and yet actionable claim: of the hundreds of papers that show an improvement over the standard baseline (1-nearest neighbor classification), a large fraction may be mis-attributing the reasons for their improvement. Moreover, they may have been able to achieve the same improvement with a much simpler modification, requiring just a single line of code.},
  keywords     = {Computer Science - Machine Learning, Statistics - Machine Learning},
  file         = {arXiv Fulltext PDF:/home/alex/Zotero/storage/TYNGXIKQ/Dau et al. - 2019 - The UCR Time Series Archive.pdf:application/pdf;arXiv.org Snapshot:/home/alex/Zotero/storage/EW3MSYPC/1810.html:text/html},
}
@article{bagnall_uea_2018,
  author       = {Bagnall, Anthony and Dau, Hoang Anh and Lines, Jason and Flynn, Michael and Large, James and Bostrom, Aaron and Southam, Paul and Keogh, Eamonn},
  title        = {The {UEA} multivariate time series classification archive, 2018},
  journaltitle = {{arXiv}:1811.00075 [cs, stat]},
  date         = {2018-10-31},
  url          = {http://arxiv.org/abs/1811.00075},
  urldate      = {2021-03-08},
  eprinttype   = {arxiv},
  eprint       = {1811.00075},
  abstract     = {In 2002, the {UCR} time series classification archive was first released with sixteen datasets. It gradually expanded, until 2015 when it increased in size from 45 datasets to 85 datasets. In October 2018 more datasets were added, bringing the total to 128. The new archive contains a wide range of problems, including variable length series, but it still only contains univariate time series classification problems. One of the motivations for introducing the archive was to encourage researchers to perform a more rigorous evaluation of newly proposed time series classification ({TSC}) algorithms. It has worked: most recent research into {TSC} uses all 85 datasets to evaluate algorithmic advances. Research into multivariate time series classification, where more than one series are associated with each class label, is in a position where univariate {TSC} research was a decade ago. Algorithms are evaluated using very few datasets and claims of improvement are not based on statistical comparisons. We aim to address this problem by forming the first iteration of the {MTSC} archive, to be hosted at the website www.timeseriesclassification.com. Like the univariate archive, this formulation was a collaborative effort between researchers at the University of East Anglia ({UEA}) and the University of California, Riverside ({UCR}). The 2018 vintage consists of 30 datasets with a wide range of cases, dimensions and series lengths. For this first iteration of the archive we format all data to be of equal length, include no series with missing data and provide train/test splits.},
  keywords     = {Computer Science - Machine Learning, Statistics - Machine Learning},
  file         = {arXiv Fulltext PDF:/home/alex/Zotero/storage/9N4ELQDB/Bagnall et al. - 2018 - The UEA multivariate time series classification ar.pdf:application/pdf;arXiv.org Snapshot:/home/alex/Zotero/storage/CRE689QM/1811.html:text/html},
}
@article{deriso_general_nodate,
title = {A General Optimization Framework for Dynamic Time Warping},
abstract = {The goal of dynamic time warping is to transform or warp time in order to approximately align two signals. We pose the choice of warping function as an optimization problem with several terms in the objective. The first term measures the misalignment of the time-warped signals. Two additional regularization terms penalize the cumulative warping and the instantaneous rate of time warping; constraints on the warping can be imposed by assigning the value +∞ to the regularization terms. Different choices of the three objective terms yield different time warping functions that trade off signal fit or alignment and properties of the warping function. The optimization problem we formulate is a classical optimal control problem, with initial and terminal constraints, and a state dimension of one. We describe an effective general method that minimizes the objective by discretizing the values of the original and warped time, and using standard dynamic programming to compute the (globally) optimal warping function with the discretized values. Iterated refinement of this scheme yields a high accuracy warping function in just a few iterations. Our method is implemented as an open source Python package {GDTW}.},
pagetotal = {23},
author = {Deriso, Dave and Boyd, Stephen},
langid = {english},
file = {Deriso and Boyd - A General Optimization Framework for Dynamic Time .pdf:C\:\\Users\\Afrooz\\Zotero\\storage\\FBH4NG2X\\Deriso and Boyd - A General Optimization Framework for Dynamic Time .pdf:application/pdf}
}
@inproceedings{keogh_derivative_2001,
  author     = {Keogh, Eamonn J. and Pazzani, Michael J.},
  title      = {Derivative Dynamic Time Warping},
  booktitle  = {Proceedings of the 2001 {SIAM} International Conference on Data Mining},
  eventtitle = {Proceedings of the 2001 {SIAM} International Conference on Data Mining},
  publisher  = {Society for Industrial and Applied Mathematics},
  pages      = {1--11},
  date       = {2001-04-05},
  isbn       = {978-0-89871-495-1 978-1-61197-271-9},
  doi        = {10.1137/1.9781611972719.1},
  url        = {https://epubs.siam.org/doi/10.1137/1.9781611972719.1},
  urldate    = {2021-03-10},
  langid     = {english},
  file       = {Keogh and Pazzani - 2001 - Derivative Dynamic Time Warping.pdf:C\:\\Users\\Afrooz\\Zotero\\storage\\W2Y9XGPD\\Keogh and Pazzani - 2001 - Derivative Dynamic Time Warping.pdf:application/pdf},
}
@article{sakoe_dynamic_1978,
title = {Dynamic programming algorithm optimization for spoken word recognition},
volume = {26},
issn = {0096-3518},
doi = {10.1109/TASSP.1978.1163055},
abstract = {This paper reports on an optimum dynamic programming ({DP}) based time-normalization algorithm for spoken word recognition. First, a general principle of time-normalization is given using time-warping function. Then, two time-normalized distance definitions, called symmetric and asymmetric forms, are derived from the principle. These two forms are compared with each other through theoretical discussions and experimental studies. The symmetric form algorithm superiority is established. A new technique, called slope constraint, is successfully introduced, in which the warping function slope is restricted so as to improve discrimination between words in different categories. The effective slope constraint characteristic is qualitatively analyzed, and the optimum slope constraint condition is determined through experiments. The optimized algorithm is then extensively subjected to experimental comparison with various {DP}-algorithms, previously applied to spoken word recognition by different research groups. The experiment shows that the present algorithm gives no more than about two-thirds errors, even compared to the best conventional algorithm.},
pages = {43--49},
number = {1},
journaltitle = {{IEEE} Transactions on Acoustics, Speech, and Signal Processing},
author = {Sakoe, H. and Chiba, S.},
date = {1978-02},
note = {Conference Name: {IEEE} Transactions on Acoustics, Speech, and Signal Processing},
keywords = {Acoustics, Constraint optimization, Dynamic programming, Feature extraction, Fluctuations, Heuristic algorithms, Pattern matching, Signal processing algorithms, Speech processing, Timing},
file = {IEEE Xplore Abstract Record:/home/alex/Zotero/storage/EZZSAKLP/1163055.html:text/html;IEEE Xplore Full Text PDF:/home/alex/Zotero/storage/ULUI57E9/Sakoe and Chiba - 1978 - Dynamic programming algorithm optimization for spo.pdf:application/pdf}
}
@online{techpowerup,
author = {{TechPowerUp}},
title = {{NVIDIA} {GeForce} {RTX} 2060 {SUPER} {Specs}},
url = {https://www.techpowerup.com/gpu-specs/geforce-rtx-2060-super.c3441},
abstract = {NVIDIA TU106, 1650 MHz, 2176 Cores, 136 TMUs, 64 ROPs, 8192 MB GDDR6, 1750 MHz, 256 bit},
langid = {en},
urldate = {2021-02-28},
organization = {TechPowerUp},
file = {Snapshot:/home/alex/Zotero/storage/2BZU9AK7/geforce-rtx-2060-super.html:text/html}
}
@online{nvidia_flops,
title = {Achieved {FLOPs}},
url = {https://docs.nvidia.com/gameworks/content/developertools/desktop/analysis/report/cudaexperiments/kernellevel/achievedflops.htm},
urldate = {2021-02-28},
author = {{NVIDIA}},
file = {Achieved FLOPs:/home/alex/Zotero/storage/UQWSPEQW/achievedflops.html:text/html}
}
@article{salvador_fastdtw_2004,
title = {{FastDTW}: Toward Accurate Dynamic Time Warping in Linear Time and Space},
abstract = {The dynamic time warping ({DTW}) algorithm is able to find the optimal alignment between two time series. It is often used to determine time series similarity, classification, and to find corresponding regions between two time series. {DTW} has a quadratic time and space complexity that limits its use to only small time series data sets. In this paper we introduce {FastDTW}, an approximation of {DTW} that has a linear time and space complexity. {FastDTW} uses a multilevel approach that recursively projects a solution from a coarse resolution and refines the projected solution. We prove the linear time and space complexity of {FastDTW} both theoretically and empirically. We also analyze the accuracy of {FastDTW} compared to two other existing approximate {DTW} algorithms: Sakoe-Chuba Bands and Data Abstraction. Our results show a large improvement in accuracy over the existing methods.},
pagetotal = {11},
date = {2004},
author = {Salvador, Stan and Chan, Philip},
langid = {english},
file = {Salvador and Chan - FastDTW Toward Accurate Dynamic Time Warping in L.pdf:/home/alex/Zotero/storage/9SH3QRYE/Salvador and Chan - FastDTW Toward Accurate Dynamic Time Warping in L.pdf:application/pdf}
}
@article{wu_fastdtw_2020,
title = {{FastDTW} is approximate and Generally Slower than the Algorithm it Approximates},
issn = {1041-4347, 1558-2191, 2326-3865},
url = {http://arxiv.org/abs/2003.11246},
doi = {10.1109/TKDE.2020.3033752},
abstract = {Many time series data mining problems can be solved with repeated use of distance measure. Examples of such tasks include similarity search, clustering, classification, anomaly detection and segmentation. For over two decades it has been known that the Dynamic Time Warping ({DTW}) distance measure is the best measure to use for most tasks, in most domains. Because the classic {DTW} algorithm has quadratic time complexity, many ideas have been introduced to reduce its amortized time, or to quickly approximate it. One of the most cited approximate approaches is {FastDTW}. The {FastDTW} algorithm has well over a thousand citations and has been explicitly used in several hundred research efforts. In this work, we make a surprising claim. In any realistic data mining application, the approximate {FastDTW} is much slower than the exact {DTW}. This fact clearly has implications for the community that uses this algorithm: allowing it to address much larger datasets, get exact results, and do so in less time.},
journaltitle = {{IEEE} Transactions on Knowledge and Data Engineering},
shortjournal = {{IEEE} Trans. Knowl. Data Eng.},
author = {Wu, Renjie and Keogh, Eamonn J.},
urldate = {2021-03-18},
date = {2020},
eprinttype = {arxiv},
eprint = {2003.11246},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
file = {arXiv Fulltext PDF:/home/alex/Zotero/storage/G9QLEV2V/Wu and Keogh - 2020 - FastDTW is approximate and Generally Slower than t.pdf:application/pdf;arXiv.org Snapshot:/home/alex/Zotero/storage/U6PFVPIG/2003.html:text/html}
}
@article{jain_semi-metrification_2018,
  author       = {Jain, Brijnesh J.},
  title        = {Semi-Metrification of the Dynamic Time Warping Distance},
  journaltitle = {{arXiv}:1808.09964 [cs, stat]},
  date         = {2018-09-02},
  url          = {http://arxiv.org/abs/1808.09964},
  urldate      = {2021-03-18},
  eprinttype   = {arxiv},
  eprint       = {1808.09964},
  abstract     = {The dynamic time warping (dtw) distance fails to satisfy the triangle inequality and the identity of indiscernibles. As a consequence, the dtw-distance is not warping-invariant, which in turn results in peculiarities in data mining applications. This article converts the dtw-distance to a semi-metric and shows that its canonical extension is warping-invariant. Empirical results indicate that the nearest-neighbor classifier in the proposed semi-metric space performs comparably to the same classifier in the standard dtw-space. To overcome the undesirable peculiarities of dtw-spaces, this result suggests to further explore the semi-metric space for data mining applications.},
  keywords     = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning, Statistics - Machine Learning},
  file         = {arXiv Fulltext PDF:C\:\\Users\\jekyllo\\Zotero\\storage\\N6J4YMLJ\\Jain - 2018 - Semi-Metrification of the Dynamic Time Warping Dis.pdf:application/pdf;arXiv.org Snapshot:C\:\\Users\\jekyllo\\Zotero\\storage\\IAXTQJVP\\1808.html:text/html},
}
@online{jordan_normalizing_2018,
title = {Normalizing your data (specifically, input and batch normalization)},
url = {https://www.jeremyjordan.me/batch-normalization/},
abstract = {In this post, I'll discuss considerations for normalizing your data - with a specific focus on neural networks. In order to understand the concepts discussed, it's important to have an understanding of gradient descent. As a quick refresher, when training neural networks we'll feed in observations and compare the expected},
author = {Jordan, Jeremy},
urldate = {2021-03-18},
date = {2018-01-27},
langid = {english},
file = {Snapshot:C\:\\Users\\jekyllo\\Zotero\\storage\\KDD2FLVU\\batch-normalization.html:text/html}
}