-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDistanceMetrics.h
129 lines (124 loc) · 6.27 KB
/
DistanceMetrics.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/**
* \file DistanceMetrics.h
*
* \brief Distance metrics for ReliefF.
*
* \author: Bill White
* \version 1.0
*
* Contact: [email protected]
* Created on 3/29/11
*/
#ifndef DISTANCEMETRICS_H
#define DISTANCEMETRICS_H
#include <utility>  // std::pair, used by the CheckMissing* return types below

/// Forward declaration of DatasetInstance. Every function declared in this
/// header takes it by pointer only, so the full class definition is not
/// required here.
class DatasetInstance;
/***************************************************************************//**
* Check whether a discrete attribute value is missing in a pair of instances.
* \param [in] attributeIndex index into the vector of attributes
* \param [in] dsi1 data set instance 1
* \param [in] dsi2 data set instance 2
* \return pair (hasMissing, value): first is true when a missing value was
*         found; second is the value to use for the missing case when first is
*         true, or 0.0 when first is false
******************************************************************************/
std::pair<bool, double> CheckMissing(unsigned int attributeIndex,
DatasetInstance* dsi1,
DatasetInstance* dsi2);
/***************************************************************************//**
* Check whether a continuous (numeric) attribute value is missing in a pair of
* instances.
* \param [in] numericIndex index of the continuous attribute to check
*        (NOTE(review): presumably an index into the numeric attributes rather
*        than the discrete ones - confirm against the implementation)
* \param [in] dsi1 data set instance 1
* \param [in] dsi2 data set instance 2
* \return pair (hasMissing, value): first is true when a missing value was
*         found; second is the value to use for the missing case when first is
*         true, or 0.0 when first is false
******************************************************************************/
std::pair<bool, double> CheckMissingNumeric(unsigned int numericIndex,
DatasetInstance* dsi1,
DatasetInstance* dsi2);
/***************************************************************************//**
* Normalizes a given value of a numeric attribute.
* Borrowed from Weka 8/18/11
* NOTE(review): presumably computes (x - minX) / (maxX - minX); the handling
* of a degenerate range (maxX == minX) is not visible from this header -
* confirm against the implementation.
* \param [in] x value to normalize
* \param [in] minX minimum value for x
* \param [in] maxX maximum value for x
* \return normalized value
******************************************************************************/
double norm(double x, double minX, double maxX);
/***************************************************************************//**
* Allele mismatch metric (AMM).
* Compares the value of one attribute between two data set instances.
* \param [in] attributeIndex index into the vector of attributes
* \param [in] dsi1 data set instance 1
* \param [in] dsi2 data set instance 2
* \return diff(erence) between attribute values: 0.0, 0.5, 1.0
******************************************************************************/
double diffAMM(unsigned int attributeIndex,
DatasetInstance* dsi1,
DatasetInstance* dsi2);
/***************************************************************************//**
* Genotype mismatch metric (GMM).
* Compares the value of one attribute between two data set instances.
* \param [in] attributeIndex index into the vector of attributes
* \param [in] dsi1 data set instance 1
* \param [in] dsi2 data set instance 2
* \return diff(erence) between attribute values: 0.0 (same) or 1.0 (not same)
****************************************************************************/
double diffGMM(unsigned int attributeIndex,
DatasetInstance* dsi1,
DatasetInstance* dsi2);
/***************************************************************************//**
* Nucleotide count array (NCA) metric.
* Compares the value of one attribute between two data set instances.
* \param [in] attributeIndex index into the vector of attributes
* \param [in] dsi1 data set instance 1
* \param [in] dsi2 data set instance 2
* \return diff(erence) considering nucleotide counts
****************************************************************************/
double diffNCA(unsigned int attributeIndex,
DatasetInstance* dsi1,
DatasetInstance* dsi2);
/***************************************************************************//**
* Nucleotide count array + G/C sum + A/T sum (NCA6) metric.
* Compares the value of one attribute between two data set instances.
* \param [in] attributeIndex index into the vector of attributes
* \param [in] dsi1 data set instance 1
* \param [in] dsi2 data set instance 2
* \return diff(erence) considering nucleotide counts
*         (NOTE(review): this \return text is identical to diffNCA's; confirm
*         whether the G/C and A/T sums should be mentioned here)
****************************************************************************/
double diffNCA6(unsigned int attributeIndex,
DatasetInstance* dsi1,
DatasetInstance* dsi2);
/***************************************************************************//**
* Kimura distance (KM) - considers transition/transversion mutation types.
* Compares the value of one attribute between two data set instances.
* \param [in] attributeIndex index into the vector of attributes
* \param [in] dsi1 data set instance 1
* \param [in] dsi2 data set instance 2
* \return diff(erence) considering nucleotide mutation types
****************************************************************************/
double diffKM(unsigned int attributeIndex,
DatasetInstance* dsi1,
DatasetInstance* dsi2);
/***************************************************************************//**
* "Manhattan" distance between the values of one continuous attribute in two
* data set instances.
* \param [in] attributeIndex index into the vector of attributes
* \param [in] dsi1 data set instance 1
* \param [in] dsi2 data set instance 2
* \return absolute value of difference divided by attribute's range
******************************************************************************/
double diffManhattan(unsigned int attributeIndex,
DatasetInstance* dsi1,
DatasetInstance* dsi2);
/***************************************************************************//**
* Euclidean distance between the values of one continuous attribute in two
* data set instances.
* \param [in] attributeIndex index into the vector of attributes
* \param [in] dsi1 data set instance 1
* \param [in] dsi2 data set instance 2
* \return absolute value of difference divided by attribute's range
*         (NOTE(review): this \return text is identical to diffManhattan's and
*         may be a copy/paste leftover - confirm it matches what the Euclidean
*         implementation actually returns, e.g. a squared term)
******************************************************************************/
double diffEuclidean(unsigned int attributeIndex,
DatasetInstance* dsi1,
DatasetInstance* dsi2);
/***************************************************************************//**
* Same as "Manhattan" distance but uses method calls versus public variables.
* Unlike the other diff* functions in this header, this one takes no attribute
* index; it operates on the two instances only.
* NOTE(review): the name suggests the compared quantity is each instance's
* predicted value (tau) - confirm against the implementation.
* \param [in] dsi1 data set instance 1
* \param [in] dsi2 data set instance 2
* \return absolute value of difference divided by attribute's range
*         (NOTE(review): wording carried over from diffManhattan; confirm which
*         range is used for the division)
******************************************************************************/
double diffPredictedValueTau(DatasetInstance* dsi1,
DatasetInstance* dsi2);
#endif /* DISTANCEMETRICS_H */