-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathReliefF.h
138 lines (130 loc) · 5.54 KB
/
ReliefF.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
/**
* \class ReliefF
*
* \brief ReliefF attribute ranking algorithm.
*
* Totally redone for the McKinney in silico lab in 2011.
* Large refactoring to move all attribute elimination handling to the
* Dataset and its subclasses. 9/11/11
*
* \sa RReliefF
*
* \author Bill White
* \version 1.0
*
* Contact: [email protected]
* Created on: 7/16/05
*/
#ifndef RELIEFF_H
#define RELIEFF_H
#include <fstream>
#include <string>
#include <vector>
#include <boost/program_options.hpp>
#include "AttributeRanker.h"
#include "Dataset.h"
#include "Insilico.h"
namespace po = boost::program_options;
// ReliefF attribute ranker, derived from AttributeRanker. This header only
// declares the interface and state; no member function is defined here.
class ReliefF : public AttributeRanker
{
public:
/*************************************************************************//**
* Construct a ReliefF algorithm object.
* \param [in] ds pointer to a Dataset object
* \param [in] anaType analysis type
****************************************************************************/
ReliefF(Dataset* ds, AnalysisType anaType);
/*************************************************************************//**
* Construct a ReliefF algorithm object.
* \param [in] ds pointer to a Dataset object
* \param [in] vm reference to a Boost map of command line options
* \param [in] anaType analysis type
****************************************************************************/
ReliefF(Dataset* ds, po::variables_map& vm, AnalysisType anaType);
/*************************************************************************//**
* Construct a ReliefF algorithm object.
* \param [in] ds pointer to a Dataset object
* \param [in] vm reference to a ConfigMap (map<string, string>)
* \param [in] anaType analysis type
****************************************************************************/
ReliefF(Dataset* ds, ConfigMap& vm, AnalysisType anaType);
/// Virtual destructor.
virtual ~ReliefF();
/**
* Compute the ReliefF scores for the current set of attributes.
* Implements ReliefF algorithm:
* Marko Robnik-Sikonja, Igor Kononenko: Theoretical and Empirical Analysis of
* ReliefF and RReliefF. Machine Learning Journal, 53:23-69, 2003
* http://lkm.fri.uni-lj.si/rmarko/papers/robnik03-mlj.pdf
* \return NOTE(review): presumably true on success - confirm in the
* implementation file
*/
virtual bool ComputeAttributeScores();
/// Compute the ReliefF scores by iteratively removing worst attributes.
/// NOTE(review): meaning of the bool return is not visible here; presumably
/// true on success - confirm in the implementation file.
bool ComputeAttributeScoresIteratively();
/// Resets some data structures for the next iteration of ReliefF.
/// NOTE(review): meaning of the bool return is not visible here; presumably
/// true on success - confirm in the implementation file.
bool ResetForNextIteration();
/*************************************************************************//**
* Write the scores and attribute names to stream.
* \param [in] outStream output file stream to write score-attribute name
* pairs to
****************************************************************************/
void PrintAttributeScores(std::ofstream& outStream);
/*************************************************************************//**
* Write the scores and attribute names to file.
* \param [in] baseFilename filename to write score-attribute name pairs
* (passed by value). NOTE(review): the name suggests a suffix or extension
* is appended to form the final file name - confirm in the implementation.
****************************************************************************/
void WriteAttributeScores(std::string baseFilename);
/// Precompute all pairwise instance-to-instance distances.
/// NOTE(review): meaning of the bool return is not visible here; presumably
/// true on success - confirm in the implementation file.
bool PreComputeDistances();
/// Overrides base class method. Returns the scores by value.
AttributeScores GetScores();
/// Implements AttributeRanker interface. Returns the scores by value.
AttributeScores ComputeScores();
private:
/// Default constructor is declared private so that client code cannot
/// default-construct a ReliefF object.
ReliefF();
protected:
/// Compute the weight by distance factors for nearest neighbors.
/// NOTE(review): meaning of the bool return is not visible here; presumably
/// true on success - confirm in the implementation file.
bool ComputeWeightByDistanceFactors();
/// type of analysis to perform
AnalysisType analysisType;
/*************************************************************************//**
* Pointer to the function that computes the discrete difference in an
* attribute between two instances.
* NOTE(review): presumably assigned according to snpMetric - confirm in the
* implementation file.
* \param [in] attributeIndex index into vector of all attributes
* \param [in] dsi1 pointer to DatasetInstance 1
* \param [in] dsi2 pointer to DatasetInstance 2
* \return diff(erence)
****************************************************************************/
double (*snpDiff)(unsigned int attributeIndex,
DatasetInstance* dsi1,
DatasetInstance* dsi2);
/*************************************************************************//**
* Pointer to the function that computes the continuous difference in an
* attribute between two instances.
* NOTE(review): presumably assigned according to numMetric - confirm in the
* implementation file.
* \param [in] attributeIndex index into vector of all attributes
* \param [in] dsi1 pointer to DatasetInstance 1
* \param [in] dsi2 pointer to DatasetInstance 2
* \return diff(erence)
****************************************************************************/
double (*numDiff)(unsigned int attributeIndex,
DatasetInstance* dsi1,
DatasetInstance* dsi2);
/// the name of discrete diff(erence) function
std::string snpMetric;
/// the name of continuous diff(erence) function
std::string numMetric;
/// number of instances to sample
unsigned int m;
/// are instances being randomly selected?
bool randomlySelect;
/// number of attributes to remove each iteration if running iteratively
unsigned int removePerIteration;
/// are we removing a percentage per iteration?
bool doRemovePercent;
/// percentage of attributes to remove per iteration if running iteratively
double removePercentage;
/// name of the weight-by-distance method
std::string weightByDistanceMethod;
/// sigma value used in exponential decay weight-by-distance
double weightByDistanceSigma;
/// attribute scores/weights
std::vector<double> W;
/// attribute names associated with scores
std::vector<std::string> scoreNames;
};
#endif