-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDgeData.h
75 lines (72 loc) · 2.3 KB
/
DgeData.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/**
* \class DgeData
*
* \brief Digital gene expression data
*
* \author Bill White
* \version 1.0
*
* Contact: [email protected]
* Created on: 1/18/12
*/
#ifndef DGEDATA_H_
#define DGEDATA_H_

// Standard headers for every std:: type named in the declarations below, so
// that this header compiles no matter what the including file included first.
#include <string>
#include <utility>
#include <vector>

/// Digital gene expression (DGE) data set.
///
/// Holds per-gene and per-sample names, a table of counts, sample phenotypes
/// and (optionally) per-sample normalization factors, and exposes read
/// accessors over them. Only declarations live here; the behavior of each
/// member is defined in the implementation file.
class DgeData {
public:
  DgeData();
  virtual ~DgeData();

  /// Load a set of DGE data from a counts file and an optional
  /// normalization factors file.
  ///
  /// \param countsFile name of the file containing the DGE counts
  /// \param normsFile  name of the file containing normalization factors;
  ///                   defaults to the empty string (presumably meaning
  ///                   "no normalization factors" -- confirm in the
  ///                   implementation)
  /// \return presumably true on success and false on failure -- confirm in
  ///         the implementation
  bool LoadData(std::string countsFile, std::string normsFile="");

  /// Get the sample names/IDs (returned by value, i.e. as a copy)
  std::vector<std::string> GetSampleNames();

  /// Get the gene names/IDs (returned by value, i.e. as a copy)
  std::vector<std::string> GetGeneNames();

  /// Get the min and max values for the gene at geneIndex
  /// (presumably first = min, second = max -- confirm in the implementation)
  std::pair<double, double> GetGeneMinMax(int geneIndex);

  /// Get the sum of raw counts for the gene at geneIndex
  double GetGeneCountsSum(int geneIndex);

  /// Get the number of samples
  int GetNumSamples();

  /// Get the number of genes
  int GetNumGenes();

  /// Get the counts for the sample at sampleIndex
  std::vector<double> GetSampleCounts(int sampleIndex);

  /// Get the phenotype for the sample at sampleIndex
  int GetSamplePhenotype(int sampleIndex);

  /// Get the normalization factors
  std::vector<double> GetNormalizationFactors();

  /// Print the sample statistics to the console
  void PrintSampleStats();

  /// Get the original data set (counts) filename
  std::string GetCountsFilename();

  /// Get the sequencing depth for the sample at sampleIndex.
  /// Note: unlike the other indexed accessors, this one takes and returns
  /// unsigned int.
  unsigned int GetSequencingDepthForSample(unsigned int sampleIndex);

private:
  /// Filename containing DGE counts
  std::string countsFilename;
  /// Filename containing DGE phenotypes
  std::string phenosFilename;
  /// Filename containing DGE normalization factors
  std::string normsFilename;
  /// Are we using normalization?
  bool hasNormFactors;
  /// Vector of (optional) normalization factors for each sample
  std::vector<double> normFactors;
  /// Gene names
  std::vector<std::string> geneNames;
  /// Digital gene expression counts
  /// (NOTE(review): whether the outer index is gene or sample is not visible
  /// from this header -- confirm in the implementation)
  std::vector<std::vector<double> > counts;
  /// Sample names
  std::vector<std::string> sampleNames;
  /// Sample phenotypes
  std::vector<int> phenotypes;
  /// Min and max count for genes
  std::vector<std::pair<double, double> > minMaxGeneCounts;
  /// Min and max values for samples
  std::vector<std::pair<double, double> > minMaxSampleCounts;
  /// Gene count sums
  std::vector<double> sumGeneCounts;
  /// Zero count sample indices
  std::vector<std::vector<int> > sampleZeroes;
};

#endif /* DGEDATA_H_ */