forked from amathelier/DNAshapedTFBS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDNAshapedTFBS_constants.py
executable file
·33 lines (28 loc) · 1.56 KB
/
DNAshapedTFBS_constants.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# PATH VARIABLES
# NOTE(review): BWTOOL is presumably the command used to invoke the bwtool
# program (resolved through PATH) - confirm against callers.
BWTOOL = "bwtool"
# Bare CSV file names (no directory component).
CUMULATIVE_EXPERIMENTS_PATH = "CUMULATIVE_EXPERIMENTS.csv"
CUMULATIVE_AUPRC_AUROC_PATH = "CUMULATIVE_AUPRC_AUROC.csv"

# One pair of floats per DNA shape feature name.
# NOTE(review): each pair looks like a (lower, upper) bound for that feature's
# values, presumably used for scaling - confirm against callers.
DNASHAPEINTER = {
    'HelT': (30.94, 38.05),
    'MGW': (2.85, 6.20),
    'ProT': (-16.51, -0.03),
    'Roll': (-8.57, 8.64),
}
# FEATURE VECTOR TYPE CONSTANTS
# Integer codes naming which feature families make up a feature vector.
SEQ_AND_DNA_SHAPE_TYPE_CONSTANT = 0
DNA_SHAPE_ONLY_TYPE_CONSTANT = 1
DNA_SHAPE_AND_FLEX_TYPE_CONSTANT = 2
# Every type code whose name includes DNA shape.
DNA_SHAPE_FEATURE_TYPE_CONSTANTS = {
    SEQ_AND_DNA_SHAPE_TYPE_CONSTANT,
    DNA_SHAPE_ONLY_TYPE_CONSTANT,
    DNA_SHAPE_AND_FLEX_TYPE_CONSTANT,
}
SEQ_AND_FLEX_TYPE_CONSTANT = 3
FLEX_ONLY_TYPE_CONSTANT = 4
# Every type code whose name includes flexibility.
FLEXIBILITY_TYPE_CONSTANTS = {
    SEQ_AND_FLEX_TYPE_CONSTANT,
    FLEX_ONLY_TYPE_CONSTANT,
    DNA_SHAPE_AND_FLEX_TYPE_CONSTANT,
}
# Every type code whose name includes a sequence feature. Both SEQ_AND_* codes
# belong here; listing SEQ_AND_FLEX_TYPE_CONSTANT twice would collapse the set
# to a single element and silently drop SEQ_AND_DNA_SHAPE_TYPE_CONSTANT.
SEQ_FEATURE_INCLUDED_CONSTANTS = {
    SEQ_AND_DNA_SHAPE_TYPE_CONSTANT,
    SEQ_AND_FLEX_TYPE_CONSTANT,
}
# SEQUENCE FEATURE TYPE CONSTANTS
# Integer codes selecting how the sequence feature is computed.
PSSM_SCORE_TYPE_CONSTANT = 0
TFFM_SCORE_TYPE_CONSTANT = 1
BINARY_ENCODING_TYPE_CONSTANT = 2

MAX_MOTIF_LENGTH = 20

# NOTE(review): the names ending in "2" are presumably second-order variants
# of the corresponding shape features - confirm against callers.
SHAPE_FEATURE_NAMES = [
    'HelT', 'ProT', 'MGW', 'Roll',
    'HelT2', 'MGW2', 'Roll2',
]

# 32 trinucleotide labels; the order is kept exactly as it was defined.
TRI_NUC_CLASSES = [
    'AAT', 'AAA', 'CCA', 'AAC', 'ACT', 'CCG', 'ATC', 'AAG',
    'CGC', 'AGG', 'GAA', 'ACG', 'ACC', 'GAC', 'CCC', 'ACA',
    'CGA', 'GGA', 'CAA', 'AGC', 'GTA', 'AGA', 'CTC', 'CAC',
    'TAA', 'GCA', 'CTA', 'GCC', 'ATG', 'CAG', 'ATA', 'TCA',
]

# CURRENTLY: 1 sequence feature + (MOTIF_LENGTH * NUM_SHAPES) shape features
# + Flex_Eval + Trinuc_Counts
ALL_FEATURES_COUNT = (
    1                                                # sequence feature
    + MAX_MOTIF_LENGTH * len(SHAPE_FEATURE_NAMES)    # shape features
    + 1                                              # Flex_Eval
    + len(TRI_NUC_CLASSES)                           # Trinuc_Counts
)