forked from pockerman/hidden_markov_modeling
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.json
138 lines (135 loc) · 3.12 KB
/
config.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
{
"regions":{
"start":[1000000],
"end":[10000000]
},
"regions_files":["/home/a/ag568/region_0.txt"],
"chromosome": "chr1",
"reference_file":{
"filename": "/scratch/spectre/a/ag568/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna"
},
"no_wga_file": {
"filename": "/scratch/spectre/a/ag568/m585_verysensitive_trim_sorted.bam"
},
"test_file":{
"filename": "/scratch/spectre/a/ag568/m605_verysensitive_trim_sorted.bam"
},
"debug":{
"log_bam_for_debug":false
},
"window_size":100,
"fill_missing_window_data":false,
"fill_missing_window_data_factor":0,
"quality_threshold":null,
"save_windows":false,
"remove_windows_with_N":false,
"mark_N_windows":true,
"mark_for_N_windows":-999.0,
"save_cluster_dbi":true,
"n_windows_dist":{
"name":"uniform",
"config":{
"parameters":[-999.5, -998.5]
}
},
"windows_filename":"hmm_windows.json",
"check_windowing_sanity":true,
"outlier_remove":{
"name":"zscore",
"config":{
"sigma_factor":2
}
},
"label_clusters": true,
"labeler":{
"name":"mean_diff",
"tuf_mean_min": 1.5,
"tuf_mean_max": 8.5,
"states":["DELETE", "OTHER", "OTHER", "TUF"]
},
"clusterer":{
"name":"kmedoids",
"config":{
"init_cluster_idx":"random_from_data",
"metric":"MANHATAN",
"features":["mean"],
"n_clusters":4
}
},
"clusters":{
"cluster_0":{
"filename":"/home/a/ag568/cluster_0.txt",
"state": "normal",
"distributions":{
"wga":{
"type":"distribution",
"name":"normal"
},
"no_wga":{
"type":"distribution",
"name":"normal"
}
}
},
"cluster_1":{
"filename":"/home/a/ag568/cluster_1.txt",
"state": "tuf",
"distributions":{
"wga":{
"type":"gmm",
"uniform":{
"params":[2.0, 8.0]
},
"names":["normal", "uniform"],
"weights":null
},
"no_wga":{
"type":"gmm",
"uniform":{
"params":[2.0, 8.0]
},
"names":["normal", "uniform"],
"weights":null
}
}
}
},
"HMM": {
"name":"HMM_Model",
"train":true,
"train_solver": "baum-welch",
"lr_decay":0.7,
"inertia":0.6,
"verbose":true,
"save_model":true,
"save_hmm_filename":"/home/a/ag568/HMM_Model",
"states":{
"tuf":{
"start_prob":0.48
},
"normal":{
"start_prob":0.48
},
"gap_state":{
"start_prob":0.04
}
},
"transitions":{
"tuf-tuf":0.90,
"normal-normal":0.90,
"tuf-normal":0.05,
"normal-tuf":0.05,
"gap_state-gap_state":0.90,
"gap_state-tuf":0.05,
"tuf-gap_state":0.05,
"normal-gap_state":0.05,
"gap_state-normal":0.05
},
"train_sequence_size":100,
"train_sequence_source":"region",
"train_n_sequences_per_source":10000,
"train_windowtype":"both"
},
"logger_file":"tuf.log",
"logger_level":"INFO"
}