-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmask.c
127 lines (113 loc) · 3.6 KB
/
mask.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#include <sys/stat.h>
#include <sys/types.h>
#include "cfile.h"
static int usage() {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: yame mask [options] <in.cg> <mask.cx>\n");
fprintf(stderr, "\n");
fprintf(stderr, "Options:\n");
fprintf(stderr, " -o output cx file name. if missing, output to stdout without index.\n");
fprintf(stderr, " -c contextualize to format 6 using '1's in mask.\n");
fprintf(stderr, " if format 3 is used as mask, then use M+U>0 (coverage).\n");
fprintf(stderr, " -v reverse the mask (default is to mask '1's, if -v will mask '0's).\n");
fprintf(stderr, " -h This help\n");
fprintf(stderr, "\n");
return 1;
}
void mask_fmt3(cdata_t *c, cdata_t c_mask, BGZF *fp_out) {
for (uint64_t i=0; i<c->n; ++i) {
if (FMT0_IN_SET(c_mask, i)) {
f3_set_mu(c, i, 0, 0);
}
}
cdata_compress(c);
cdata_write1(fp_out, c);
}
void mask_fmt0(cdata_t *c, cdata_t c_mask, BGZF *fp_out) {
for (uint64_t i=0; i<cdata_nbytes(c); ++i) {
c->s[i] &= ~c_mask.s[i];
}
/* cdata_compress(&c); */
cdata_write1(fp_out, c);
}
void fmt0ContextualizeFmt6(cdata_t *c, cdata_t c_mask, BGZF *fp_out) {
cdata_t c6 = {.fmt = '6', .n = c->n};
c6.s = calloc((c6.n+3)/4, sizeof(uint8_t));
for (uint64_t i=0; i<c6.n; ++i) {
if (FMT0_IN_SET(c_mask,i)) { // mask is used as universe, use -v to invert
if (FMT0_IN_SET(*c, i)) FMT6_SET1(c6, i);
else FMT6_SET0(c6, i);
}
}
cdata_compress(&c6);
cdata_write1(fp_out, &c6);
free_cdata(&c6);
}
int main_mask(int argc, char *argv[]) {
int c, reverse = 0, contextualize_to_fmt6 = 0;
char *fname_out = NULL;
while ((c = getopt(argc, argv, "o:cvh"))>=0) {
switch (c) {
case 'o': fname_out = strdup(optarg); break;
case 'c': contextualize_to_fmt6 = 1; break;
case 'v': reverse = 1; break;
case 'h': return usage(); break;
default: usage(); wzfatal("Unrecognized option: %c.\n", c);
}
}
if (optind + 2 > argc) {
usage();
wzfatal("Please supply input file.\n");
}
char *fname = argv[optind];
char *fname_mask = argv[optind+1];
cfile_t cf_mask = open_cfile(fname_mask);
cdata_t c_mask = read_cdata1(&cf_mask);
if (c_mask.fmt == '1') convertToFmt0(&c_mask);
if (c_mask.fmt == '3') convertToFmt0(&c_mask);
if (c_mask.fmt != '0') wzfatal("Mask format not supported.");
if (reverse) {
for (uint64_t i=0; i<cdata_nbytes(&c_mask); ++i) {
c_mask.s[i] = ~(c_mask.s[i]);
}
}
BGZF *fp_out;
if (fname_out) fp_out = bgzf_open2(fname_out, "w");
else fp_out = bgzf_dopen(fileno(stdout), "w");
if (fp_out == NULL) {
fprintf(stderr, "Error opening file for writing: %s\n", fname_out);
exit(1);
}
cfile_t cf = open_cfile(fname);
while (1) {
cdata_t c = read_cdata1(&cf);
if (c.n == 0) break;
if (c.fmt == '1') convertToFmt0(&c);
decompress2(&c);
if (c.n != c_mask.n) {
fprintf(stderr, "[%s:%d] mask (n=%"PRIu64") and query (N=%"PRIu64") are of different lengths.\n", __func__, __LINE__, c_mask.n, c.n);
fflush(stderr);
exit(1);
}
if (c.fmt == '3') {
mask_fmt3(&c, c_mask, fp_out);
} else if (c.fmt == '0') {
if (contextualize_to_fmt6) {
fmt0ContextualizeFmt6(&c, c_mask, fp_out);
} else {
mask_fmt0(&c, c_mask, fp_out);
}
} else {
fprintf(stderr, "[%s:%d] Only format %d files are supported.\n", __func__, __LINE__, c.fmt);
fflush(stderr);
exit(1);
}
free_cdata(&c);
}
if (fname_out) free(fname_out);
bgzf_close(fp_out);
free_cdata(&c_mask);
bgzf_close(cf.fh);
bgzf_close(cf_mask.fh);
return 0;
}