diff --git a/sam.c b/sam.c index 9f415dbc6..350b8da45 100644 --- a/sam.c +++ b/sam.c @@ -6219,14 +6219,24 @@ int bam_parse_basemod(const bam1_t *b, hts_base_mod_state *state) { if (!mm) return 0; if (mm[0] != 'Z') { - hts_log_error("MM tag is not of type Z"); + hts_log_error("%s: MM tag is not of type Z", bam_get_qname(b)); + return -1; + } + + uint8_t *mi = bam_aux_get(b, "MZ"); + if (mi && bam_aux2i(mi) != b->core.l_qseq) { + // bam_aux2i with set errno = EINVAL and return 0 if the tag + // isn't integer, but 0 will be a seq-length mismatch anyway so + // triggers an error here too. + hts_log_error("%s: MM/MZ data length is incompatible with" + " SEQ length", bam_get_qname(b)); return -1; } uint8_t *ml = bam_aux_get(b, "ML"); if (!ml) ml = bam_aux_get(b, "Ml"); if (ml && (ml[0] != 'B' || ml[1] != 'C')) { - hts_log_error("ML tag is not of type B,C"); + hts_log_error("%s: ML tag is not of type B,C", bam_get_qname(b)); return -1; } uint8_t *ml_end = ml ? ml+6 + le_to_u32(ml+2) : NULL; @@ -6312,7 +6322,8 @@ int bam_parse_basemod(const bam1_t *b, hts_base_mod_state *state) { delta = strtol(cp, &cp_end, 10); if (cp_end == cp) { - hts_log_error("Hit end of MM tag. Missing semicolon?"); + hts_log_error("%s: Hit end of MM tag. Missing " + "semicolon?", bam_get_qname(b)); return -1; } @@ -6341,8 +6352,8 @@ int bam_parse_basemod(const bam1_t *b, hts_base_mod_state *state) { state->implicit [mod_num] = implicit; if (delta < 0) { - hts_log_error("MM tag refers to bases beyond sequence " - "length"); + hts_log_error("%s: MM tag refers to bases beyond sequence " + "length", bam_get_qname(b)); return -1; } state->MMcount [mod_num] = delta; @@ -6357,7 +6368,8 @@ int bam_parse_basemod(const bam1_t *b, hts_base_mod_state *state) { } if (++mod_num >= MAX_BASE_MOD) { - hts_log_error("Too many base modification types"); + hts_log_error("%s: Too many base modification types", + bam_get_qname(b)); return -1; } ms++; n++; @@ -6375,7 +6387,8 @@ int bam_parse_basemod(const bam1_t *b, hts_base_mod_state *state) { } } if (ml > ml_end) { - hts_log_error("Insufficient number of entries in ML tag"); + hts_log_error("%s: Insufficient number of entries in ML " + "tag", bam_get_qname(b)); return -1; } } else { @@ -6387,7 +6400,8 @@ int bam_parse_basemod(const bam1_t *b, hts_base_mod_state *state) { cp++; } if (!*cp) { - hts_log_error("Hit end of MM tag. Missing semicolon?"); + hts_log_error("%s: Hit end of MM tag. Missing semicolon?", + bam_get_qname(b)); return -1; } } diff --git a/test/base_mods/MM-MZf1.sam b/test/base_mods/MM-MZf1.sam new file mode 100644 index 000000000..35074fd05 --- /dev/null +++ b/test/base_mods/MM-MZf1.sam @@ -0,0 +1,5 @@ +@SQ SN:I LN:999 +r1 0 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 MZ:i:37 +r1- 16 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:G-m,0,1,4,1,2;G-h,0,7;N-n,17,2; Ml:B:C,230,204,179,153,128,6,159,240,215 +r2 0 I 4 0 3S33M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 +r3 0 I 11 0 10S20M6S * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+mh,2,2,0,0,4,1;N+n,15,2; Ml:B:C,128,0,153,0,0,159,179,0,204,0,230,6,215,240 MZ:i:36 diff --git a/test/base_mods/MM-MZf2.sam b/test/base_mods/MM-MZf2.sam new file mode 100644 index 000000000..843f93a1b --- /dev/null +++ b/test/base_mods/MM-MZf2.sam @@ -0,0 +1,5 @@ +@SQ SN:I LN:999 +r1 0 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 MZ:i:36 +r1- 16 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:G-m,0,1,4,1,2;G-h,0,7;N-n,17,2; Ml:B:C,230,204,179,153,128,6,159,240,215 +r2 0 I 4 0 3S33M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 +r3 0 I 11 0 10S20M6S * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+mh,2,2,0,0,4,1;N+n,15,2; Ml:B:C,128,0,153,0,0,159,179,0,204,0,230,6,215,240 MZ:f:36 diff --git a/test/base_mods/MM-MZp.sam b/test/base_mods/MM-MZp.sam new file mode 100644 index 000000000..836a09725 --- /dev/null +++ b/test/base_mods/MM-MZp.sam @@ -0,0 +1,5 @@ +@SQ SN:I LN:999 +r1 0 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 MZ:i:36 +r1- 16 I 1 0 36M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:G-m,0,1,4,1,2;G-h,0,7;N-n,17,2; Ml:B:C,230,204,179,153,128,6,159,240,215 +r2 0 I 4 0 3S33M * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 +r3 0 I 11 0 10S20M6S * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A Mm:Z:C+mh,2,2,0,0,4,1;N+n,15,2; Ml:B:C,128,0,153,0,0,159,179,0,204,0,230,6,215,240 MZ:i:36 diff --git a/test/base_mods/MM-multi.sam b/test/base_mods/MM-multi.sam index b2259a09e..1c7288f50 100644 --- a/test/base_mods/MM-multi.sam +++ b/test/base_mods/MM-multi.sam @@ -3,5 +3,5 @@ @CO r2 has them combined together, for example as produced by @CO a joint basecaller which assigns probabilities to all @CO trained events simultaneously. -r1 0 * 0 0 * * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA * Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 +r1 0 * 0 0 * * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA * Mm:Z:C+m,2,2,1,4,1;C+h,6,7;N+n,15,2; Ml:B:C,128,153,179,204,230,159,6,215,240 MZ:i:36 r2 0 * 0 0 * * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA * Mm:Z:C+mh,2,2,0,0,4,1;N+n,15; Ml:B:C,77,159,103,133,128,108,154,82,179,57,204,31,240 diff --git a/test/base_mods/base-mods.sh b/test/base_mods/base-mods.sh index f3f3ca4b7..388ff369e 100755 --- a/test/base_mods/base-mods.sh +++ b/test/base_mods/base-mods.sh @@ -31,5 +31,6 @@ test_mod="../test_mod" pileup_mod="../pileup_mod" test_driver $@ +rm _err.tmp _out.tmp exit $? diff --git a/test/base_mods/base-mods.tst b/test/base_mods/base-mods.tst index 3809c0e6e..d246223c8 100644 --- a/test/base_mods/base-mods.tst +++ b/test/base_mods/base-mods.tst @@ -42,3 +42,9 @@ P MM-explicit-x.out $test_mod -x MM-explicit.sam # Pileup testing P MM-pileup.out $pileup_mod < MM-pileup.sam P MM-pileup2.out $pileup_mod < MM-pileup2.sam + +# Validation testing. We just care about exit status here, but the +# test data is a copy of MM-pileup.sam so that suffices too. +P MM-pileup.out $pileup_mod < MM-MZp.sam +F MM-pileup.out $pileup_mod < MM-MZf1.sam +F MM-pileup.out $pileup_mod < MM-MZf2.sam diff --git a/test/pileup_mod.c b/test/pileup_mod.c index 95c353771..323c0c6c2 100644 --- a/test/pileup_mod.c +++ b/test/pileup_mod.c @@ -73,7 +73,8 @@ void process_pileup(sam_hdr_t *h, const bam_pileup1_t *p, // as each new read is added or removed from the pileups. int pileup_cd_create(void *data, const bam1_t *b, bam_pileup_cd *cd) { hts_base_mod_state *m = hts_base_mod_state_alloc(); - bam_parse_basemod(b, m); + if (bam_parse_basemod(b, m) < 0) + return -1; cd->p = m; return 0; } @@ -201,7 +202,7 @@ int main(int argc, char **argv) { bam_plp_destructor(iter, pileup_cd_destroy); const bam_pileup1_t *p; - int tid, pos, n; + int tid, pos, n = 0; while ((p = bam_plp_auto(iter, &tid, &pos, &n)) != 0) { switch (compact) { case 0: @@ -221,5 +222,5 @@ int main(int argc, char **argv) { bam_destroy1(b); sam_hdr_destroy(h); - return 0; + return n != 0; }