From f568ff5fe524bdbf6dc5eec034548f2d4879a681 Mon Sep 17 00:00:00 2001 From: James Bonfield Date: Tue, 17 Sep 2024 09:56:30 +0100 Subject: [PATCH] Remove the mpileup BAM_CREF_SKIP filter. Mpileup removes alignments using the cigar ref skip operator ("N"). This was originally added in 2011 in samtools/samtools#d1643d6 with the commit message of "fixed a bug in indel calling related to unmapped and refskip reads". Unfortunately I don't know what that bug was, but removing the code shows it still works (at least for some data!). We need better understanding of what's going on and why it was added, so perhaps we should add a command line option to control this instead? Fixes #2277 --- bam2bcf_edlib.c | 11 +---------- bam2bcf_indel.c | 11 +---------- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/bam2bcf_edlib.c b/bam2bcf_edlib.c index 4e0a38c3..4a04b8da 100644 --- a/bam2bcf_edlib.c +++ b/bam2bcf_edlib.c @@ -1559,19 +1559,10 @@ int bcf_edlib_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, } } - int qbeg, qpos, qend, tbeg, tend, kk; + int qbeg, qpos, qend, tbeg, tend; uint8_t *seq = bam_get_seq(p->b); - uint32_t *cigar = bam_get_cigar(p->b); if (p->b->core.flag & BAM_FUNMAP) continue; - // FIXME: the following loop should be better moved outside; - // nonetheless, realignment should be much slower anyway. - for (kk = 0; kk < p->b->core.n_cigar; ++kk) - if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP) - break; - if (kk < p->b->core.n_cigar) - continue; - // determine the start and end of sequences for alignment int left2 = left, right2 = right; int min_win_size = MAX(-biggest_del, biggest_ins); diff --git a/bam2bcf_indel.c b/bam2bcf_indel.c index 975504f8..6cbee23f 100644 --- a/bam2bcf_indel.c +++ b/bam2bcf_indel.c @@ -845,19 +845,10 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, } } - int qbeg, qpos, qend, tbeg, tend, kk; + int qbeg, qpos, qend, tbeg, tend; uint8_t *seq = bam_get_seq(p->b); - uint32_t *cigar = bam_get_cigar(p->b); if (p->b->core.flag & BAM_FUNMAP) continue; - // FIXME: the following loop should be better moved outside; - // nonetheless, realignment should be much slower anyway. - for (kk = 0; kk < p->b->core.n_cigar; ++kk) - if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP) - break; - if (kk < p->b->core.n_cigar) - continue; - // determine the start and end of sequences for alignment // FIXME: loops over CIGAR multiple times int left2 = left, right2 = right;