diff --git a/NEWS.md b/NEWS.md index 1c8f0a7..539a2a3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,22 @@ +Release 0.12-r237 (24 June 2023) +-------------------------------- + +Notable changes: + + * New feature: added option --no-cs to disable the cs tag. This tag is not as + useful as the cs tag for nucleotide alignment because it does not encode the + matching amino acids. + + * New feature: output the number of frameshifts and in-frame stop codons in + the PAF output. It is non-trivial to parse in-frame stop codons. + + * Bugfix: fixed malformatted protein sequences when --gtf and --trans are both + specified (#45). + +(0.12: 24 June 2023, r237) + + + Release 0.11-r234 (18 April 2023) --------------------------------- diff --git a/format.c b/format.c index bb4b80a..8dd9f9f 100644 --- a/format.c +++ b/format.c @@ -296,10 +296,10 @@ static void mp_write_residue(kstring_t *out, const mp_idx_t *mi, const mp_mapopt kfree(0, str[0]); } -void mp_write_paf(kstring_t *s, const mp_idx_t *mi, const mp_bseq1_t *seq, const mp_reg1_t *r, int32_t gff_out) +void mp_write_paf(kstring_t *s, const mp_idx_t *mi, const mp_mapopt_t *opt, const mp_bseq1_t *seq, const mp_reg1_t *r) { const mp_ctg_t *ctg; - if (gff_out) mp_sprintf_lite(s, "##PAF\t"); + if (opt->flag & (MP_F_GFF|MP_F_GTF)) mp_sprintf_lite(s, "##PAF\t"); if (r == 0) { mp_sprintf_lite(s, "%s\t%d\t0\t0\t*\t*\t0\t0\t0\t0\t0\t0\n", seq->name, seq->l_seq); return; @@ -316,8 +316,10 @@ void mp_write_paf(kstring_t *s, const mp_idx_t *mi, const mp_bseq1_t *seq, const for (k = 0; k < r->p->n_cigar; ++k) mp_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, NS_CIGAR_STR[r->p->cigar[k]&0xf]); } else mp_sprintf_lite(s, "%d\t%d\t%d", r->chn_sc, r->chn_sc_ungap, r->cnt); - mp_sprintf_lite(s, "\t"); - mp_write_cs(s, mi, &seq->seq[r->qs], r); + if (!(opt->flag & MP_F_NO_CS)) { + mp_sprintf_lite(s, "\t"); + mp_write_cs(s, mi, &seq->seq[r->qs], r); + } mp_sprintf_lite(s, "\n"); } @@ -416,16 +418,16 @@ void mp_write_output(kstring_t *s, void *km, 
const mp_idx_t *mi, const mp_bseq1_ s->l = 0; if (r == 0) { if (opt->flag&MP_F_SHOW_UNMAP) - mp_write_paf(s, mi, seq, 0, opt->flag&MP_F_GFF); + mp_write_paf(s, mi, opt, seq, 0); } else if (opt->flag&MP_F_GTF) { if (opt->flag & (MP_F_SHOW_RESIDUE|MP_F_SHOW_TRANS)) { - mp_write_paf(s, mi, seq, r, opt->flag&MP_F_GTF); + mp_write_paf(s, mi, opt, seq, r); mp_write_residue(s, mi, opt, seq->seq, r); } mp_write_gtf(s, km, mi, seq, r, opt->gff_prefix, id, seq->name); } else { if (!(opt->flag&MP_F_NO_PAF)) - mp_write_paf(s, mi, seq, r, opt->flag&MP_F_GFF); + mp_write_paf(s, mi, opt, seq, r); if (opt->flag & (MP_F_SHOW_RESIDUE|MP_F_SHOW_TRANS)) mp_write_residue(s, mi, opt, seq->seq, r); if (opt->flag&MP_F_GFF) diff --git a/main.c b/main.c index b04eb1a..ca9e694 100644 --- a/main.c +++ b/main.c @@ -19,6 +19,7 @@ static ko_longopt_t long_options[] = { { "outc", ko_required_argument, 313 }, { "ie-coef", ko_required_argument, 314 }, { "trans", ko_no_argument, 315 }, + { "no-cs", ko_no_argument, 316 }, { "version", ko_no_argument, 401 }, { "no-kalloc", ko_no_argument, 501 }, { "dbg-qname", ko_no_argument, 502 }, @@ -148,6 +149,7 @@ int main(int argc, char *argv[]) else if (c == 313) mo.out_cov = atof(o.arg); // --outc else if (c == 314) mo.ie_coef = atof(o.arg); // --ie-coef else if (c == 315) mo.flag |= MP_F_SHOW_TRANS; // --trans + else if (c == 316) mo.flag |= MP_F_NO_CS; // --no-cs else if (c == 501) mp_dbg_flag |= MP_DBG_NO_KALLOC; // --no-kalloc else if (c == 502) mp_dbg_flag |= MP_DBG_QNAME; // --dbg-qname else if (c == 503) mp_dbg_flag |= MP_DBG_NO_REFINE; // --dbg-no-refine diff --git a/miniprot.1 b/miniprot.1 index 065b75a..dfa08b6 100644 --- a/miniprot.1 +++ b/miniprot.1 @@ -1,4 +1,4 @@ -.TH miniprot 1 "18 April 2023" "miniprot-0.11 (r234)" "Bioinformatics tools" +.TH miniprot 1 "24 June 2023" "miniprot-0.12 (r237)" "Bioinformatics tools" .SH NAME .PP miniprot - protein-to-genome alignment with splicing and frameshifts @@ -156,6 +156,9 @@ substitution corresponding to 
the `G' operator. .B --trans Output translated protein sequences on `##STA' lines. .TP +.B --no-cs +Do not output the cs tag. +.TP .BI --max-intron-out \ NUM In the .B --aln diff --git a/miniprot.h b/miniprot.h index e31a032..68ed5c7 100644 --- a/miniprot.h +++ b/miniprot.h @@ -3,7 +3,7 @@ #include -#define MP_VERSION "0.11-r236-dirty" +#define MP_VERSION "0.12-r237" #define MP_F_NO_SPLICE 0x1 #define MP_F_NO_ALIGN 0x2 @@ -14,6 +14,7 @@ #define MP_F_NO_PRE_CHAIN 0x40 #define MP_F_SHOW_RESIDUE 0x80 #define MP_F_SHOW_TRANS 0x100 +#define MP_F_NO_CS 0x200 #define MP_FEAT_CDS 0 #define MP_FEAT_STOP 1