diff --git a/gfa-base.c b/gfa-base.c index 3c41903..2901cfb 100644 --- a/gfa-base.c +++ b/gfa-base.c @@ -210,10 +210,16 @@ void gfa_fix_arc_len(gfa_t *g) for (k = 0; k < g->n_arc; ++k) { gfa_arc_t *a = &g->arc[k]; uint32_t v = gfa_arc_head(*a), w = gfa_arc_tail(*a); - if (g->seg[v>>1].del || g->seg[w>>1].del) { + const gfa_seg_t *sv = &g->seg[v>>1]; + if (!sv->del && sv->len < a->ov) { + if (gfa_verbose >= 2) + fprintf(stderr, "[W] overlap length longer than segment length for '%s': %d > %d\n", sv->name, a->ov, sv->len); + a->ov = sv->len; + } + if (sv->del || g->seg[w>>1].del) { a->del = 1; } else { - a->v_lv |= g->seg[v>>1].len - a->ov; + a->v_lv |= sv->len - a->ov; } } } @@ -296,6 +302,11 @@ void gfa_arc_rm(gfa_t *g) uint32_t u = g->arc[e].v_lv>>32, v = g->arc[e].w; if (!g->arc[e].del && !g->seg[u>>1].del && !g->seg[v>>1].del) g->arc[n++] = g->arc[e]; + else { + gfa_aux_t *aux = &g->link_aux[g->arc[e].link_id]; + free(aux->aux); + aux->aux = 0, aux->l_aux = aux->m_aux = 0; + } } if (n < g->n_arc) { // arc index is out of sync if (g->idx) free(g->idx); @@ -325,7 +336,7 @@ int32_t gfa_check_multi(const gfa_t *g) int32_t nv = gfa_arc_n(g, v); max_nv = max_nv > nv? 
max_nv : nv; } - if (max_nv == 1) return 0; + if (max_nv == 1 || max_nv < 0) return 0; GFA_MALLOC(buf, max_nv); for (v = 0; v < n_vtx; ++v) { int32_t i, s, nv = gfa_arc_n(g, v); @@ -422,7 +433,7 @@ static inline int gfa_aux_type2size(int x) } #define __skip_tag(s) do { \ - int type = toupper(*(s)); \ + int type = *(s); \ ++(s); \ if (type == 'Z') { while (*(s)) ++(s); ++(s); } \ else if (type == 'B') (s) += 5 + gfa_aux_type2size(*(s)) * (*(int32_t*)((s)+1)); \ diff --git a/gfa-priv.h b/gfa-priv.h index adedd9d..18b84ae 100644 --- a/gfa-priv.h +++ b/gfa-priv.h @@ -1,6 +1,7 @@ #ifndef __GFA_PRIV_H__ #define __GFA_PRIV_H__ +#include <stdlib.h> #include "gfa.h" #define GFA_MALLOC(ptr, len) ((ptr) = (__typeof__(ptr))malloc((len) * sizeof(*(ptr)))) @@ -24,10 +25,19 @@ typedef struct { typedef struct { int32_t n_v, n_a, is_dag; gfa_subv_t *v; - int32_t *a; + uint64_t *a; // high 32 bits: point to the neighbor; low 32 bits: arc index in the graph void *km; } gfa_sub_t; +typedef struct { + int32_t snid, ss, se; + uint32_t vs, ve; + int32_t n_seg, len_max, len_min; + float cf_max, cf_min; + uint32_t *v; + char *seq_max, *seq_min; // seq_max and seq_min point to v[] +} gfa_bubble_t; + #ifdef __cplusplus extern "C" { #endif @@ -58,9 +68,11 @@ uint32_t gfa_fix_multi(gfa_t *g); int gfa_arc_del_trans(gfa_t *g, int fuzz); // transitive reduction int gfa_arc_del_short(gfa_t *g, float drop_ratio); // delete short arcs int gfa_cut_tip(gfa_t *g, int max_ext); // cut tips +int gfa_topocut(gfa_t *g, int max_ext, float drop_ratio); +int gfa_bub_simple(gfa_t *g, int min_side, int max_side); int gfa_cut_internal(gfa_t *g, int max_ext); // drop internal segments int gfa_cut_biloop(gfa_t *g, int max_ext); // Hmm... I forgot... 
Some type of weird local topology -int gfa_pop_bubble(gfa_t *g, int max_dist); // bubble popping +int gfa_pop_bubble(gfa_t *g, int max_dist, int protect_tip); // bubble popping gfa_t *gfa_ug_gen(const gfa_t *g); // subset, modifying the graph @@ -78,7 +90,9 @@ void gfa_augment(gfa_t *g, int32_t n_ins, const gfa_ins_t *ins, int32_t n_ctg, c gfa_sfa_t *gfa_gfa2sfa(const gfa_t *g, int32_t *n_sfa_, int32_t write_seq); -void gfa_blacklist_print(const gfa_t *g, FILE *fp, int32_t min_len); // FIXME: doesn't work with translocations +gfa_bubble_t *gfa_bubble(const gfa_t *g, int32_t *n_); // FIXME: doesn't work with translocations + +void gfa_gt_simple_print(const gfa_t *g, float min_dc, int32_t is_path); // FIXME: doesn't work with translocations void gfa_aux_update_cv(gfa_t *g, const char *tag, const double *cov_seg, const double *cov_link); diff --git a/gfa.h b/gfa.h index c820685..cbf47e4 100644 --- a/gfa.h +++ b/gfa.h @@ -4,7 +4,7 @@ #include <stdio.h> #include <stdint.h> -#define GFA_VERSION "0.3-r70" +#define GFA_VERSION "0.3-r116-dirty" #define GFA_O_OV_EXT 0x1 #define GFA_O_NO_SEQ 0x2 diff --git a/minigraph.h b/minigraph.h index 8250060..3c80272 100644 --- a/minigraph.h +++ b/minigraph.h @@ -4,7 +4,7 @@ #include <stdint.h> #include "gfa.h" -#define MG_VERSION "0.6-r306-dirty" +#define MG_VERSION "0.6-r307-dirty" #define MG_M_SPLICE 0x10 #define MG_M_SR 0x20