Skip to content

Commit

Permalink
remove remnant of terms
Browse files Browse the repository at this point in the history
  • Loading branch information
oschwengers committed Oct 10, 2024
1 parent ae4142b commit 51f4d11
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions bakta/features/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# Precompiled patterns applied to protein product descriptions.
# Case-insensitive match for the literal 'NODE_'.
RE_PROTEIN_NODE = re.compile(r'NODE_', flags=re.IGNORECASE)
# Case-insensitive match for the words 'genome' or 'shotgun' (captured as group 1).
RE_PROTEIN_POTENTIAL_CONTIG_NAME = re.compile(r'(genome|shotgun)', flags=re.IGNORECASE)
# Case-insensitive match for the phrase 'domain-containing protein'.
RE_PROTEIN_DOMAIN_CONTAINING = re.compile(r'domain-containing protein', flags=re.IGNORECASE)
# Case-insensitive match for the phrase 'Remnant of ' (including the trailing space),
# so that substituting it with '' leaves no stray whitespace behind.
RE_PROTEIN_REMNANT = re.compile(r'Remnant of ', flags=re.IGNORECASE)
# Matches any single character that is not an ASCII letter.
RE_PROTEIN_NO_LETTERS = re.compile(r'[^A-Za-z]')
# Matches a single '.' or '#' character.
RE_PROTEIN_SUSPECT_CHARS_DISCARD = re.compile(r'[.#]')
# Matches a single '@', '=', '?' or '%' character.
RE_PROTEIN_SUSPECT_CHARS_REPLACE = re.compile(r'[@=?%]')
Expand Down Expand Up @@ -566,6 +567,11 @@ def revise_cds_product(product: str):
if(product != old_product):
log.info('fix product: replace FOG ids. new=%s, old=%s', product, old_product)

old_product = product
product = RE_PROTEIN_REMNANT.sub('', product) # remove 'Remnant of's
if(product != old_product):
log.info('fix product: replace remnant ofs. new=%s, old=%s', product, old_product)

old_product = product
dufs = [] # replace DUF-containing products
for m in RE_DOMAIN_OF_UNKNOWN_FUCTION.finditer(product):
Expand Down

0 comments on commit 51f4d11

Please sign in to comment.