Skip to content

Commit

Permalink
introduce MIN_HMM_EVALUE constant
Browse files Browse the repository at this point in the history
  • Loading branch information
oschwengers committed Sep 24, 2024
1 parent 870a427 commit 4f40f46
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
1 change: 1 addition & 0 deletions bakta/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
MIN_PSC_IDENTITY = 0.9 # min protein identity for PSC detection
MIN_SORF_COVERAGE = 0.9 # min sORF coverage for PSC detection
MIN_SORF_IDENTITY = 0.9 # min sORF identity for PSC detection
# NOTE: despite the MIN_ prefix this value acts as an upper bound on the E-value:
# bakta/features/orf.py discards HMM hits whose E-value is greater than it.
MIN_HMM_EVALUE = 1e-6 # E-value threshold for CDS HMM searches (hits above it are discarded)
HYPOTHETICAL_PROTEIN = 'hypothetical protein' # hypothetical protein product description
CDS_MAX_OVERLAPS = 30 # max overlap [bp] allowed for user-provided/de novo-predicted CDS overlaps
CDS_SOURCE_USER = 'user-provided'
Expand Down
3 changes: 2 additions & 1 deletion bakta/features/orf.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ def detect_spurious(orfs: Sequence[dict]):
for top_hits in pyhmmer.hmmsearch(hmm, proteins, bit_cutoffs='gathering', cpus=cfg.threads):
for hit in top_hits:
orf = orf_by_aa_digest[hit.name.decode()]
if hit.evalue > 1E-5:
print(f'ANTIFAM: hit.evalue={hit.evalue}')
if hit.evalue > bc.MIN_HMM_EVALUE:
log.debug(
'discard low spurious E value: contig=%s, start=%i, stop=%i, strand=%s, subject=%s, evalue=%1.1e, bitscore=%f',
orf['contig'], orf['start'], orf['stop'], orf['strand'], hit.best_domain.alignment.hmm_name.decode(), hit.evalue, hit.score
Expand Down

0 comments on commit 4f40f46

Please sign in to comment.