Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added argument for specifying NetMHCIIpan/NetMHCIIpanEL version #1181

Open
wants to merge 6 commits into
base: staging
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pvactools/lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"output_parser",
"valid_alleles",
'valid_algorithms',
'valid_netmhciipan_versions',
'net_chop',
"netmhc_stab",
"filter",
Expand Down
43 changes: 31 additions & 12 deletions pvactools/lib/output_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,11 @@ def get_scores(self, line, method):
return {'score': float(line['BigMHC_IM'])}
elif method.lower() == 'netmhcpan_el':
return {'score': float(line['score'])}
elif method.lower() == 'netmhciipan_el':
return {'score': float(line['score'])}
elif 'netmhciipan_el' in method.lower():
try:
return {'score': float(line['score'])}
except:
return {'ic50': float(line['ic50'])}
else:
return {'ic50': float(line['ic50'])}

Expand Down Expand Up @@ -596,7 +599,7 @@ def output_headers(self):
headers.append("%s MT Score" % pretty_method)
continue

if method in ['netmhcpan_el', 'netmhciipan_el']:
if 'netmhciipan_el' in method or 'netmhcpan_el' in method:
headers.append("%s WT Score" % pretty_method)
headers.append("%s MT Score" % pretty_method)
else:
Expand All @@ -620,9 +623,12 @@ def flurry_headers(self, headers):

def prediction_methods(self):
    """Return the sorted, de-duplicated prediction method names found in the IEDB file names.

    Each entry of ``self.input_iedb_files`` is expected to have a base name of
    the form ``<sample_name>.<method>[-<major>.<minor>].<rest>``. The optional
    ``-<major>.<minor>`` suffix (e.g. ``netmhciipan_el-4.3``) is kept as part
    of the method name.

    Returns:
        list: unique method names in ascending order.
    """
    # Compiled once; the sample name is escaped so regex metacharacters in it
    # are matched literally.
    pattern = re.compile(rf"{re.escape(self.sample_name)}\.(\w+(?:-\d+\.\d+)?)")
    prefix_length = len(self.sample_name) + 1

    methods = set()
    for input_iedb_file in self.input_iedb_files:
        filename = os.path.basename(input_iedb_file)
        match = pattern.match(filename)
        if match is not None:
            methods.add(match.group(1))
        else:
            # The pattern did not match (e.g. the method does not start with a
            # word character). Fall back to the legacy rule instead of failing
            # with AttributeError on ``None``: strip the "sample_name." prefix
            # and take everything before the next dot.
            methods.add(filename[prefix_length:].split('.', 1)[0])

    return sorted(methods)
Expand Down Expand Up @@ -761,6 +767,7 @@ def execute(self):


class DefaultOutputParser(OutputParser):

def parse_iedb_file(self, tsv_entries):
with open(self.key_file, 'r') as key_file_reader:
protein_identifiers_from_label = yaml.load(key_file_reader, Loader=yaml.FullLoader)
Expand All @@ -769,8 +776,12 @@ def parse_iedb_file(self, tsv_entries):
for input_iedb_file in self.input_iedb_files:
with open(input_iedb_file, 'r') as reader:
iedb_tsv_reader = csv.DictReader(reader, delimiter='\t')
# we remove "sample_name." prefix from filename and then first part before a dot is the method name
method = (os.path.basename(input_iedb_file)[len(self.sample_name)+1:]).split('.', 1)[0]
filename = os.path.basename(input_iedb_file)

pattern = re.compile(rf"{re.escape(self.sample_name)}\.(\w+(?:-\d+\.\d+)?)")
match = pattern.match(filename)
method = match.group(1)

for line in iedb_tsv_reader:
if "Warning: Potential DNA sequence(s)" in line['allele']:
continue
Expand Down Expand Up @@ -831,8 +842,12 @@ def parse_iedb_file(self):
for input_iedb_file in self.input_iedb_files:
with open(input_iedb_file, 'r') as reader:
iedb_tsv_reader = csv.DictReader(reader, delimiter='\t')
# we remove "sample_name." prefix from filename and then first part before a dot is the method name
method = (os.path.basename(input_iedb_file)[len(self.sample_name)+1:]).split('.', 1)[0]
filename = os.path.basename(input_iedb_file)

pattern = re.compile(rf"{re.escape(self.sample_name)}\.(\w+(?:-\d+\.\d+)?)")
match = pattern.match(filename)
method = match.group(1)

for line in iedb_tsv_reader:
if "Warning: Potential DNA sequence(s)" in line['allele']:
continue
Expand Down Expand Up @@ -946,7 +961,7 @@ def output_headers(self):
headers.append("%s Score" % pretty_method)
continue

if method in ['netmhcpan_el', 'netmhciipan_el']:
if 'netmhciipan_el' in method or 'netmhcpan_el' in method:
headers.append("%s Score" % pretty_method)
else:
headers.append("%s IC50 Score" % pretty_method)
Expand Down Expand Up @@ -1017,8 +1032,12 @@ def parse_iedb_file(self):
# input iedb file
with open(input_iedb_file, 'r') as reader:
iedb_tsv_reader = csv.DictReader(reader, delimiter='\t')
# we remove "sample_name." prefix from filename and then first part before a dot is the method name
method = (os.path.basename(input_iedb_file)[len(self.sample_name)+1:]).split('.', 1)[0]
filename = os.path.basename(input_iedb_file)

pattern = re.compile(rf"{re.escape(self.sample_name)}\.(\w+(?:-\d+\.\d+)?)")
match = pattern.match(filename)
method = match.group(1)

# header: allele, seq_num, start, end, length, peptide, ic50, percentile_rank
for line in iedb_tsv_reader:
if "Warning: Potential DNA sequence(s)" in line['allele']:
Expand Down
15 changes: 13 additions & 2 deletions pvactools/lib/prediction_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,9 +653,13 @@ def url(self):
def parse_iedb_allele_file(self):
#Ultimately we probably want this method to call out to IEDB but their command is currently broken
#curl --data "method=ann&species=human" http://tools-api.iedb.org/tools_api/mhci/
file_name = next(
(name for name in ["netmhciipan", "netmhciipan_el"] if name in self.iedb_prediction_method),
self.iedb_prediction_method
)
base_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
iedb_alleles_dir = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_ii')
iedb_alleles_file_name = os.path.join(iedb_alleles_dir, "%s.tsv" % self.iedb_prediction_method)
iedb_alleles_file_name = os.path.join(iedb_alleles_dir, "%s.tsv" % file_name)
alleles = []
with open(iedb_alleles_file_name) as iedb_alleles_file:
for row in iedb_alleles_file:
Expand All @@ -675,14 +679,21 @@ def iedb_executable_params(self, iedb_executable_path, method, allele, input_fil
allele = allele.replace('-DPB', '/DPB').replace('-DQB', '/DQB')
return [iedb_executable_path, method, allele, input_file, str(epitope_length)]

class NetMHCIIVersion:
    """Holder for the NetMHCIIpan/NetMHCIIpanEL version selected for the current run."""

    # Class-level (shared) value; starts unset. The run entry points assign the
    # ``--netmhciipan-version`` argument here, and the NetMHCIIpan /
    # NetMHCIIpanEL prediction classes read it to build their method name.
    netmhciipan_version = None

class NetMHCIIpan(IEDBMHCII):
    """IEDB class II predictor for NetMHCIIpan (binding affinity)."""

    @property
    def iedb_prediction_method(self):
        """Return the IEDB method name, with a version suffix when one is pinned.

        Versions '4.0', '4.2' and '4.3' yield ``netmhciipan_ba-<version>``.
        Any other value of ``NetMHCIIVersion.netmhciipan_version`` (including
        unset and '4.1') yields the unversioned ``netmhciipan_ba``.
        """
        # A stale ``return 'NetMHCIIpan'`` used to sit here and made the
        # version-aware logic below unreachable; it has been removed.
        version = NetMHCIIVersion.netmhciipan_version
        if version in ['4.0', '4.2', '4.3']:
            return 'netmhciipan_ba-' + version
        return 'netmhciipan_ba'

class NetMHCIIpanEL(IEDBMHCII):
    """IEDB class II predictor for NetMHCIIpan in eluted-ligand (EL) mode."""

    @property
    def iedb_prediction_method(self):
        """Return the IEDB method name, suffixed with the version when one is pinned.

        Only versions '4.0', '4.2' and '4.3' add a ``-<version>`` suffix; every
        other value (including unset) gives the plain ``netmhciipan_el``.
        """
        selected = NetMHCIIVersion.netmhciipan_version
        # Guard clause: anything outside the explicitly versioned set uses the
        # unversioned method name.
        if selected not in ['4.0', '4.2', '4.3']:
            return 'netmhciipan_el'
        return 'netmhciipan_el-' + selected

class NNalign(IEDBMHCII):
Expand Down
6 changes: 6 additions & 0 deletions pvactools/lib/run_argument_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ def __init__(self, tool_name, input_file_help):
default=1,
help="Number of threads to use for parallelizing peptide-MHC binding prediction calls.",
)
parser.add_argument(
"--netmhciipan-version",
choices=["4.3", "4.2", "4.1", "4.0"],
default="4.1",
help="Specify the version of NetMHCIIpan or NetMHCIIpanEL to be used during the run.",
)
self.parser = parser

def prediction_args(self):
Expand Down
28 changes: 28 additions & 0 deletions pvactools/lib/valid_netmhciipan_versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import argparse


class ValidNetMHCIIPanVersions:
    """Print the NetMHCIIpan / NetMHCIIpanEL versions that can be selected for a run."""

    # Human-readable version labels, printed one per line in this order.
    valid_versions = ["4.3", "4.2", "4.1 (Default)", "4.0 (Not supported by standalone IEDB)"]

    def __init__(self, list):
        """Store the listing flag.

        Args:
            list: truthy to print the versions. The name shadows the builtin
                but is kept so existing keyword callers keep working.
        """
        self.list = list

    def print_valid_versions(self):
        """Print a header and the valid versions when the listing flag is truthy."""
        if self.list:
            print("Valid NetMHCIIpan and NetMHCIIpanEL Versions")
            print('\n'.join(self.valid_versions))

    @classmethod
    def parser(cls, tool="pvacseq"):
        """Build the argument parser for ``<tool> valid_netmhciipan_versions``.

        Args:
            tool: name of the pVACtools tool, used as the prefix of the program name.

        Returns:
            argparse.ArgumentParser: parser exposing the ``-l/--list`` flag.
        """
        parser = argparse.ArgumentParser(
            # Program name now matches the registered subcommand
            # ("valid_netmhciipan_versions"); it previously read
            # "valid_netmhcpan_versions".
            "%s valid_netmhciipan_versions" % tool,
            description="Show a list of valid versions of NetMHCIIpan and NetMHCIIpanEL that can be used.",
            formatter_class=argparse.ArgumentDefaultsHelpFormatter
        )
        parser.add_argument(
            '-l', '--list',
            help="List the valid NetMHCIIpan and NetMHCIIpanEL versions.",
            # NOTE(review): the default is the *string* 'None', which is truthy,
            # so the versions are printed even when -l is omitted. Kept as-is
            # to preserve the current command behaviour; confirm this is intended.
            default='None',
            action='store_true'
        )
        return parser
1 change: 1 addition & 0 deletions pvactools/tools/pvacbind/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
'top_score_filter',
'net_chop',
'netmhc_stab',
'valid_netmhciipan_versions',
'calculate_reference_proteome_similarity',
'generate_aggregated_report',
'identify_problematic_amino_acids',
Expand Down
7 changes: 7 additions & 0 deletions pvactools/tools/pvacbind/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,13 @@ def main():
)
valid_algorithms_parser.set_defaults(func=valid_algorithms)

valid_netmhciipan_versions_parser = subparsers.add_parser(
"valid_netmhciipan_versions",
help="Show a list of valid versions of NetMHCIIpan and NetMHCIIpanEL that can be used.",
add_help=False
)
valid_netmhciipan_versions_parser.set_defaults(func=valid_netmhciipan_versions)

allele_specific_cutoffs_parser = subparsers.add_parser(
"allele_specific_cutoffs",
help="Show the allele specific cutoffs",
Expand Down
4 changes: 4 additions & 0 deletions pvactools/tools/pvacbind/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ def main(args_input = sys.argv[1:]):
input_file_type = 'fasta'
base_output_dir = os.path.abspath(args.output_dir)

if (args.netmhciipan_version == '4.0' and args.iedb_install_directory is not None):
raise Exception("Standalone IEDB does not support version 4.0")
NetMHCIIVersion.netmhciipan_version = args.netmhciipan_version

(class_i_prediction_algorithms, class_ii_prediction_algorithms) = split_algorithms(args.prediction_algorithms)
alleles = combine_class_ii_alleles(args.allele)
(class_i_alleles, class_ii_alleles, species) = split_alleles(alleles)
Expand Down
15 changes: 15 additions & 0 deletions pvactools/tools/pvacbind/valid_netmhciipan_versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import sys

from pvactools.lib.valid_netmhciipan_versions import ValidNetMHCIIPanVersions

def define_parser():
    """Return the argument parser for pvacbind's NetMHCIIpan version listing command."""
    tool_name = 'pvacbind'
    return ValidNetMHCIIPanVersions.parser(tool_name)

def main(args_input = sys.argv[1:]):
    """Parse ``args_input`` and print the valid NetMHCIIpan/NetMHCIIpanEL versions.

    Args:
        args_input: command-line arguments to parse; defaults to the process
            arguments captured when this module was imported.
    """
    parsed = define_parser().parse_args(args_input)
    lister = ValidNetMHCIIPanVersions(parsed.list)
    lister.print_valid_versions()

if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions pvactools/tools/pvacseq/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
'top_score_filter',
'net_chop',
'netmhc_stab',
'valid_netmhciipan_versions',
'calculate_reference_proteome_similarity',
'transcript_support_level_filter',
'identify_problematic_amino_acids',
Expand Down
7 changes: 7 additions & 0 deletions pvactools/tools/pvacseq/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,13 @@ def define_parser():
)
valid_algorithms_parser.set_defaults(func=valid_algorithms)

valid_netmhciipan_versions_parser = subparsers.add_parser(
"valid_netmhciipan_versions",
help="Show a list of valid versions of NetMHCIIpan and NetMHCIIpanEL that can be used.",
add_help=False
)
valid_netmhciipan_versions_parser.set_defaults(func=valid_netmhciipan_versions)

allele_specific_cutoffs_parser = subparsers.add_parser(
"allele_specific_cutoffs",
help="Show the allele specific cutoffs.",
Expand Down
4 changes: 4 additions & 0 deletions pvactools/tools/pvacseq/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ def main(args_input = sys.argv[1:]):
input_file_type = 'vcf'
base_output_dir = os.path.abspath(args.output_dir)

if (args.netmhciipan_version == '4.0' and args.iedb_install_directory is not None):
raise Exception("Standalone IEDB does not support version 4.0")
NetMHCIIVersion.netmhciipan_version = args.netmhciipan_version

(class_i_prediction_algorithms, class_ii_prediction_algorithms) = split_algorithms(args.prediction_algorithms)
alleles = combine_class_ii_alleles(args.allele)
(class_i_alleles, class_ii_alleles, species) = split_alleles(alleles)
Expand Down
15 changes: 15 additions & 0 deletions pvactools/tools/pvacseq/valid_netmhciipan_versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import sys

from pvactools.lib.valid_netmhciipan_versions import ValidNetMHCIIPanVersions

def define_parser():
    """Return the argument parser for pvacseq's NetMHCIIpan version listing command."""
    tool_name = 'pvacseq'
    return ValidNetMHCIIPanVersions.parser(tool_name)

def main(args_input = sys.argv[1:]):
    """Parse ``args_input`` and print the valid NetMHCIIpan/NetMHCIIpanEL versions.

    Args:
        args_input: command-line arguments to parse; defaults to the process
            arguments captured when this module was imported.
    """
    parsed = define_parser().parse_args(args_input)
    lister = ValidNetMHCIIPanVersions(parsed.list)
    lister.print_valid_versions()

if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions pvactools/tools/pvacsplice/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
'identify_problematic_amino_acids',
'net_chop',
'netmhc_stab',
'valid_netmhciipan_versions',
'run',
'top_score_filter',
'transcript_support_level_filter',
Expand Down
7 changes: 7 additions & 0 deletions pvactools/tools/pvacsplice/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ def define_parser():
)
valid_alleles_parser.set_defaults(func=valid_alleles)

valid_netmhciipan_versions_parser = subparsers.add_parser(
"valid_netmhciipan_versions",
help="Show a list of valid versions of NetMHCIIpan and NetMHCIIpanEL that can be used.",
add_help=False
)
valid_netmhciipan_versions_parser.set_defaults(func=valid_netmhciipan_versions)

allele_specific_cutoffs_parser = subparsers.add_parser(
"allele_specific_cutoffs",
help="Show the allele specific cutoffs.",
Expand Down
4 changes: 4 additions & 0 deletions pvactools/tools/pvacsplice/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ def main(args_input = sys.argv[1:]):
junctions_dir = os.path.abspath(args.output_dir)
os.makedirs(junctions_dir, exist_ok=True)

if (args.netmhciipan_version == '4.0' and args.iedb_install_directory is not None):
raise Exception("Standalone IEDB does not support version 4.0")
NetMHCIIVersion.netmhciipan_version = args.netmhciipan_version

(class_i_prediction_algorithms, class_ii_prediction_algorithms) = split_algorithms(args.prediction_algorithms)
(class_i_alleles, class_ii_alleles, species) = split_alleles(args.allele)

Expand Down
15 changes: 15 additions & 0 deletions pvactools/tools/pvacsplice/valid_netmhciipan_versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import sys

from pvactools.lib.valid_netmhciipan_versions import ValidNetMHCIIPanVersions

def define_parser():
    """Return the argument parser for pvacsplice's NetMHCIIpan version listing command."""
    tool_name = 'pvacsplice'
    return ValidNetMHCIIPanVersions.parser(tool_name)

def main(args_input = sys.argv[1:]):
    """Parse ``args_input`` and print the valid NetMHCIIpan/NetMHCIIpanEL versions.

    Args:
        args_input: command-line arguments to parse; defaults to the process
            arguments captured when this module was imported.
    """
    parsed = define_parser().parse_args(args_input)
    lister = ValidNetMHCIIPanVersions(parsed.list)
    lister.print_valid_versions()

if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions pvactools/tools/pvacvector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
'visualize',
'valid_alleles',
'valid_algorithms',
'valid_netmhciipan_versions',
'allele_specific_cutoffs',
'download_example_data',
]
Expand Down
7 changes: 7 additions & 0 deletions pvactools/tools/pvacvector/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ def define_parser():
)
valid_algorithms_parser.set_defaults(func=valid_algorithms)

valid_netmhciipan_versions_parser = subparsers.add_parser(
"valid_netmhciipan_versions",
help="Show a list of valid versions of NetMHCIIpan and NetMHCIIpanEL that can be used.",
add_help=False
)
valid_netmhciipan_versions_parser.set_defaults(func=valid_netmhciipan_versions)

allele_specific_cutoffs_parser = subparsers.add_parser(
"allele_specific_cutoffs",
help="Show the allele specific cutoffs",
Expand Down
5 changes: 5 additions & 0 deletions pvactools/tools/pvacvector/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from pvactools.lib.pvacvector_input_fasta_generator import PvacvectorInputFastaGenerator
from pvactools.lib.pipeline import *
from pvactools.lib.run_utils import *
from pvactools.lib.prediction_class import NetMHCIIVersion
from pvactools.lib.prediction_class_utils import *

def define_parser():
Expand Down Expand Up @@ -400,6 +401,10 @@ def main(args_input=sys.argv[1:]):
base_output_dir = os.path.abspath(args.output_dir)
os.makedirs(base_output_dir, exist_ok=True)

if (args.netmhciipan_version == '4.0' and args.iedb_install_directory is not None):
raise Exception("Standalone IEDB does not support version 4.0")
NetMHCIIVersion.netmhciipan_version = args.netmhciipan_version

if os.environ.get('TEST_FLAG') or os.environ.get('TEST_FLAG') == '1':
random.seed(0.5)
if generate_input_fasta:
Expand Down
15 changes: 15 additions & 0 deletions pvactools/tools/pvacvector/valid_netmhciipan_versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import sys

from pvactools.lib.valid_netmhciipan_versions import ValidNetMHCIIPanVersions

def define_parser():
    """Return the argument parser for pvacvector's NetMHCIIpan version listing command."""
    tool_name = 'pvacvector'
    return ValidNetMHCIIPanVersions.parser(tool_name)

def main(args_input = sys.argv[1:]):
    """Parse ``args_input`` and print the valid NetMHCIIpan/NetMHCIIpanEL versions.

    Args:
        args_input: command-line arguments to parse; defaults to the process
            arguments captured when this module was imported.
    """
    parsed = define_parser().parse_args(args_input)
    lister = ValidNetMHCIIPanVersions(parsed.list)
    lister.print_valid_versions()

if __name__ == "__main__":
main()
Loading