citations.bib

@article{kristianingsih_accurate_2021,
	author = {Kristianingsih, Ruth and MacLean, Dan},
	title = {Accurate plant pathogen effector protein classification ab initio with deepredeff: an ensemble of convolutional neural networks},
	journal = {BMC Bioinformatics},
	year = {2021},
	volume = {22},
	number = {1},
	pages = {372},
	doi = {10.1186/s12859-021-04293-3},
	url = {https://doi.org/10.1186/s12859-021-04293-3},
}

@article{jones_predector_2021,
	title = {An automated and combinative method for the predictive ranking of candidate effector proteins of fungal plant pathogens},
	volume = {11},
	doi = {10.1038/s41598-021-99363-0},
	language = {en},
	number = {1},
	journal = {Scientific Reports},
	author = {Jones, Darcy A. B. and Rozano, Lina and Debler, Johannes and Mancera, Ricardo L. and Moolhuijzen, Paula and Hane, James K.},
	year = {2021},
	pages = {19731},
}

@article{finn_pfam_2014,
	title = {Pfam: the protein families database},
	volume = {42},
	issn = {0305-1048},
	shorttitle = {Pfam},
	url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3965110/},
	doi = {10.1093/nar/gkt1223},
	abstract = {Pfam, available via servers in the UK (http://pfam.sanger.ac.uk/) and the USA (http://pfam.janelia.org/), is a widely used database of protein families, containing 14 831 manually curated entries in the current release, version 27.0. Since the last update article 2 years ago, we have generated 1182 new families and maintained sequence coverage of the UniProt Knowledgebase (UniProtKB) at nearly 80\%, despite a 50\% increase in the size of the underlying sequence database. Since our 2012 article describing Pfam, we have also undertaken a comprehensive review of the features that are provided by Pfam over and above the basic family data. For each feature, we determined the relevance, computational burden, usage statistics and the functionality of the feature in a website context. As a consequence of this review, we have removed some features, enhanced others and developed new ones to meet the changing demands of computational biology. Here, we describe the changes to Pfam content. Notably, we now provide family alignments based on four different representative proteome sequence data sets and a new interactive DNA search interface. We also discuss the mapping between Pfam and known 3D structures.},
	number = {D1},
	urldate = {2020-05-07},
	journal = {Nucleic Acids Research},
	author = {Finn, Robert D. and Bateman, Alex and Clements, Jody and Coggill, Penelope and Eberhardt, Ruth Y. and Eddy, Sean R. and Heger, Andreas and Hetherington, Kirstie and Holm, Liisa and Mistry, Jaina and Sonnhammer, Erik L. L. and Tate, John and Punta, Marco},
	month = jan,
	year = {2014},
	pmid = {24288371},
	pmcid = {PMC3965110},
	pages = {D222--D230},
}

@article{urban_phi-base_2020,
	title = {{PHI}-base: the pathogen–host interactions database},
	volume = {48},
	issn = {0305-1048},
	shorttitle = {{PHI}-base},
	url = {https://academic.oup.com/nar/article/48/D1/D613/5626528},
	doi = {10.1093/nar/gkz904},
	abstract = {Abstract.  The pathogen–host interactions database (PHI-base) is available at www.phi-base.org. PHI-base contains expertly curated molecular and biological info},
	language = {en},
	number = {D1},
	urldate = {2020-05-07},
	journal = {Nucleic Acids Research},
	author = {Urban, Martin and Cuzick, Alayne and Seager, James and Wood, Valerie and Rutherford, Kim and Venkatesh, Shilpa Yagwakote and De Silva, Nishadi and Martinez, Manuel Carbajo and Pedro, Helder and Yates, Andy D. and Hassani-Pak, Keywan and Hammond-Kosack, Kim E.},
	month = jan,
	year = {2020},
	publisher = {Oxford Academic},
	pages = {D613--D620},
}

@article{steinegger_mmseqs2_2017,
	title = {{MMseqs2} enables sensitive protein sequence searching for the analysis of massive data sets},
	volume = {35},
	copyright = {2017 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.},
	issn = {1546-1696},
	url = {https://www.nature.com/articles/nbt.3988},
	doi = {10.1038/nbt.3988},
	language = {en},
	number = {11},
	urldate = {2020-05-07},
	journal = {Nature Biotechnology},
	author = {Steinegger, Martin and Söding, Johannes},
	month = nov,
	year = {2017},
	publisher = {Nature Publishing Group},
	pages = {1026--1028},
}

@article{zhang_dbcan2_2018,
	title = {{dbCAN2}: a meta server for automated carbohydrate-active enzyme annotation},
	volume = {46},
	issn = {0305-1048},
	shorttitle = {{dbCAN2}},
	url = {https://academic.oup.com/nar/article/46/W1/W95/4996582},
	doi = {10.1093/nar/gky418},
	abstract = {Abstract.  Complex carbohydrates of plants are the main food sources of animals and microbes, and serve as promising renewable feedstock for biofuel and biomate},
	language = {en},
	number = {W1},
	urldate = {2020-05-07},
	journal = {Nucleic Acids Research},
	author = {Zhang, Han and Yohe, Tanner and Huang, Le and Entwistle, Sarah and Wu, Peizhi and Yang, Zhenglu and Busk, Peter K. and Xu, Ying and Yin, Yanbin},
	month = jul,
	year = {2018},
	publisher = {Oxford Academic},
	pages = {W95--W101},
}

@article{eddy_accelerated_2011,
	title = {Accelerated {Profile} {HMM} {Searches}},
	volume = {7},
	issn = {1553-7358},
	url = {https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1002195},
	doi = {10.1371/journal.pcbi.1002195},
	abstract = {Profile hidden Markov models (profile HMMs) and probabilistic inference methods have made important contributions to the theory of sequence database homology search. However, practical use of profile HMM methods has been hindered by the computational expense of existing software implementations. Here I describe an acceleration heuristic for profile HMMs, the “multiple segment Viterbi” (MSV) algorithm. The MSV algorithm computes an optimal sum of multiple ungapped local alignment segments using a striped vector-parallel approach previously described for fast Smith/Waterman alignment. MSV scores follow the same statistical distribution as gapped optimal local alignment scores, allowing rapid evaluation of significance of an MSV score and thus facilitating its use as a heuristic filter. I also describe a 20-fold acceleration of the standard profile HMM Forward/Backward algorithms using a method I call “sparse rescaling”. These methods are assembled in a pipeline in which high-scoring MSV hits are passed on for reanalysis with the full HMM Forward/Backward algorithm. This accelerated pipeline is implemented in the freely available HMMER3 software package. Performance benchmarks show that the use of the heuristic MSV filter sacrifices negligible sensitivity compared to unaccelerated profile HMM searches. HMMER3 is substantially more sensitive and 100- to 1000-fold faster than HMMER2. HMMER3 is now about as fast as BLAST for protein searches.},
	language = {en},
	number = {10},
	urldate = {2020-05-07},
	journal = {PLOS Computational Biology},
	author = {Eddy, Sean R.},
	month = oct,
	year = {2011},
	publisher = {Public Library of Science},
	keywords = {Algorithms, BLAST algorithm, Database searching, Heuristic alignment procedure, Hidden Markov models, Multiple alignment calculation, Sequence alignment, Sequence databases},
	pages = {e1002195},
}

@article{rice_emboss_2000,
	title = {{EMBOSS}: {The} {European} {Molecular} {Biology} {Open} {Software} {Suite}},
	volume = {16},
	issn = {0168-9525},
	shorttitle = {{EMBOSS}},
	url = {https://www.cell.com/trends/genetics/abstract/S0168-9525(00)02024-2},
	doi = {10.1016/S0168-9525(00)02024-2},
	language = {English},
	number = {6},
	urldate = {2020-05-07},
	journal = {Trends in Genetics},
	author = {Rice, Peter and Longden, Ian and Bleasby, Alan},
	month = jun,
	year = {2000},
	pmid = {10827456},
	publisher = {Elsevier},
	keywords = {Bioinformatics, Database, EMBOSS, Genetics},
	pages = {276--277},
}

@article{krogh_predicting_2001,
	author = {Krogh, A. and Larsson, B. and von Heijne, G. and Sonnhammer, E. L.},
	title = {Predicting transmembrane protein topology with a hidden {Markov} model: application to complete genomes},
	shorttitle = {Predicting transmembrane protein topology with a hidden {Markov} model},
	journal = {Journal of Molecular Biology},
	year = {2001},
	month = jan,
	volume = {305},
	number = {3},
	pages = {567--580},
	issn = {0022-2836},
	doi = {10.1006/jmbi.2000.4315},
	pmid = {11152613},
	language = {eng},
	keywords = {Animals, Bacterial Proteins, Computational Biology, Databases as Topic, Fungal Proteins, Genome, Internet, Markov Chains, Membrane Proteins, Plant Proteins, Porins, Protein Sorting Signals, Protein Structure, Secondary, Reproducibility of Results, Research Design, Sensitivity and Specificity, Software, Solubility},
	abstract = {We describe and validate a new membrane protein topology prediction method, TMHMM, based on a hidden Markov model. We present a detailed analysis of TMHMM's performance, and show that it correctly predicts 97-98 \% of the transmembrane helices. Additionally, TMHMM can discriminate between soluble and membrane proteins with both specificity and sensitivity better than 99 \%, although the accuracy drops when signal peptides are present. This high degree of accuracy allowed us to predict reliably integral membrane proteins in a large collection of genomes. Based on these predictions, we estimate that 20-30 \% of all genes in most genomes encode membrane proteins, which is in agreement with previous estimates. We further discovered that proteins with N(in)-C(in) topologies are strongly preferred in all examined organisms, except Caenorhabditis elegans, where the large number of 7TM receptors increases the counts for N(out)-C(in) topologies. We discuss the possible relevance of this finding for our understanding of membrane protein assembly mechanisms. A TMHMM prediction service is available at http://www.cbs.dtu.dk/services/TMHMM/.},
}

@article{kall_combined_2004,
	title = {A {Combined} {Transmembrane} {Topology} and {Signal} {Peptide} {Prediction} {Method}},
	volume = {338},
	issn = {0022-2836},
	url = {http://www.sciencedirect.com/science/article/pii/S0022283604002943},
	doi = {10.1016/j.jmb.2004.03.016},
	abstract = {An inherent problem in transmembrane protein topology prediction and signal peptide prediction is the high similarity between the hydrophobic regions of a transmembrane helix and that of a signal peptide, leading to cross-reaction between the two types of predictions. To improve predictions further, it is therefore important to make a predictor that aims to discriminate between the two classes. In addition, topology information can be gained when successfully predicting a signal peptide leading a transmembrane protein since it dictates that the N terminus of the mature protein must be on the non-cytoplasmic side of the membrane. Here, we present Phobius, a combined transmembrane protein topology and signal peptide predictor. The predictor is based on a hidden Markov model (HMM) that models the different sequence regions of a signal peptide and the different regions of a transmembrane protein in a series of interconnected states. Training was done on a newly assembled and curated dataset. Compared to TMHMM and SignalP, errors coming from cross-prediction between transmembrane segments and signal peptides were reduced substantially by Phobius. False classifications of signal peptides were reduced from 26.1\% to 3.9\% and false classifications of transmembrane helices were reduced from 19.0\% to 7.7\%. Phobius was applied to the proteomes of Homo sapiens and Escherichia coli. Here we also noted a drastic reduction of false classifications compared to TMHMM/SignalP, suggesting that Phobius is well suited for whole-genome annotation of signal peptides and transmembrane regions. The method is available at http://phobius.cgb.ki.se/ as well as at http://phobius.binf.ku.dk/},
	language = {en},
	number = {5},
	urldate = {2020-05-07},
	journal = {Journal of Molecular Biology},
	author = {Käll, Lukas and Krogh, Anders and Sonnhammer, Erik L. L.},
	month = may,
	year = {2004},
	keywords = {hidden Markov model, machine learning, signal peptide, topology prediction, transmembrane protein},
	pages = {1027--1036},
}

@article{savojardo_deepsig_2018,
	title = {{DeepSig}: deep learning improves signal peptide detection in proteins},
	volume = {34},
	issn = {1367-4803},
	shorttitle = {{DeepSig}},
	url = {https://academic.oup.com/bioinformatics/article/34/10/1690/4769493},
	doi = {10.1093/bioinformatics/btx818},
	abstract = {AbstractMotivation.  The identification of signal peptides in protein sequences is an important step toward protein localization and function characterization.R},
	language = {en},
	number = {10},
	urldate = {2020-05-07},
	journal = {Bioinformatics},
	author = {Savojardo, Castrense and Martelli, Pier Luigi and Fariselli, Piero and Casadio, Rita},
	month = may,
	year = {2018},
	publisher = {Oxford Academic},
	pages = {1690--1696},
}

@article{armenteros_signalp_2019,
	title = {{SignalP} 5.0 improves signal peptide predictions using deep neural networks},
	volume = {37},
	copyright = {2019 The Author(s), under exclusive licence to Springer Nature America, Inc.},
	issn = {1546-1696},
	url = {https://www.nature.com/articles/s41587-019-0036-z},
	doi = {10.1038/s41587-019-0036-z},
	abstract = {SignalP 5.0 improves proteome-wide detection of signal peptides across all organisms and can distinguish between different types of signal peptides in prokaryotes.},
	language = {en},
	number = {4},
	urldate = {2020-05-07},
	journal = {Nature Biotechnology},
	author = {Almagro Armenteros, José Juan and Tsirigos, Konstantinos D. and Sønderby, Casper Kaae and Petersen, Thomas Nordahl and Winther, Ole and Brunak, Søren and von Heijne, Gunnar and Nielsen, Henrik},
	month = apr,
	year = {2019},
	publisher = {Nature Publishing Group},
	pages = {420--423},
}

@article{dyrlov_bendtsen_improved_2004,
	author = {Dyrløv Bendtsen, Jannick and Nielsen, Henrik and von Heijne, Gunnar and Brunak, Søren},
	title = {Improved {Prediction} of {Signal} {Peptides}: {SignalP} 3.0},
	shorttitle = {Improved {Prediction} of {Signal} {Peptides}},
	journal = {Journal of Molecular Biology},
	year = {2004},
	month = jul,
	volume = {340},
	number = {4},
	pages = {783--795},
	issn = {0022-2836},
	doi = {10.1016/j.jmb.2004.05.028},
	url = {http://www.sciencedirect.com/science/article/pii/S0022283604005972},
	urldate = {2020-05-07},
	language = {en},
	keywords = {hidden Markov model, neural network, signal peptidase I, signal peptide, SignalP},
	abstract = {We describe improvements of the currently most popular method for prediction of classically secreted proteins, SignalP. SignalP consists of two different predictors based on neural network and hidden Markov model algorithms, where both components have been updated. Motivated by the idea that the cleavage site position and the amino acid composition of the signal peptide are correlated, new features have been included as input to the neural network. This addition, combined with a thorough error-correction of a new data set, have improved the performance of the predictor significantly over SignalP version 2. In version 3, correctness of the cleavage site predictions has increased notably for all three organism groups, eukaryotes, Gram-negative and Gram-positive bacteria. The accuracy of cleavage site prediction has increased in the range 6–17\% over the previous version, whereas the signal peptide discrimination improvement is mainly due to the elimination of false-positive predictions, as well as the introduction of a new discrimination score for the neural network. The new method has been benchmarked against other available methods. Predictions can be made at the publicly available web server http://www.cbs.dtu.dk/services/SignalP/},
}

@article{petersen_signalp_2011,
	title = {{SignalP} 4.0: discriminating signal peptides from transmembrane regions},
	volume = {8},
	copyright = {2011 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.},
	issn = {1548-7105},
	shorttitle = {{SignalP} 4.0},
	url = {https://www.nature.com/articles/nmeth.1701},
	doi = {10.1038/nmeth.1701},
	language = {en},
	number = {10},
	urldate = {2020-05-07},
	journal = {Nature Methods},
	author = {Petersen, Thomas Nordahl and Brunak, Søren and von Heijne, Gunnar and Nielsen, Henrik},
	month = oct,
	year = {2011},
	publisher = {Nature Publishing Group},
	pages = {785--786},
}

@article{di_tommaso_nextflow_2017,
	title = {Nextflow enables reproducible computational workflows},
	volume = {35},
	copyright = {2017 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.},
	issn = {1546-1696},
	url = {https://www.nature.com/articles/nbt.3820},
	doi = {10.1038/nbt.3820},
	language = {en},
	number = {4},
	urldate = {2020-07-06},
	journal = {Nature Biotechnology},
	author = {Di Tommaso, Paolo and Chatzou, Maria and Floden, Evan W. and Barja, Pablo Prieto and Palumbo, Emilio and Notredame, Cedric},
	month = apr,
	year = {2017},
	publisher = {Nature Publishing Group},
	pages = {316--319},
}

@misc{tange_gnu_2020,
	author = {Tange, Ole},
	title = {{GNU} {Parallel} 20200522 ('{Kraftwerk}')},
	publisher = {Zenodo},
	year = {2020},
	month = may,
	doi = {10.5281/zenodo.3841377},
	url = {https://doi.org/10.5281/zenodo.3841377},
}

@article{sperschneider_effectorp_2016,
	title = {{EffectorP}: predicting fungal effector proteins from secretomes using machine learning},
	volume = {210},
	copyright = {© 2015 CSIRO New Phytologist © 2015 New Phytologist Trust},
	issn = {1469-8137},
	shorttitle = {{EffectorP}},
	url = {https://nph.onlinelibrary.wiley.com/doi/abs/10.1111/nph.13794},
	doi = {10.1111/nph.13794},
	abstract = {Eukaryotic filamentous plant pathogens secrete effector proteins that modulate the host cell to facilitate infection. Computational effector candidate identification and subsequent functional characterization delivers valuable insights into plant–pathogen interactions. However, effector prediction in fungi has been challenging due to a lack of unifying sequence features such as conserved N-terminal sequence motifs. Fungal effectors are commonly predicted from secretomes based on criteria such as small size and cysteine-rich, which suffers from poor accuracy. We present EffectorP which pioneers the application of machine learning to fungal effector prediction. EffectorP improves fungal effector prediction from secretomes based on a robust signal of sequence-derived properties, achieving sensitivity and specificity of over 80\%. Features that discriminate fungal effectors from secreted noneffectors are predominantly sequence length, molecular weight and protein net charge, as well as cysteine, serine and tryptophan content. We demonstrate that EffectorP is powerful when combined with in planta expression data for predicting high-priority effector candidates. EffectorP is the first prediction program for fungal effectors based on machine learning. Our findings will facilitate functional fungal effector studies and improve our understanding of effectors in plant–pathogen interactions. EffectorP is available at http://effectorp.csiro.au.},
	language = {en},
	number = {2},
	urldate = {2020-05-07},
	journal = {New Phytologist},
	author = {Sperschneider, Jana and Gardiner, Donald M. and Dodds, Peter N. and Tini, Francesco and Covarelli, Lorenzo and Singh, Karam B. and Manners, John M. and Taylor, Jennifer M.},
	year = {2016},
	internal-note = {\_eprint: https://nph.onlinelibrary.wiley.com/doi/pdf/10.1111/nph.13794},
	keywords = {effector, EffectorP, fungal effector prediction, fungal pathogen, machine learning, secretomes},
	pages = {743--761},
}

@article{sperschneider_improved_2018,
	title = {Improved prediction of fungal effector proteins from secretomes with {EffectorP} 2.0},
	volume = {19},
	copyright = {© 2018 BSPP and John Wiley \& Sons Ltd},
	issn = {1364-3703},
	url = {https://bsppjournals.onlinelibrary.wiley.com/doi/abs/10.1111/mpp.12682},
	doi = {10.1111/mpp.12682},
	abstract = {Plant-pathogenic fungi secrete effector proteins to facilitate infection. We describe extensive improvements to EffectorP, the first machine learning classifier for fungal effector prediction. EffectorP 2.0 is now trained on a larger set of effectors and utilizes a different approach based on an ensemble of classifiers trained on different subsets of negative data, offering different views on classification. EffectorP 2.0 achieves an accuracy of 89\%, compared with 82\% for EffectorP 1.0 and 59.8\% for a small size classifier. Important features for effector prediction appear to be protein size, protein net charge as well as the amino acids serine and cysteine. EffectorP 2.0 decreases the number of predicted effectors in secretomes of fungal plant symbionts and saprophytes by 40\% when compared with EffectorP 1.0. However, EffectorP 1.0 retains value, and combining EffectorP 1.0 and 2.0 results in a stringent classifier with a low false positive rate of 9\%. EffectorP 2.0 predicts significant enrichments of effectors in 12 of 13 sets of infection-induced proteins from diverse fungal pathogens, whereas a small cysteine-rich classifier detects enrichment in only seven of 13. EffectorP 2.0 will fast track the prioritization of high-confidence effector candidates for functional validation and aid in improving our understanding of effector biology. EffectorP 2.0 is available at http://effectorp.csiro.au.},
	language = {en},
	number = {9},
	urldate = {2020-05-07},
	journal = {Molecular Plant Pathology},
	author = {Sperschneider, Jana and Dodds, Peter N. and Gardiner, Donald M. and Singh, Karam B. and Taylor, Jennifer M.},
	year = {2018},
	internal-note = {\_eprint: https://bsppjournals.onlinelibrary.wiley.com/doi/pdf/10.1111/mpp.12682},
	keywords = {effector, effector prediction, EffectorP, fungal pathogens, machine learning, secretomes},
	pages = {2094--2110},
}

@article{sperschneider_effectorp3_2021,
	author = {Sperschneider, Jana and Dodds, Peter N.},
	title = {{EffectorP} 3.0: prediction of apoplastic and cytoplasmic effectors in fungi and oomycetes},
	year = {2021},
	doi = {10.1101/2021.07.28.454080},
	publisher = {Cold Spring Harbor Laboratory},
	journal = {bioRxiv},
}


@article{sperschneider_localizer_2017,
	title = {{LOCALIZER}: subcellular localization prediction of both plant and effector proteins in the plant cell},
	volume = {7},
	copyright = {2017 The Author(s)},
	issn = {2045-2322},
	shorttitle = {{LOCALIZER}},
	url = {https://www.nature.com/articles/srep44598},
	doi = {10.1038/srep44598},
	abstract = {Pathogens secrete effector proteins and many operate inside plant cells to enable infection. Some effectors have been found to enter subcellular compartments by mimicking host targeting sequences. Although many computational methods exist to predict plant protein subcellular localization, they perform poorly for effectors. We introduce LOCALIZER for predicting plant and effector protein localization to chloroplasts, mitochondria, and nuclei. LOCALIZER shows greater prediction accuracy for chloroplast and mitochondrial targeting compared to other methods for 652 plant proteins. For 107 eukaryotic effectors, LOCALIZER outperforms other methods and predicts a previously unrecognized chloroplast transit peptide for the ToxA effector, which we show translocates into tobacco chloroplasts. Secretome-wide predictions and confocal microscopy reveal that rust fungi might have evolved multiple effectors that target chloroplasts or nuclei. LOCALIZER is the first method for predicting effector localisation in plants and is a valuable tool for prioritizing effector candidates for functional investigations. LOCALIZER is available at http://localizer.csiro.au/.},
	language = {en},
	number = {1},
	urldate = {2020-05-07},
	journal = {Scientific Reports},
	author = {Sperschneider, Jana and Catanzariti, Ann-Maree and DeBoer, Kathleen and Petre, Benjamin and Gardiner, Donald M. and Singh, Karam B. and Dodds, Peter N. and Taylor, Jennifer M.},
	month = mar,
	year = {2017},
	publisher = {Nature Publishing Group},
	pages = {1--14},
}

@article{sperschneider_apoplastp_2018,
	title = {{ApoplastP}: prediction of effectors and plant proteins in the apoplast using machine learning},
	volume = {217},
	copyright = {© 2017 CSIRO New Phytologist © 2017 New Phytologist Trust},
	issn = {1469-8137},
	shorttitle = {{ApoplastP}},
	url = {https://nph.onlinelibrary.wiley.com/doi/abs/10.1111/nph.14946},
	doi = {10.1111/nph.14946},
	abstract = {The plant apoplast is integral to intercellular signalling, transport and plant–pathogen interactions. Plant pathogens deliver effectors both into the apoplast and inside host cells, but no computational method currently exists to discriminate between these localizations. We present ApoplastP, the first method for predicting whether an effector or plant protein localizes to the apoplast. ApoplastP uncovers features of apoplastic localization common to both effectors and plant proteins, namely depletion in glutamic acid, acidic amino acids and charged amino acids and enrichment in small amino acids. ApoplastP predicts apoplastic localization in effectors with a sensitivity of 75\% and a false positive rate of 5\%, improving the accuracy of cysteine-rich classifiers by {\textgreater} 13\%. ApoplastP does not depend on the presence of a signal peptide and correctly predicts the localization of unconventionally secreted proteins. The secretomes of fungal saprophytes as well as necrotrophic, hemibiotrophic and extracellular fungal pathogens are enriched for predicted apoplastic proteins. Rust pathogens have low proportions of predicted apoplastic proteins, but these are highly enriched for predicted effectors. ApoplastP pioneers apoplastic localization prediction using machine learning. It will facilitate functional studies and will be valuable for predicting if an effector localizes to the apoplast or if it enters plant cells.},
	language = {en},
	number = {4},
	urldate = {2020-05-07},
	journal = {New Phytologist},
	author = {Sperschneider, Jana and Dodds, Peter N. and Singh, Karam B. and Taylor, Jennifer M.},
	year = {2018},
	internal-note = {\_eprint: https://nph.onlinelibrary.wiley.com/doi/pdf/10.1111/nph.14946},
	keywords = {apoplast, apoplastic localization, ApoplastP, effectors, machine learning, plant pathogens, plant proteomics},
	pages = {1764--1778},
}

@article{almagro_armenteros_deeploc_2017,
	title = {{DeepLoc}: prediction of protein subcellular localization using deep learning},
	volume = {33},
	issn = {1367-4803},
	shorttitle = {{DeepLoc}},
	url = {https://academic.oup.com/bioinformatics/article/33/21/3387/3931857},
	doi = {10.1093/bioinformatics/btx431},
	language = {en},
	number = {21},
	urldate = {2020-05-07},
	journal = {Bioinformatics},
	author = {Almagro Armenteros, José Juan and Sønderby, Casper Kaae and Sønderby, Søren Kaae and Nielsen, Henrik and Winther, Ole},
	month = nov,
	year = {2017},
	publisher = {Oxford Academic},
	pages = {3387--3395},
}

@article{armenteros_detecting_2019,
	title = {Detecting sequence signals in targeting peptides using deep learning},
	volume = {2},
	copyright = {© 2019 Armenteros et al.. https://creativecommons.org/licenses/by/4.0/This article is available under a Creative Commons License (Attribution 4.0 International, as described at https://creativecommons.org/licenses/by/4.0/).},
	issn = {2575-1077},
	url = {https://www.life-science-alliance.org/content/2/5/e201900429},
	doi = {10.26508/lsa.201900429},
	abstract = {In bioinformatics, machine learning methods have been used to predict features embedded in the sequences. In contrast to what is generally assumed, machine learning approaches can also provide new insights into the underlying biology. Here, we demonstrate this by presenting TargetP 2.0, a novel state-of-the-art method to identify N-terminal sorting signals, which direct proteins to the secretory pathway, mitochondria, and chloroplasts or other plastids. By examining the strongest signals from the attention layer in the network, we find that the second residue in the protein, that is, the one following the initial methionine, has a strong influence on the classification. We observe that two-thirds of chloroplast and thylakoid transit peptides have an alanine in position 2, compared with 20\% in other plant proteins. We also note that in fungi and single-celled eukaryotes, less than 30\% of the targeting peptides have an amino acid that allows the removal of the N-terminal methionine compared with 60\% for the proteins without targeting peptide. The importance of this feature for predictions has not been highlighted before.},
	language = {en},
	number = {5},
	urldate = {2020-05-07},
	journal = {Life Science Alliance},
	author = {Almagro Armenteros, José Juan and Salvatore, Marco and Emanuelsson, Olof and Winther, Ole and von Heijne, Gunnar and Elofsson, Arne and Nielsen, Henrik},
	month = oct,
	year = {2019},
	pmid = {31570514},
	publisher = {Life Science Alliance},
	internal-note = {Section: Methods},
	pages = {e201900429},
}

@article{teufel_signalp6_2021,
	author = {Teufel, Felix and Almagro Armenteros, Jos{\'e} Juan and Johansen, Alexander Rosenberg and G{\'i}slason, Magn{\'u}s Halld{\'o}r and Pihl, Silas Irby and Tsirigos, Konstantinos D. and Winther, Ole and Brunak, S{\o}ren and von Heijne, Gunnar and Nielsen, Henrik},
	title = {{SignalP} 6.0 achieves signal peptide prediction across all types using protein language models},
	year = {2021},
	doi = {10.1101/2021.06.09.447770},
	publisher = {Cold Spring Harbor Laboratory},
	journal = {bioRxiv},
}