Skip to content

Commit

Permalink
Add documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
yufongpeng committed Jun 30, 2022
1 parent 1879c40 commit 32b8118
Show file tree
Hide file tree
Showing 8 changed files with 125 additions and 53 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
[![Build Status](https://github.com/yufongpeng/PeptideSeq.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/yufongpeng/PeptideSeq.jl/actions/workflows/CI.yml?query=branch%3Amain)
[![Coverage](https://codecov.io/gh/yufongpeng/PeptideSeq.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/yufongpeng/PeptideSeq.jl)

*PeptideSeq.jl* is a julia package for predicting digested peptide sequence, adding modification and generating expected fragments in mass spectrometry.
*PeptideSeq.jl* is a Julia package for predicting peptide sequences after digestion, adding modifications, and generating expected fragments in mass spectrometry. Digestion enzymes and modifications can be customized.

## Installation
This package is not yet registered. Insall it through github:
This package is not registered yet. Install it through GitHub:
```julia
julia> using Pkg; Pkg.add(url = "https://github.com/yufongpeng/PeptideSeq.jl")
```
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Modification Accurate Mass Avearge Mass Sites
3NPH 135.043262 135.043262 D E $
3NPH 135.043262 135.12472 D E $
59 changes: 58 additions & 1 deletion src/PeptideSeq.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,26 @@
module PeptideSeq
using IterTools, PrettyTables

export Protein, Peptide, Fragments, digest!, modify!, ionize!, fragmentation
export Protein, Peptide, Fragments,

digest!, modify!, ionize!, fragmentation,

add_enzyme!, add_modification!,

MODIFICATION_SITE, ENZYME, CONFIG

"""
Peptide
Representation of a peptide digested from a `Protein`.
# Field
* `origin`: Original protein sequence
* `position`: Position of this peptide counting from N-terminal
* `mass`: Monoisotopic or average mass depending on `CONFIG["ACCURATE"]`
* `adduct`: Adduct of the ionized peptide. When the peptide is not ionized, it is "[M]".
* `miss_cleavage`: Number of missed cleavages allowed
* `modification`: modification on the peptide
"""
mutable struct Peptide
origin::AbstractString
position::UnitRange{Int}
Expand All @@ -18,6 +36,17 @@ struct Fragments
fragments::NamedTuple{(:type, :mass), Tuple{Vector{String}, Vector{Float64}}}
end

"""
Protein
Protein(sequence, enzyme = "", modification = Dict{String, Vector{Int}}())
Representation of a protein.
# Field
* `origin`: Protein sequence
* `peptides`: a vector of `Peptide` digested with `enzyme`
* `modification`: modification on the protein
* `enzyme`: an enzyme for digesting the protein
"""
mutable struct Protein
origin::AbstractString
peptides::Vector{Peptide}
Expand All @@ -34,5 +63,33 @@ include("io.jl")
include("preparation.jl")
include("ms_spec.jl")

"""
    add_enzyme!(source)

Add custom enzymes to `ENZYME` from the tsv file `source`.

The first row is the header (can be empty). The first column is the name of the enzyme
and the remaining columns are regular expressions of the cleavage sites.
If multiple regular expressions are given, the digested sites will be the union of all
possible sites.

See the example file "config_example/ENZYME.tsv".
"""
function add_enzyme!(source)
    for row in CSV.Rows(source, delim = "\t")
        # NOTE(review): every non-missing cell is parsed and evaluated as Julia code, so
        # `source` must be a trusted file; a malicious tsv could run arbitrary code here.
        cleavage_sites = [
            eval(Meta.parse(row[i])) for i in 2:length(row) if !ismissing(row[i])
        ]
        push!(ENZYME, row[1] => cleavage_sites)
    end
    return nothing
end

"""
    add_modification!(source)

Add custom modifications from the tab-delimited file `source`.

The first row is the header (can be empty). The first column is the name of the
modification, the second and third columns are the additional monoisotopic mass and
average mass, respectively, and the remaining columns are the modification sites.
`^` represents the N-terminal and `\$` represents the C-terminal.

See the example file "config_example/MODIFICATION.tsv".
"""
function add_modification!(source)
    # MODIFICATION_MS holds two tables: monoisotopic masses first, average masses second.
    monoisotopic, average = MODIFICATION_MS[1], MODIFICATION_MS[2]
    for row in CSV.Rows(source, delim = "\t")
        monoisotopic[row[1]] = parse(Float64, row[2])
        average[row[1]] = parse(Float64, row[3])
        # Rows may have fewer sites than the widest row; skip the missing cells.
        sites = [row[i] for i in 4:length(row) if !ismissing(row[i])]
        MODIFICATION_SITE[row[1]] = sites
    end
    return nothing
end


end
22 changes: 4 additions & 18 deletions src/config.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,27 +66,13 @@ const ADD_MS = (
)
)

const MODIFICATION_SITE = Dict{String, Vector{String}}()
const MODIFICATION_SITE = Dict{String, Vector{String}}("3NPH" => ["D", "E", "\$"])

const MODIFICATION_MS = (
Dict{String, Float64}(),
Dict{String, Float64}()
Dict{String, Float64}("3NPH" => 135.043262),
Dict{String, Float64}("3NPH" => 135.12472)
)


for m in CSV.Rows("src/config/MODIFICATION.tsv", delim = "\t")
push!(MODIFICATION_MS[1], m[1] => parse(Float64, m[2]))
push!(MODIFICATION_MS[2], m[1] => parse(Float64, m[3]))
push!(MODIFICATION_SITE, m[1] => [loc for loc in getindex.(Ref(m), 4:length(m)) if !ismissing(loc)])
end

const ENZYME = Dict{String, Vector{Regex}}()

for e in CSV.Rows("src/config/ENZYME.tsv", delim = "\t")
push!(ENZYME, e[1] => [eval(Meta.parse(r)) for r in getindex.(Ref(e), 2:length(e)) if !ismissing(r)])
end

#=
const ENZYME = Dict(
"Trypsin" => [r"[K, R][^P]"], # R/KX, X≠P
"Trypsin(low specificity)" => [r"[K, R]."], # R/KX
Expand All @@ -110,7 +96,7 @@ const ENZYME = Dict(
"Trypsin/Chymotrypsin" => [r"[F, Y, W, L, X, K, R][^P]"],
"MAFA" => [r"D."] # DX
)
=#

const ADDUCT_FN = (
Dict(
"[M]" => identity,
Expand Down
19 changes: 16 additions & 3 deletions src/ms_spec.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Functions related to mass spectrometry

"""
ionize!(protein, adducts...)
Ionize the peptides as the `adducts`.
Available adducts are "[M]", "[M+H]+", "[M+2H]2+", "[M-H]-", "[M-2H]2-".
"""
function ionize!(protein::Protein, adducts::String...)
if CONFIG["ACCURATE"]
adduct_fn = first(ADDUCT_FN)
Expand All @@ -25,6 +31,13 @@ end
# y: +H => +H2O / peptide - b
# z: -NH2 => -NH3 + H2O / peptide - c

"""
fragmentation(peptide; ion_type = [:b, :y], charge_state = :auto)
Fragmentation of a peptide.
`ion_type` is the type of fragments. It can be a vector containing `:a`, `:b`, `:c`, `:x`, `:y`, `:z`. The default is `[:b, :y]`, which are the major fragments in CID or HCD.
`charge_state` determines the number of charges on the fragments. It can be a vector containing integers or a symbol. The default is `:auto`, which means that doubly charged fragments will be included for fragments containing more than 5 amino acids.
"""
function fragmentation(peptide::Peptide; ion_type = [:b, :y], charge_state = :auto)
if CONFIG["ACCURATE"]
aa_ms = first(AA_MS)
Expand Down Expand Up @@ -80,10 +93,10 @@ function _charge_fragments(neutral_fragments::Dict{Symbol, Vector{Float64}}, cha
id = 1
for ion in ion_type
for charge in charge_state
charge = charge == 1 ? "" : "$charge"
adduct = "[M$(ion_mode)$(charge)H]$(charge)$(ion_mode)"
charge = UPPER_INDEX[charge * ion_mode]
for (i, v) in enumerate(neutral_fragments[ion])
charge = charge == 1 ? "" : "$charge"
adduct = "[M$(ion_mode)$(charge)H]$(charge)$(ion_mode)"
charge = UPPER_INDEX[charge * ion_mode]
type[id] = String(ion) * Char(8320 + i) * charge
mass[id] = adduct_fn[adduct](v)
id += 1
Expand Down
68 changes: 42 additions & 26 deletions src/preparation.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,39 @@
# Digestion and modification

"""
    modify!(protein, modification...)

Modify an intact protein or a digested protein and return `protein`.

If `protein` has already been digested (`protein.peptides` is not empty), the work is
delegated to `_modify_mass!`. Otherwise, the positions of every site listed in
`MODIFICATION_SITE` for each modification are looked up in `protein.origin` and stored
in `protein.modification`, replacing any positions previously stored under that name.

Each modification must be a key of `MODIFICATION_SITE` or it will be ignored.
See object `MODIFICATION_SITE` for available modifications.
Currently, "3NPH" is supported.
"""
function modify!(protein::Protein, modification::String...)
    # If digestion has already been done, add the mass to each peptide instead.
    isempty(protein.peptides) || return _modify_mass!(protein, modification...)

    for k in modification
        haskey(MODIFICATION_SITE, k) || continue
        locs = Int[]
        # Iterate over the site symbols of this modification. The mass tables in
        # `MODIFICATION_MS` hold plain numbers, so looping over them never matches a site.
        for loc in MODIFICATION_SITE[k]
            if loc == "^"
                # N-terminal
                push!(locs, 1)
            elseif loc == "\$"
                # C-terminal
                push!(locs, length(protein.origin))
            else
                # Every occurrence of the residue given by the first character of `loc`
                append!(locs, findall(==(first(loc)), protein.origin))
            end
        end
        protein.modification[k] = locs
    end
    return protein
end

function modify!(protein::Protein, modification::Dict{String, Vector{Int}})
for (k, v) in modification
protein.modification[k] = sort!(union!(get(protein.modification, k, Int[]), v))
Expand Down Expand Up @@ -46,32 +80,6 @@ function _modify_mass!(protein::Protein, modification::String...)
protein
end

# Modify an intact or digested protein with the named modifications and return `protein`.
# Names that are not keys of `MODIFICATION_SITE` are ignored.
function modify!(protein::Protein, modification::String...)
    # If digestion has already been done, add the mass to each peptide instead.
    isempty(protein.peptides) || return _modify_mass!(protein, modification...)

    for k in modification
        haskey(MODIFICATION_SITE, k) || continue
        locs = Int[]
        # Iterate over the site symbols of this modification. The mass tables in
        # `MODIFICATION_MS` hold plain numbers, so looping over them never matches a site.
        for loc in MODIFICATION_SITE[k]
            if loc == "^"
                # N-terminal
                push!(locs, 1)
            elseif loc == "\$"
                # C-terminal
                push!(locs, length(protein.origin))
            else
                # Every occurrence of the residue given by the first character of `loc`
                append!(locs, findall(==(first(loc)), protein.origin))
            end
        end
        protein.modification[k] = locs
    end
    return protein
end

function full_digestion(sequence::AbstractString, enzyme::String)
regex = ENZYME[enzyme]
cleavage_sites = mapreduce(union, regex) do rs
Expand All @@ -97,6 +105,14 @@ function merge_modification(modifications)
new
end

"""
digest!(protein, n_miss, enzyme = "")
Digest a protein with an enzyme.
`n_miss` is the maximum number of missed cleavages allowed.
If `protein.enzyme` is an empty string, `enzyme` must be provided.
See object `ENZYME` for available enzymes.
"""
function digest!(protein::Protein, n_miss::Int, enzyme::String = "")
if CONFIG["ACCURATE"]
aa_ms = first(AA_MS)
Expand Down
4 changes: 2 additions & 2 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ using Test
ionize!(p2, "[M+H]+", "[M+2H]2+")
fragmentation(p1.peptides[1])
fragmentation(p2.peptides[1])
fragmentation(p1.peptides[2])
fragmentation(p2.peptides[2])
fragmentation(p1.peptides[2]; charge_state = [1])
fragmentation(p2.peptides[2]; charge_state = [1, 2])
end

0 comments on commit 32b8118

Please sign in to comment.