Skip to content

Commit

Permalink
More modification and support for regular expression
Browse files Browse the repository at this point in the history
  • Loading branch information
yufongpeng committed Jul 22, 2022
1 parent 32b8118 commit 507be19
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 34 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ julia> using Pkg; Pkg.add("https://github.com/yufongpeng/PeptideSeq.jl")

## Example
```julia
julia> using PeptideSeq

julia> p = Protein("DPCHKPKRRKP")
Protein: DPCHKPKRRKP
Modification:
Expand Down
11 changes: 9 additions & 2 deletions config_example/MODIFICATION.tsv
Original file line number Diff line number Diff line change
@@ -1,2 +1,9 @@
Modification Accurate Mass Average Mass Sites
3NPH 135.043262 135.12472 D E $
Modification Accurate Mass Average Mass Sites
3NPH 135.043262 135.12472 D E $
N-acetylation 42.010565 42.03672 K ^
methylation 14.01565 14.02705 K R
O-Hex 162.052824 162.14072 S T
O-HexNAc 203.079374 203.19472 S T
hydroxylation 15.994915 15.999 K P
phosphorylation 79.966333 79.97872 S T Y H
N-GlcNAc 203.079374 203.19472 "r""K[^P][S, T]"""
4 changes: 2 additions & 2 deletions src/PeptideSeq.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,14 @@ end
add_modification!(source)
Add custom modifications. The first row is the header (can be empty); the first column is the name of the modification, the second and third columns are the additional monoisotopic mass and average mass, respectively, and the remaining columns are the modification sites.
^ represents the N-terminal and \$ represents the C-terminal.
Modification sites can be strings without quotation marks or regular expressions like r"...". ^ represents the N-terminal and \$ represents the C-terminal.
See the example file "config_example/MODIFICATION.tsv".
"""
function add_modification!(source)
    # Each data row is: name, monoisotopic mass, average mass, site, site, ...
    for m in CSV.Rows(source, delim = "\t")
        # Register the mass shifts under the modification name (column 1).
        push!(MODIFICATION_MS[1], m[1] => parse(Float64, m[2]))
        push!(MODIFICATION_MS[2], m[1] => parse(Float64, m[3]))

        # Collect the site columns; rows with fewer sites leave trailing cells
        # missing, and those are skipped.
        sites = Any[]
        for i in 4:length(m)
            loc = m[i]
            ismissing(loc) && continue
            if startswith(loc, "r\"")
                # A cell written as a regex literal (r"...") becomes a `Regex`.
                # NOTE(review): this evaluates text read from `source` as Julia
                # code; only load modification files from trusted locations.
                push!(sites, eval(Meta.parse(loc)))
            else
                # Plain site: a residue letter, "^" or "\$".
                push!(sites, loc)
            end
        end
        push!(MODIFICATION_SITE, m[1] => sites)
    end
    return nothing
end

Expand Down
33 changes: 30 additions & 3 deletions src/config.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,38 @@ const ADD_MS = (
)
)

const MODIFICATION_SITE = Dict{String, Vector{String}}("3NPH" => ["D", "E", "\$"])
const MODIFICATION_SITE = Dict{String, Vector{Any}}(
"3NPH" => ["D", "E", "\$"],
"N-acetylation" => ["K", "^"],
"methylation" => ["K", "R"],
"O-Hex" => ["S", "T"],
"O-HexNAc" => ["S", "T"],
"hydroxylation" => ["K", "P"],
"phosphorylation" => ["S", "T", "Y", "H"],
"N-GlcNAc" => [r"K[^P][S, T]"]
)

const MODIFICATION_MS = (
Dict{String, Float64}("3NPH" => 135.043262),
Dict{String, Float64}("3NPH" => 135.12472)
Dict{String, Float64}(
"3NPH" => 135.043262,
"N-acetylation" => 42.010565,
"methylation" => 14.015650,
"O-Hex" => 162.052824,
"O-HexNAc" => 203.079374,
"hydroxylation" => 15.994915,
"phosphorylation" => 79.966333,
"N-GlcNAc" => 203.079374
),
Dict{String, Float64}(
"3NPH" => 135.12472,
"N-acetylation" => 42.03672,
"methylation" => 14.02705,
"O-Hex" => 162.14072,
"O-HexNAc" => 203.19472,
"hydroxylation" => 15.999,
"phosphorylation" => 79.97872,
"N-GlcNAc" => 203.19472
)
)

const ENZYME = Dict(
Expand Down
35 changes: 8 additions & 27 deletions src/preparation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,13 @@ See object `MODIFICATION_SITE` for available modifications.
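Built-in modifications are "3NPH", "N-acetylation", "methylation", "O-Hex", "O-HexNAc", "hydroxylation", "phosphorylation" and "N-GlcNAc".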
"""
function modify!(protein::Protein, modification::String...)
# If digestion had been done, add mass to each peptides
if CONFIG["ACCURATE"]
modification_ms = first(MODIFICATION_MS)
else
modification_ms = last(MODIFICATION_MS)
end
isempty(protein.peptides) || return _modify_mass!(protein, modification...)

for k in modification
haskey(modification_ms, k) || continue
haskey(MODIFICATION_SITE, k) || continue
locs = Int[]
for loc in modification_ms[k]
if loc == "^"
for loc in MODIFICATION_SITE[k]
if isa(loc, Regex)
append!(locs, locc.offset for locc in eachmatch(loc, protein.origin))
elseif loc == "^"
push!(locs, 1)
elseif loc == "\$"
push!(locs, length(protein.origin))
Expand All @@ -31,7 +25,8 @@ function modify!(protein::Protein, modification::String...)
end
protein.modification[k] = locs
end
protein
# If digestion had been done, add mass to each peptides
return isempty(protein.peptides) ? protein : _modify_mass!(protein, modification...)
end

function modify!(protein::Protein, modification::Dict{String, Vector{Int}})
Expand All @@ -52,20 +47,6 @@ function _modify_mass!(protein::Protein, modification::String...)
modification_ms = last(MODIFICATION_MS)
end

for k in modification
haskey(MODIFICATION_SITE, k) || continue
locs = Int[]
for loc in MODIFICATION_SITE[k]
append!(locs, findall(==(first(loc)), protein.origin))
if loc == "^"
push!(locs, 0)
elseif loc == "\$"
push!(locs, -1)
end
end
protein.modification[k] = locs
end

for (k, v) in protein.modification
for (i, pep) in enumerate(protein.peptides)
id = filter(in(v), pep.position)
Expand Down Expand Up @@ -125,7 +106,7 @@ function digest!(protein::Protein, n_miss::Int, enzyme::String = "")
end

if enzyme == ""
protein.enzyme != "" && throw(ArgumentError("Please provide enzyme!"))
protein.enzyme == "" && throw(ArgumentError("Please provide enzyme!"))
else
protein.enzyme = enzyme
end
Expand Down

0 comments on commit 507be19

Please sign in to comment.