From 3c63affc5092aed1b8aab388f1b8ee5950109937 Mon Sep 17 00:00:00 2001 From: CaptnClementine <131146976+CaptnClementine@users.noreply.github.com> Date: Tue, 26 Sep 2023 14:23:42 +0400 Subject: [PATCH 01/16] Create README.md and shared folder --- HW4_Gorbarenko/README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 HW4_Gorbarenko/README.md diff --git a/HW4_Gorbarenko/README.md b/HW4_Gorbarenko/README.md new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/HW4_Gorbarenko/README.md @@ -0,0 +1 @@ + From a82ffedf355778a0261e3da39b6c18c4092feaeb Mon Sep 17 00:00:00 2001 From: Anastasia Date: Wed, 27 Sep 2023 18:39:45 +0300 Subject: [PATCH 02/16] Add 'is_aa' function to check if a sequence contains amino acids --- HW4_Gorbarenko/amino_analyzer.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 HW4_Gorbarenko/amino_analyzer.py diff --git a/HW4_Gorbarenko/amino_analyzer.py b/HW4_Gorbarenko/amino_analyzer.py new file mode 100644 index 0000000..5b6caa4 --- /dev/null +++ b/HW4_Gorbarenko/amino_analyzer.py @@ -0,0 +1,15 @@ +def is_aa(seq: str) -> bool: + """ + Check if a sequence contains only amino acids. + + Args: + seq (str): The input sequфence to be checked. + + Returns: + bool: True if the sequence contains only amino acids, False otherwise. 
+ """ + aa_list = ['V', 'I', 'L', 'E', 'Q', 'D', 'N', 'H', 'W', 'F', 'Y', 'R', 'K', 'S', 'T', 'M', 'A', 'G', 'P', 'C', + 'v', 'i', 'l', 'e', 'q', 'd', 'n', 'h', 'w', 'f', 'y', 'r', 'k', 's', 't', 'm', 'a', 'g', 'p', 'c'] + unique_chars = set(seq) + amino_acids = set(aa_list) + return unique_chars <= amino_acids From 445fde1cf836c74c5c1564d82094be8facf87272 Mon Sep 17 00:00:00 2001 From: Anastasia Date: Wed, 27 Sep 2023 18:48:23 +0300 Subject: [PATCH 03/16] Add 'choose_weight' function --- HW4_Gorbarenko/amino_analyzer.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/HW4_Gorbarenko/amino_analyzer.py b/HW4_Gorbarenko/amino_analyzer.py index 5b6caa4..13d3e8a 100644 --- a/HW4_Gorbarenko/amino_analyzer.py +++ b/HW4_Gorbarenko/amino_analyzer.py @@ -1,3 +1,5 @@ +from typing import List + def is_aa(seq: str) -> bool: """ Check if a sequence contains only amino acids. @@ -13,3 +15,28 @@ def is_aa(seq: str) -> bool: unique_chars = set(seq) amino_acids = set(aa_list) return unique_chars <= amino_acids + + + +def choose_weight(weight: str) -> List[float]: + """ + Choose the weight type of amino acids - average or monoisotopic. + + Args: + weight (str): The type of weight to choose, either 'average' or 'monoisotopic'. + + Returns: + List[float]: A list of amino acid weights based on the chosen type. 
+ """ + if weight == 'average': + average_weights = [71.0788, 156.1875, 114.1038, 115.0886, 103.1388, 129.1155, 128.1307, 57.0519, 137.1411, 113.1594, + 113.1594, 128.1741, 131.1926, 147.1766, 97.1167, 87.0782, 101.1051, 186.2132, 163.1760, 99.1326] + weights_aa = average_weights + elif weight == 'monoisotopic': + monoisotopic_weights = [71.03711, 156.10111, 114.04293, 115.02694, 103.00919, 129.04259, 128.05858, 57.02146, 137.05891, 113.08406, + 113.08406, 128.09496, 131.04049, 147.06841, 97.05276, 87.03203, 101.04768, 186.07931, 163.06333, 99.06841] + weights_aa = monoisotopic_weights + else: + raise ValueError(f"I do not know what '{weight}' is :( \n Read help or just do not write anything except your sequence") + + return weights_aa From d13d869836156f490e39c9645e6b9e2c2bd67f64 Mon Sep 17 00:00:00 2001 From: Anastasia Date: Wed, 27 Sep 2023 18:57:59 +0300 Subject: [PATCH 04/16] Add 'aa_weight' function to calculate amino acids weight --- HW4_Gorbarenko/amino_analyzer.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/HW4_Gorbarenko/amino_analyzer.py b/HW4_Gorbarenko/amino_analyzer.py index 13d3e8a..b2123ab 100644 --- a/HW4_Gorbarenko/amino_analyzer.py +++ b/HW4_Gorbarenko/amino_analyzer.py @@ -40,3 +40,23 @@ def choose_weight(weight: str) -> List[float]: raise ValueError(f"I do not know what '{weight}' is :( \n Read help or just do not write anything except your sequence") return weights_aa + + +def aa_weight(seq: str, weight: str = 'average') -> float: + """ + Calculate the amino acids weight in a protein sequence. + + Args: + seq (str): The amino acid sequence to calculate the weight for. + weight (str, optional): The type of weight to use, either 'average' or 'monoisotopic'. Default is 'average'. + + Returns: + float: The calculated weight of the amino acid sequence. 
+ """ + aa_list = str('A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V').split(',') + weights_aa = choose_weight(weight) + aa_to_weight = dict(zip(aa_list, weights_aa)) + final_weight = 0 + for i in seq.upper(): + final_weight += aa_to_weight.get(i, 0) + return round(final_weight, 3) From 96321d8fa549361832bd0235d708ce2cb32d3cd2 Mon Sep 17 00:00:00 2001 From: Anastasia Date: Wed, 27 Sep 2023 21:07:58 +0300 Subject: [PATCH 05/16] Add amino acid hydrophobic and hydrophilic counting function 'count_hydroaffinity' --- HW4_Gorbarenko/amino_analyzer.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/HW4_Gorbarenko/amino_analyzer.py b/HW4_Gorbarenko/amino_analyzer.py index b2123ab..ee130eb 100644 --- a/HW4_Gorbarenko/amino_analyzer.py +++ b/HW4_Gorbarenko/amino_analyzer.py @@ -60,3 +60,30 @@ def aa_weight(seq: str, weight: str = 'average') -> float: for i in seq.upper(): final_weight += aa_to_weight.get(i, 0) return round(final_weight, 3) + + +def count_hydroaffinity(seq: str) -> tuple: + """ + Count the quantity of hydrophobic and hydrophilic amino acids in a protein sequence. + + Args: + seq (str): The protein sequence for which to count hydrophobic and hydrophilic amino acids. + + Returns: + tuple: A tuple containing the count of hydrophobic and hydrophilic amino acids, respectively. 
+ """ + hydrophobic_aa = ['A', 'V', 'L', 'I', 'P', 'F', 'W', 'M'] + hydrophilic_aa = ['R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'K', 'S', 'T', 'Y'] + + hydrophobic_count = 0 + hydrophilic_count = 0 + + seq = seq.upper() + + for aa in seq: + if aa in hydrophobic_aa: + hydrophobic_count += 1 + elif aa in hydrophilic_aa: + hydrophilic_count += 1 + + return hydrophobic_count, hydrophilic_count From ef1e6fdf71ded3f59dd001f56ef2847e52e0c610 Mon Sep 17 00:00:00 2001 From: AO Date: Sat, 30 Sep 2023 00:51:44 +0300 Subject: [PATCH 06/16] Add 'peptide_cutter' function --- HW4_Gorbarenko/amino_analyzer.py | 141 ++++++++++++++++++++++++++++++- 1 file changed, 140 insertions(+), 1 deletion(-) diff --git a/HW4_Gorbarenko/amino_analyzer.py b/HW4_Gorbarenko/amino_analyzer.py index ee130eb..8e7e45f 100644 --- a/HW4_Gorbarenko/amino_analyzer.py +++ b/HW4_Gorbarenko/amino_analyzer.py @@ -53,7 +53,7 @@ def aa_weight(seq: str, weight: str = 'average') -> float: Returns: float: The calculated weight of the amino acid sequence. """ - aa_list = str('A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V').split(',') + aa_list = str('A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V').split(', ') weights_aa = choose_weight(weight) aa_to_weight = dict(zip(aa_list, weights_aa)) final_weight = 0 @@ -87,3 +87,142 @@ def count_hydroaffinity(seq: str) -> tuple: hydrophilic_count += 1 return hydrophobic_count, hydrophilic_count + + +def peptide_cutter(sequence: str, enzyme: str = "trypsin") -> str: + """ + This function identifies cleavage sites in a given peptide sequence using a specified enzyme. + + Args: + sequence (str): The input peptide sequence. + enzyme (str): The enzyme to be used for cleavage. Choose between "trypsin" and "chymotrypsin". Default is "trypsin". + + Returns: + str: A message indicating the number and positions of cleavage sites, or an error message if an invalid enzyme is provided. 
+ """ + cleavage_sites = [] + if enzyme not in ("trypsin", "chymotrypsin"): + return "You have chosen an enzyme that is not provided. Please choose between trypsin and chymotrypsin." + + if enzyme == "trypsin": # Trypsin cuts peptide chains mainly at the carboxyl side of the amino acids lysine or arginine. + for i in range(len(sequence)-1): + if sequence[i] in ['K', 'R', 'k', 'r'] and sequence[i+1] not in ['P','p']: + cleavage_sites.append(i+1) + + if enzyme == "chymotrypsin": # Chymotrypsin preferentially cleaves at Trp, Tyr and Phe in position P1(high specificity) + for i in range(len(sequence)-1): + if sequence[i] in ['W', 'Y', 'F', 'w', 'y', 'f'] and sequence[i+1] not in ['P','p']: + cleavage_sites.append(i+1) + + if cleavage_sites: + return f"Found {len(cleavage_sites)} {enzyme} cleavage sites at positions {', '.join(map(str, cleavage_sites))}" + else: + return f"No {enzyme} cleavage sites were found." + + +def one_to_three_letter_code(sequence: str) -> str: + """ + This function converts a protein sequence from one-letter amino acid code to three-letter code. + + Args: + sequence (str): The input protein sequence in one-letter code. + + Returns: + str: The converted protein sequence in three-letter code. + """ + amino_acids = { + 'A': 'Ala', 'C': 'Cys', 'D': 'Asp', 'E': 'Glu', 'F': 'Phe', + 'G': 'Gly', 'H': 'His', 'I': 'Ile', 'K': 'Lys', 'L': 'Leu', + 'M': 'Met', 'N': 'Asn', 'P': 'Pro', 'Q': 'Gln', 'R': 'Arg', + 'S': 'Ser', 'T': 'Thr', 'V': 'Val', 'W': 'Trp', 'Y': 'Tyr' + } + + three_letter_code = [amino_acids.get(aa.upper()) for aa in sequence] + + return ''.join(three_letter_code) + +def sulphur_containing_aa_counter(sequence): + """ + This function counts sulphur-containing amino acids in a protein sequence. + + Args: + sequence (str): The input protein sequence in one-letter code. + + Returns: + str: The number of sulphur-containing amino acids in a protein sequence. 
+ """ + counter = 0 + for i in sequence: + if i == 'C' or i == 'M': + counter += 1 + answer = str(counter) + return 'The number of sulphur-containing amino acids in the sequence is equal to ' + answer + +def run_amino_analyzer(sequence, procedure, *, weight_type = 'average'): + """ + This is the main function to run the amino-analyzer.py tool. + + Args: + sequence (str): The input protein sequence in one-letter code. + procedure (str): amino-analyzer.py tool has 5 functions at all: + 1. aa_weight - Calculate the amino acids weight in a protein sequence. + 2. count_hydroaffinity - Count the quantity of hydrophobic and hydrophilic amino acids in a protein sequence. + 3. peptide_cutter - This function identifies cleavage sites in a given peptide sequence using a specified enzyme. + 4. one_to_three_letter_code - This function converts a protein sequence from one-letter amino acid code to three-letter code. + 5. sulphur_containing_aa_counter - This function counts sulphur-containing amino acids in a protein sequence. + weight_type = 'average': default argument for 'aa_weight' function. weight_type = 'monoisotopic' can be used as a second option. + + Returns: + The result of the procedure. + """ + + procedures = ['aa_weight', 'count_hydroaffinity', 'peptide_cutter', 'one_to_three_letter_code', 'sulphur_containing_aa_counter'] + if procedure not in procedures: + raise ValueError(f"Incorrect procedure. Acceptable procedures: {', '.join(procedures)}") + + for i in sequence: + if not is_aa(sequence): + raise ValueError("Incorrect sequence. 
Only amino acids are allowed (V, I, L, E, Q, D, N, H, W, F, Y, R, K, S, T, M, A, G, P, C, v, i, l, e, q, d, n, h, w, f, y, r, k, s, t, m, a, g, p, c).") + + if procedure == 'aa_weight': + result = aa_weight(sequence, weight_type) + elif procedure == 'count_hydroaffinity': + result = count_hydroaffinity(sequence) + elif procedure == 'peptide_cutter': + result = peptide_cutter(sequence) + elif procedure == 'one_to_three_letter_code': + result = one_to_three_letter_code(sequence) + elif procedure == 'sulphur_containing_aa_counter': + result = sulphur_containing_aa_counter(sequence) + return result + + +def peptide_cutter(sequence: str, enzyme: str = "trypsin") -> str: + """ + This function identifies cleavage sites in a given peptide sequence using a specified enzyme. + + Args: + sequence (str): The input peptide sequence. + enzyme (str): The enzyme to be used for cleavage. Choose between "trypsin" and "chymotrypsin". Default is "trypsin". + + Returns: + str: A message indicating the number and positions of cleavage sites, or an error message if an invalid enzyme is provided. + """ + cleavage_sites = [] + if enzyme not in ("trypsin", "chymotrypsin"): + return "You have chosen an enzyme that is not provided. Please choose between trypsin and chymotrypsin." + + if enzyme == "trypsin": # Trypsin cuts peptide chains mainly at the carboxyl side of the amino acids lysine or arginine. 
+ for i in range(len(sequence)-1): + if sequence[i] in ['K', 'R', 'k', 'r'] and sequence[i+1] not in ['P','p']: + cleavage_sites.append(i+1) + + if enzyme == "chymotrypsin": # Chymotrypsin preferentially cleaves at Trp, Tyr and Phe in position P1(high specificity) + for i in range(len(sequence)-1): + if sequence[i] in ['W', 'Y', 'F', 'w', 'y', 'f'] and sequence[i+1] not in ['P','p']: + cleavage_sites.append(i+1) + + if cleavage_sites: + return f"Found {len(cleavage_sites)} {enzyme} cleavage sites at positions {', '.join(map(str, cleavage_sites))}" + else: + return f"No {enzyme} cleavage sites were found." \ No newline at end of file From 6182c31e922b4daeaa1eae673778a41f1c0235de Mon Sep 17 00:00:00 2001 From: AO Date: Sat, 30 Sep 2023 00:56:01 +0300 Subject: [PATCH 07/16] Add 'one_to_three_letter_code' function --- HW4_Gorbarenko/amino_analyzer.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/HW4_Gorbarenko/amino_analyzer.py b/HW4_Gorbarenko/amino_analyzer.py index 8e7e45f..aa0a02f 100644 --- a/HW4_Gorbarenko/amino_analyzer.py +++ b/HW4_Gorbarenko/amino_analyzer.py @@ -53,7 +53,7 @@ def aa_weight(seq: str, weight: str = 'average') -> float: Returns: float: The calculated weight of the amino acid sequence. """ - aa_list = str('A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V').split(', ') + aa_list = str('A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V').split(', ') weights_aa = choose_weight(weight) aa_to_weight = dict(zip(aa_list, weights_aa)) final_weight = 0 @@ -225,4 +225,25 @@ def peptide_cutter(sequence: str, enzyme: str = "trypsin") -> str: if cleavage_sites: return f"Found {len(cleavage_sites)} {enzyme} cleavage sites at positions {', '.join(map(str, cleavage_sites))}" else: - return f"No {enzyme} cleavage sites were found." \ No newline at end of file + return f"No {enzyme} cleavage sites were found." 
+ + +def one_to_three_letter_code(sequence: str) -> str: + """ + This function converts a protein sequence from one-letter amino acid code to three-letter code. + + Args: + sequence (str): The input protein sequence in one-letter code. + + Returns: + str: The converted protein sequence in three-letter code. + """ + amino_acids = { + 'A': 'Ala', 'C': 'Cys', 'D': 'Asp', 'E': 'Glu', 'F': 'Phe', + 'G': 'Gly', 'H': 'His', 'I': 'Ile', 'K': 'Lys', 'L': 'Leu', + 'M': 'Met', 'N': 'Asn', 'P': 'Pro', 'Q': 'Gln', 'R': 'Arg', + 'S': 'Ser', 'T': 'Thr', 'V': 'Val', 'W': 'Trp', 'Y': 'Tyr' + } + + three_letter_code = [amino_acids.get(aa.upper()) for aa in sequence] + return ''.join(three_letter_code) From f6a435a3c0f048d081d1acbdaf4a385eacca1762 Mon Sep 17 00:00:00 2001 From: iliapopov17 Date: Sat, 30 Sep 2023 10:16:52 +0300 Subject: [PATCH 08/16] Add function 'sulphur_containing_aa_counter()' Add function 'sulphur_containing_aa_counter()' to the 'amino_analyzer.py' tool that counts sulphur-containing amino acids (Cysteine and Methionine) in a protein sequence. --- HW4_Gorbarenko/amino_analyzer.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/HW4_Gorbarenko/amino_analyzer.py b/HW4_Gorbarenko/amino_analyzer.py index aa0a02f..887aa82 100644 --- a/HW4_Gorbarenko/amino_analyzer.py +++ b/HW4_Gorbarenko/amino_analyzer.py @@ -247,3 +247,20 @@ def one_to_three_letter_code(sequence: str) -> str: three_letter_code = [amino_acids.get(aa.upper()) for aa in sequence] return ''.join(three_letter_code) + +def sulphur_containing_aa_counter(sequence): + """ + This function counts sulphur-containing amino acids (Cysteine and Methionine) in a protein sequence. + + Args: + sequence (str): The input protein sequence in one-letter code. + + Returns: + str: The number of sulphur-containing amino acids in a protein sequence. 
+ """ + counter = 0 + for i in sequence: + if i == 'C' or i == 'M': + counter += 1 + answer = str(counter) + return 'The number of sulphur-containing amino acids in the sequence is equal to ' + answer \ No newline at end of file From dbad46c5104e9e8cf4b936e0f34cff80316dace6 Mon Sep 17 00:00:00 2001 From: iliapopov17 Date: Sat, 30 Sep 2023 10:18:07 +0300 Subject: [PATCH 09/16] Add main function to run the tool Add 'run_amino_analyzer()' function - the main function to run the amino-analyzer.py tool. --- HW4_Gorbarenko/amino_analyzer.py | 40 +++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/HW4_Gorbarenko/amino_analyzer.py b/HW4_Gorbarenko/amino_analyzer.py index 887aa82..bc68cff 100644 --- a/HW4_Gorbarenko/amino_analyzer.py +++ b/HW4_Gorbarenko/amino_analyzer.py @@ -263,4 +263,42 @@ def sulphur_containing_aa_counter(sequence): if i == 'C' or i == 'M': counter += 1 answer = str(counter) - return 'The number of sulphur-containing amino acids in the sequence is equal to ' + answer \ No newline at end of file + return 'The number of sulphur-containing amino acids in the sequence is equal to ' + answer + +def run_amino_analyzer(sequence, procedure, *, weight_type = 'average'): + """ + This is the main function to run the amino-analyzer.py tool. + + Args: + sequence (str): The input protein sequence in one-letter code. + procedure (str): amino-analyzer.py tool has 5 functions at all: + 1. aa_weight - Calculate the amino acids weight in a protein sequence. + 2. count_hydroaffinity - Count the quantity of hydrophobic and hydrophilic amino acids in a protein sequence. + 3. peptide_cutter - This function identifies cleavage sites in a given peptide sequence using a specified enzyme. + 4. one_to_three_letter_code - This function converts a protein sequence from one-letter amino acid code to three-letter code. + 5. sulphur_containing_aa_counter - This function counts sulphur-containing amino acids in a protein sequence. 
+ weight_type = 'average': default argument for 'aa_weight' function. weight_type = 'monoisotopic' can be used as a second option. + + Returns: + The result of the procedure. + """ + + procedures = ['aa_weight', 'count_hydroaffinity', 'peptide_cutter', 'one_to_three_letter_code', 'sulphur_containing_aa_counter'] + if procedure not in procedures: + raise ValueError(f"Incorrect procedure. Acceptable procedures: {', '.join(procedures)}") + + for i in sequence: + if not is_aa(sequence): + raise ValueError("Incorrect sequence. Only amino acids are allowed (V, I, L, E, Q, D, N, H, W, F, Y, R, K, S, T, M, A, G, P, C, v, i, l, e, q, d, n, h, w, f, y, r, k, s, t, m, a, g, p, c).") + + if procedure == 'aa_weight': + result = aa_weight(sequence, weight_type) + elif procedure == 'count_hydroaffinity': + result = count_hydroaffinity(sequence) + elif procedure == 'peptide_cutter': + result = peptide_cutter(sequence) + elif procedure == 'one_to_three_letter_code': + result = one_to_three_letter_code(sequence) + elif procedure == 'sulphur_containing_aa_counter': + result = sulphur_containing_aa_counter(sequence) + return result \ No newline at end of file From c9373771d3b68496716cfd77add63af68e545d25 Mon Sep 17 00:00:00 2001 From: CaptnClementine <131146976+CaptnClementine@users.noreply.github.com> Date: Sat, 30 Sep 2023 11:48:28 +0300 Subject: [PATCH 10/16] Fix function 'aa_weight' --- HW4_Gorbarenko/amino_analyzer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/HW4_Gorbarenko/amino_analyzer.py b/HW4_Gorbarenko/amino_analyzer.py index bc68cff..b9b5b79 100644 --- a/HW4_Gorbarenko/amino_analyzer.py +++ b/HW4_Gorbarenko/amino_analyzer.py @@ -53,7 +53,7 @@ def aa_weight(seq: str, weight: str = 'average') -> float: Returns: float: The calculated weight of the amino acid sequence. 
""" - aa_list = str('A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V').split(', ') + aa_list = str('A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V').split(', ') weights_aa = choose_weight(weight) aa_to_weight = dict(zip(aa_list, weights_aa)) final_weight = 0 @@ -301,4 +301,4 @@ def run_amino_analyzer(sequence, procedure, *, weight_type = 'average'): result = one_to_three_letter_code(sequence) elif procedure == 'sulphur_containing_aa_counter': result = sulphur_containing_aa_counter(sequence) - return result \ No newline at end of file + return result From f13125a225d9d06693802455955b786d89dee75c Mon Sep 17 00:00:00 2001 From: iliapopov17 Date: Sat, 30 Sep 2023 15:17:34 +0300 Subject: [PATCH 11/16] Delete repeated elements in code 1. Delete repeated elements in code 2. Argument types are specified for 'sulphur_containing_aa_counter()' and 'run_amino_analyzer()' functions --- HW4_Gorbarenko/amino_analyzer.py | 116 ++----------------------------- 1 file changed, 4 insertions(+), 112 deletions(-) diff --git a/HW4_Gorbarenko/amino_analyzer.py b/HW4_Gorbarenko/amino_analyzer.py index b9b5b79..3c0a181 100644 --- a/HW4_Gorbarenko/amino_analyzer.py +++ b/HW4_Gorbarenko/amino_analyzer.py @@ -88,115 +88,6 @@ def count_hydroaffinity(seq: str) -> tuple: return hydrophobic_count, hydrophilic_count - -def peptide_cutter(sequence: str, enzyme: str = "trypsin") -> str: - """ - This function identifies cleavage sites in a given peptide sequence using a specified enzyme. - - Args: - sequence (str): The input peptide sequence. - enzyme (str): The enzyme to be used for cleavage. Choose between "trypsin" and "chymotrypsin". Default is "trypsin". - - Returns: - str: A message indicating the number and positions of cleavage sites, or an error message if an invalid enzyme is provided. - """ - cleavage_sites = [] - if enzyme not in ("trypsin", "chymotrypsin"): - return "You have chosen an enzyme that is not provided. Please choose between trypsin and chymotrypsin." 
- - if enzyme == "trypsin": # Trypsin cuts peptide chains mainly at the carboxyl side of the amino acids lysine or arginine. - for i in range(len(sequence)-1): - if sequence[i] in ['K', 'R', 'k', 'r'] and sequence[i+1] not in ['P','p']: - cleavage_sites.append(i+1) - - if enzyme == "chymotrypsin": # Chymotrypsin preferentially cleaves at Trp, Tyr and Phe in position P1(high specificity) - for i in range(len(sequence)-1): - if sequence[i] in ['W', 'Y', 'F', 'w', 'y', 'f'] and sequence[i+1] not in ['P','p']: - cleavage_sites.append(i+1) - - if cleavage_sites: - return f"Found {len(cleavage_sites)} {enzyme} cleavage sites at positions {', '.join(map(str, cleavage_sites))}" - else: - return f"No {enzyme} cleavage sites were found." - - -def one_to_three_letter_code(sequence: str) -> str: - """ - This function converts a protein sequence from one-letter amino acid code to three-letter code. - - Args: - sequence (str): The input protein sequence in one-letter code. - - Returns: - str: The converted protein sequence in three-letter code. - """ - amino_acids = { - 'A': 'Ala', 'C': 'Cys', 'D': 'Asp', 'E': 'Glu', 'F': 'Phe', - 'G': 'Gly', 'H': 'His', 'I': 'Ile', 'K': 'Lys', 'L': 'Leu', - 'M': 'Met', 'N': 'Asn', 'P': 'Pro', 'Q': 'Gln', 'R': 'Arg', - 'S': 'Ser', 'T': 'Thr', 'V': 'Val', 'W': 'Trp', 'Y': 'Tyr' - } - - three_letter_code = [amino_acids.get(aa.upper()) for aa in sequence] - - return ''.join(three_letter_code) - -def sulphur_containing_aa_counter(sequence): - """ - This function counts sulphur-containing amino acids in a protein sequence. - - Args: - sequence (str): The input protein sequence in one-letter code. - - Returns: - str: The number of sulphur-containing amino acids in a protein sequence. 
- """ - counter = 0 - for i in sequence: - if i == 'C' or i == 'M': - counter += 1 - answer = str(counter) - return 'The number of sulphur-containing amino acids in the sequence is equal to ' + answer - -def run_amino_analyzer(sequence, procedure, *, weight_type = 'average'): - """ - This is the main function to run the amino-analyzer.py tool. - - Args: - sequence (str): The input protein sequence in one-letter code. - procedure (str): amino-analyzer.py tool has 5 functions at all: - 1. aa_weight - Calculate the amino acids weight in a protein sequence. - 2. count_hydroaffinity - Count the quantity of hydrophobic and hydrophilic amino acids in a protein sequence. - 3. peptide_cutter - This function identifies cleavage sites in a given peptide sequence using a specified enzyme. - 4. one_to_three_letter_code - This function converts a protein sequence from one-letter amino acid code to three-letter code. - 5. sulphur_containing_aa_counter - This function counts sulphur-containing amino acids in a protein sequence. - weight_type = 'average': default argument for 'aa_weight' function. weight_type = 'monoisotopic' can be used as a second option. - - Returns: - The result of the procedure. - """ - - procedures = ['aa_weight', 'count_hydroaffinity', 'peptide_cutter', 'one_to_three_letter_code', 'sulphur_containing_aa_counter'] - if procedure not in procedures: - raise ValueError(f"Incorrect procedure. Acceptable procedures: {', '.join(procedures)}") - - for i in sequence: - if not is_aa(sequence): - raise ValueError("Incorrect sequence. 
Only amino acids are allowed (V, I, L, E, Q, D, N, H, W, F, Y, R, K, S, T, M, A, G, P, C, v, i, l, e, q, d, n, h, w, f, y, r, k, s, t, m, a, g, p, c).") - - if procedure == 'aa_weight': - result = aa_weight(sequence, weight_type) - elif procedure == 'count_hydroaffinity': - result = count_hydroaffinity(sequence) - elif procedure == 'peptide_cutter': - result = peptide_cutter(sequence) - elif procedure == 'one_to_three_letter_code': - result = one_to_three_letter_code(sequence) - elif procedure == 'sulphur_containing_aa_counter': - result = sulphur_containing_aa_counter(sequence) - return result - - def peptide_cutter(sequence: str, enzyme: str = "trypsin") -> str: """ This function identifies cleavage sites in a given peptide sequence using a specified enzyme. @@ -248,7 +139,7 @@ def one_to_three_letter_code(sequence: str) -> str: three_letter_code = [amino_acids.get(aa.upper()) for aa in sequence] return ''.join(three_letter_code) -def sulphur_containing_aa_counter(sequence): +def sulphur_containing_aa_counter(sequence: str) -> str: """ This function counts sulphur-containing amino acids (Cysteine and Methionine) in a protein sequence. @@ -265,7 +156,7 @@ def sulphur_containing_aa_counter(sequence): answer = str(counter) return 'The number of sulphur-containing amino acids in the sequence is equal to ' + answer -def run_amino_analyzer(sequence, procedure, *, weight_type = 'average'): +def run_amino_analyzer(sequence: str, procedure: str, *, weight_type: str = 'average', enzyme: str = 'trypsin'): """ This is the main function to run the amino-analyzer.py tool. @@ -278,6 +169,7 @@ def run_amino_analyzer(sequence, procedure, *, weight_type = 'average'): 4. one_to_three_letter_code - This function converts a protein sequence from one-letter amino acid code to three-letter code. 5. sulphur_containing_aa_counter - This function counts sulphur-containing amino acids in a protein sequence. weight_type = 'average': default argument for 'aa_weight' function. 
weight_type = 'monoisotopic' can be used as a second option. + enzyme = 'trypsin': default argument for 'peptide_cutter' function. enzyme = 'chymotrypsin' can be used as a second option. Returns: The result of the procedure. @@ -296,7 +188,7 @@ def run_amino_analyzer(sequence, procedure, *, weight_type = 'average'): elif procedure == 'count_hydroaffinity': result = count_hydroaffinity(sequence) elif procedure == 'peptide_cutter': - result = peptide_cutter(sequence) + result = peptide_cutter(sequence, enzyme) elif procedure == 'one_to_three_letter_code': result = one_to_three_letter_code(sequence) elif procedure == 'sulphur_containing_aa_counter': From 9efbeda43314e3ca36b51d5a74015de0b339cb96 Mon Sep 17 00:00:00 2001 From: Cucumberan <122701551+Cucumberan@users.noreply.github.com> Date: Sat, 30 Sep 2023 16:28:43 +0400 Subject: [PATCH 12/16] Add README.md --- HW4_Gorbarenko/README.md | 168 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) diff --git a/HW4_Gorbarenko/README.md b/HW4_Gorbarenko/README.md index 8b13789..5e7bb92 100644 --- a/HW4_Gorbarenko/README.md +++ b/HW4_Gorbarenko/README.md @@ -1 +1,169 @@ +# Welcome to amino_analyzer tool + +## Overview +The amino_analyzer is an easy-to-use Python tool designed to facilitate the comprehensive analysis of protein sequences. It provides a broad functionality from basic checks for valid amino acid sequences to more complicated computations like molecular weights, hydrophobicity analysis, and cleavage site identification. + +## :green_heart: Key features + +### 1. Protein molecular weight calculation +The amino_analyzer offers the capability to calculate the molecular weight of a protein sequence. Users can choose between average and monoisotopic weights. +### 2. Hydrophobicity analysis +This function counts the quantity of hydrophobic and hydrophilic amino acids within a protein sequence. +### 3. 
Cleavage site identification +Researchers can identify cleavage sites in a given peptide sequence using a specified enzyme. The tool currently supports two commonly used enzymes, trypsin and chymotrypsin. +### 4. One-letter to three-letter code conversion +The amino_analyzer provides a function to convert a protein sequence from the standard one-letter amino acid code to the three-letter code. +### 5. Sulphur-containing amino acid counting +The tool allows you to quickly determine the number of sulphur-containing amino acids, namely Cysteine (C) and Methionine (M), within a protein sequence. + +## Usage + +To run the amino_analyzer tool you need to use the main function ***run_amino_analyzer*** with the following arguments: + +```python +from amino_analyzer import run_amino_analyzer +run_amino_analyzer(sequence, procedure, *, weight_type = 'average', enzyme: str = 'trypsin') +``` + +- `sequence (str):` The input protein sequence in one-letter code. +- `procedure (str):` The procedure to perform over your protein sequence. +- `weight_type: str = 'average':` default argument for `aa_weight` function. `weight_type = 'monoisotopic'` can be used as another option. +- `enzyme: str = 'trypsin':` default argument for `peptide_cutter` function. `enzyme = 'chymotrypsin'` can be used as another option. + + +**Available procedures list** +- `aa_weight` — calculates the amino acids weight in a protein sequence. +- `count_hydroaffinity` — counts the quantity of hydrophobic and hydrophilic amino acids in a protein sequence. +- `peptide_cutter` — identifies cleavage sites in a given peptide sequence using a specified enzyme (trypsin or chymotrypsin). +- `one_to_three_letter_code` — converts a protein sequence from one-letter amino acid code to three-letter code. +- `sulphur_containing_aa_counter` — counts sulphur-containing amino acids in a protein sequence. + +You can also use each function separately by importing them in advance. 
Below are the available functions and their respective purposes: + +#### 1. **aa_weight** function calculates the weight of amino acids in a protein sequence: + The type of weight to use, either `average` or `monoisotopic`. Default is `average`. +```python +from amino_analyzer import aa_weight +aa_weight(seq: str, weight: str = 'average') -> float +``` +```python +sequence = "VLDQRKSTMA" +result = aa_weight(sequence, weight='monoisotopic') +print(result) # Output: 1129.591 +``` + +#### 2. **count_hydroaffinity** counts the quantity of hydrophobic and hydrophilic amino acids in a protein sequence: +```python +from amino_analyzer import count_hydroaffinity +count_hydroaffinity(seq: str) -> tuple +``` +```python +sequence = "VLDQRKSTMA" +result = count_hydroaffinity(sequence) +print(result) # Output: (4, 6) +``` +#### 3. **peptide_cutter** function identifies cleavage sites in a given peptide sequence using a specified enzyme: trypsin or chymotrypsin: +```python +from amino_analyzer import peptide_cutter +peptide_cutter(sequence: str, enzyme: str = "trypsin") -> str +``` +```python +sequence = "VLDQRKSTMA" +result = peptide_cutter(sequence, enzyme="trypsin") +print(result) # Output: Found 2 trypsin cleavage sites at positions 5, 6 +``` +#### 4. **one_to_three_letter_code** converts a protein sequence from one-letter amino acid code to three-letter code. +```python +from amino_analyzer import one_to_three_letter_code +one_to_three_letter_code(sequence: str) -> str +``` + +```python +sequence = "VLDQRKSTMA" +result = one_to_three_letter_code(sequence) +print(result) # Output: ValLeuAspGlnArgLysSerThrMetAla +``` + +#### 5. 
**sulphur_containing_aa_counter** counts sulphur-containing amino acids in a protein sequence: +```python +from amino_analyzer import sulphur_containing_aa_counter +sulphur_containing_aa_counter(sequence: str) -> str +``` +```python +sequence = "VLDQRKSTMA" +result = sulphur_containing_aa_counter(sequence) +print(result) # Output: The number of sulphur-containing amino acids in the sequence is equal to 2 +``` + +## Examples +To calculate protein molecular weight: +```python +run_amino_analyzer("VLSPADKTNVKAAW", "aa_weight") # Output: 1481.715 + +run_amino_analyzer("VLSPADKTNVKAAW", "aa_weight", weight_type = 'monoisotopic') # Output: 1480.804 +``` + +To count hydroaffinity: +```python +run_amino_analyzer("VLSPADKTNVKAAW", "count_hydroaffinity") # Output: (8, 6) +``` + +To find trypsin/chymotrypsin cleavage sites: +```python +run_amino_analyzer("VLSPADKTNVKAAW", "peptide_cutter") # Output: 'Found 2 trypsin cleavage sites at positions 7, 11' + +run_amino_analyzer("VLSPADKTNVKAAWW", "peptide_cutter", enzyme = 'chymotrypsin') # Output: 'Found 1 chymotrypsin cleavage sites at positions 14' +``` + +To convert to three-letter code and count sulphur-containing amino acids: +```python +run_amino_analyzer("VLSPADKTNVKAAW", "one_to_three_letter_code") # Output: 'ValLeuSerProAlaAspLysThrAsnValLysAlaAlaTrp' + +run_amino_analyzer("VLSPADKTNVKAAWM", "sulphur_containing_aa_counter") # Output: The number of sulphur-containing amino acids in the sequence is equal to 1 +``` + +## Troubleshooting +Here are some common issues you can come across while using the amino-analyzer tool and their possible solutions: + +1. **ValueError: Incorrect procedure** + If you receive this error, it means that you provided an incorrect procedure when calling `run_amino_analyzer`. Make sure you choose one of the following procedures: `aa_weight`, `count_hydroaffinity`, `peptide_cutter`, `one_to_three_letter_code`, or `sulphur_containing_aa_counter`. 
+ + Example: + ```python + run_amino_analyzer("VLSPADKTNVKAAW", "incorrect_procedure") + # Output: ValueError: Incorrect procedure. Acceptable procedures: aa_weight, count_hydroaffinity, peptide_cutter, one_to_three_letter_code, sulphur_containing_aa_counter + ``` + +2. **ValueError: Incorrect sequence** +This error occurs if the input sequence provided to `run_amino_analyzer` contains characters that are not valid amino acids. Make sure your sequence only contains valid amino acid characters (V, I, L, E, Q, D, N, H, W, F, Y, R, K, S, T, M, A, G, P, C, v, i, l, e, q, d, n, h, w, f, y, r, k, s, t, m, a, g, p, c). + + Example: + ```python + run_amino_analyzer("VLSPADKTNVKAAW!", "aa_weight") + # Output: ValueError: Incorrect sequence. Only amino acids are allowed (V, I, L, E, Q, D, N, H, W, F, Y, R, K, S, T, M, A, G, P, C, v, i, l, e, q, d, n, h, w, f, y, r, k, s, t, m, a, g, p, c). + ``` + +3. **ValueError: You have chosen an enzyme that is not provided** +This error occurs if you provide an enzyme other than "trypsin" or "chymotrypsin" when calling `peptide_cutter`. Make sure to use one of the specified enzymes. + + Example: + ```python + peptide_cutter("VLSPADKTNVKAAW", "unknown_enzyme") + # Output: ValueError: You have chosen an enzyme that is not provided. Please choose between trypsin and chymotrypsin. + ``` +4. **TypeError: 'int' object is not iterable** +If you encounter this error, it means that you passed a non-iterable value (for example, a number) where a sequence string is expected. Ensure that you're using the correct function and passing the correct arguments. 
+ + Example: + ```python + result = count_hydroaffinity(123) + # Output: TypeError: 'int' object is not iterable + ``` + + +## Contacts +If you have any questions, suggestions, or encounter any issues while using the amino-analyzer tool, feel free to reach out: + + +- **GitHub**: [Cucumberan](https://github.com/Cucumberan), [CaptnClementine](https://github.com/CaptnClementine), [iliapopov17](https://github.com/iliapopov17) From 42d6fa400e46e17b6a84e61fd28ad7deac61e581 Mon Sep 17 00:00:00 2001 From: Cucumberan <122701551+Cucumberan@users.noreply.github.com> Date: Sat, 30 Sep 2023 16:30:41 +0400 Subject: [PATCH 13/16] Update README.md --- HW4_Gorbarenko/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HW4_Gorbarenko/README.md b/HW4_Gorbarenko/README.md index 5e7bb92..12b15c7 100644 --- a/HW4_Gorbarenko/README.md +++ b/HW4_Gorbarenko/README.md @@ -13,7 +13,7 @@ This function counts the quantity of hydrophobic and hydrophilic amino acids wit Researchers can identify cleavage sites in a given peptide sequence using a specified enzyme. The tool currently supports two commonly used enzymes, trypsin and chymotrypsin. ### 4. One-letter to three-Letter code conversion The amino_analyzer provides a function to convert a protein sequence from the standard one-letter amino acid code to the three-letter code. -#### 5. Sulphur-containing amino acid counting +### 5. Sulphur-containing amino acid counting The tool allows a quick determine the number of sulphur-containing amino acids, namely Cysteine (C) and Methionine (M), within a protein sequence. 
## Usage From 787d5cac84b18104e272d9e160683b7c22abdedc Mon Sep 17 00:00:00 2001 From: Cucumberan <122701551+Cucumberan@users.noreply.github.com> Date: Sat, 30 Sep 2023 16:33:00 +0400 Subject: [PATCH 14/16] Update README.md --- HW4_Gorbarenko/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HW4_Gorbarenko/README.md b/HW4_Gorbarenko/README.md index 12b15c7..18c1ef5 100644 --- a/HW4_Gorbarenko/README.md +++ b/HW4_Gorbarenko/README.md @@ -18,7 +18,7 @@ The tool allows a quick determine the number of sulphur-containing amino acids, ## Usage -To run amino_analyzer tool you need to use the main function ***run_amino_analyzer*** with the following arguments: +To run amino_analyzer tool you need to use the function ***run_amino_analyzer*** with the following arguments: ```python from from amino_analyzer import run_amino_analyzer From d2410353b4b3c2b8e3a7f61702c104d17915f492 Mon Sep 17 00:00:00 2001 From: CaptnClementine <131146976+CaptnClementine@users.noreply.github.com> Date: Sat, 30 Sep 2023 22:15:45 +0300 Subject: [PATCH 15/16] Update README.md --- HW4_Gorbarenko/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/HW4_Gorbarenko/README.md b/HW4_Gorbarenko/README.md index 18c1ef5..0acd27a 100644 --- a/HW4_Gorbarenko/README.md +++ b/HW4_Gorbarenko/README.md @@ -159,7 +159,12 @@ If you encounter this error, it means that you're trying to iterate over a float result = count_hydroaffinity(123) # Output: TypeError: 'int' object is not iterable ``` +## Development team: +![image](https://github.com/CaptnClementine/HW4_Gorbarenko/assets/131146976/ad89e427-5b2a-4b32-b65f-519d284fcaa7) +**Anastasia Gorbarenko** - team leader, author of aa_weight and count_hydroaffinity functions +**Anna Ogurtsova** - author of peptide_cutter and one_to_three_letter_code functions +**Ilya Popov** - author of main and sulphur_containing_aa_counter functions ## Contacts If you have any questions, suggestions, or encounter any issues while using the 
amino-analyzer tool, feel free to reach out: From eedebb376b931e632f3b5d0fe2d7c0cda65d891e Mon Sep 17 00:00:00 2001 From: CaptnClementine <131146976+CaptnClementine@users.noreply.github.com> Date: Sat, 30 Sep 2023 22:18:38 +0300 Subject: [PATCH 16/16] Update README.md --- HW4_Gorbarenko/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HW4_Gorbarenko/README.md b/HW4_Gorbarenko/README.md index 0acd27a..e874128 100644 --- a/HW4_Gorbarenko/README.md +++ b/HW4_Gorbarenko/README.md @@ -21,7 +21,7 @@ The tool allows a quick determine the number of sulphur-containing amino acids, To run amino_analyzer tool you need to use the function ***run_amino_analyzer*** with the following arguments: ```python -from from amino_analyzer import run_amino_analyzer +from amino_analyzer import run_amino_analyzer run_amino_analyzer(sequence, procedure, *, weight_type = 'average', enzyme: str = 'trypsine')` ```