-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtransm_domains.pl
66 lines (47 loc) · 1.39 KB
/
transm_domains.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
use strict;
use warnings;

# transm_domains.pl
#
# Reads flat-file protein entries (laid out like UniProtKB/Swiss-Prot text
# records: two-letter line codes, each entry terminated by a "//" line) from
# the files named on the command line, or from STDIN, and writes one summary
# per entry to out.txt:
#
#     >ENTRY_NAME|ACCESSION|LENGTH AA
#     SEQUENCE
#     ---MMMMMMM---        ("M" = residue inside a TRANSMEM feature)
#     //
#
# Usage: perl transm_domains.pl entries.dat [more.dat ...]

# Build the per-residue transmembrane mask for one entry.
#
#   $record - full text of one entry
#   $length - declared sequence length (number of residues)
#
# Returns a string of exactly $length characters: "M" where the residue is
# covered by a TRANSMEM feature, "-" elsewhere.
sub transmembrane_mask {
    my ($record, $length) = @_;

    my @mask = ('-') x $length;

    # Accepts both location layouts: "start   end" (older releases) and
    # "start..end" (current releases).  Locations with uncertain ends
    # ("<1", "?") do not match and are skipped.
    while ($record =~ /^FT[ \t]+TRANSMEM[ \t]+(\d+)(?:\.\.|[ \t]+)(\d+)/mg) {
        my ($start, $end) = ($1, $2);

        # Clamp to the sequence so a bad location can neither grow the mask
        # nor wrap around through a negative array index.
        $start = 1       if $start < 1;
        $end   = $length if $end > $length;
        next if $start > $end;

        # Feature positions are 1-based; the mask array is 0-based.
        @mask[ $start - 1 .. $end - 1 ] = ('M') x ($end - $start + 1);
    }

    return join '', @mask;
}

# Turn one entry into its output block (header, sequence, mask, "//").
#
#   $record - full text of one entry, including its terminator if present
#
# Returns the text to be written for that entry.  Header fields that are
# missing from the entry are simply omitted; when the SQ line is missing the
# mask is empty rather than inherited from a previous entry.
sub format_record {
    my ($record) = @_;

    my $text   = '';
    my $length = 0;

    # Entry name: first token of the ID line.
    if ($record =~ /^ID[ \t]+(\S+)/m) {
        $text .= ">$1";
    }

    # Primary accession: first item of the first AC line.
    if ($record =~ /^AC[ \t]+(.*?);/m) {
        $text .= "|$1";
    }

    # Sequence header: keep "<n> AA" for the output, and <n> for the mask.
    if ($record =~ /^SQ[ \t]+SEQUENCE[ \t]+(.*?);/m) {
        my $summary = $1;
        $text .= "|$summary\n";
        if ($summary =~ /\A(\d+)\sAA/) {
            $length = $1;
        }
    }

    # Sequence lines are the ones indented by five blanks; the residues are
    # printed as one unbroken string.
    my $sequence = join '', $record =~ /^[ \t]{5}(.*)$/mg;
    $sequence =~ s/\s+//g;

    $text .= "$sequence\n";
    $text .= transmembrane_mask($record, $length);
    $text .= "\n//\n";

    return $text;
}

# Read every entry from <> and write its summary to out.txt.
sub main {
    my $output_file = 'out.txt';

    open my $out, '>', $output_file
        or die "Cannot open $output_file for writing: $!\n";

    # One read per entry: entries end with a line holding only "//".
    local $/ = "//\n";

    while (my $record = <>) {
        # Ignore blank chunks (e.g. trailing newlines after the last entry)
        # and bare terminators; they carry no entry.
        next if $record =~ m{\A \s* (?: // \s* )? \z}x;

        print {$out} format_record($record);
    }

    close $out
        or die "Cannot close $output_file: $!\n";

    return;
}

# Run only when executed as a script, so the helpers can be loaded on their own.
main() unless caller;