-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfastaRename.pl
executable file
·109 lines (87 loc) · 2 KB
/
fastaRename.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Std;
# ---------------------------------------------------------------------------
# Main script body.
#
# Reads the FASTA file given with -f, replaces every header line (">name")
# with a new name, and copies all other lines through unchanged.  The new
# name comes from the -s substitution table when the old name is listed
# there; otherwise it is the -p prefix followed by a zero-padded running
# number.  The old-name -> new-name pairs are written, in input order, to
# nameMap.txt (relative path, so it lands in the current directory).
# ---------------------------------------------------------------------------

# Command-line options; file-level because varcheck() reads it directly.
my %opts;
getopts('f:p:o:s:a:h',\%opts);
varcheck();    # exits through usage() on -h or when -f is missing/invalid

my $prefix  = (defined $opts{'p'} && length $opts{'p'}) ? $opts{'p'} : "gene";
my $outfile = (defined $opts{'o'} && length $opts{'o'}) ? $opts{'o'} : "-";

# -a is advertised in the help text; honour it here.  The historical width
# of 6 digits stays the default so existing output does not change.
my $pad_width = 6;
if (defined $opts{'a'}) {
    die "-a expects a non-negative integer, got '$opts{'a'}'\n"
        unless $opts{'a'} =~ /\A\d+\z/;
    $pad_width = $opts{'a'};
}

open(my $in_fh, '<', $opts{'f'})
    or die "Can't open $opts{'f'} for reading: $!\n";

# "-" means standard output.  Three-arg open would treat "-" as a literal
# file name, so that case is handled explicitly.
my $out_fh;
if ($outfile eq "-") {
    $out_fh = \*STDOUT;
}
else {
    open($out_fh, '>', $outfile)
        or die "Can't open $outfile for writing: $!\n";
}

# Optional old-name -> new-name table; stays empty when -s is not given,
# in which case every header falls back to prefix + number.
my %nameConvert;
if (defined $opts{'s'} && length $opts{'s'}) {
    %nameConvert = readNameSubs($opts{'s'});
}

my $counter = 1;
my @nameLog;    # [old name, new name] pairs in input order

# Stream the file line by line instead of loading it all into memory.
while (my $line = <$in_fh>) {
    if ($line =~ /^>/) {
        # Strip the line ending (LF or CRLF) and the leading ">".
        $line =~ s/\r?\n\z//;
        $line =~ s/^>//;

        my $mapped = $nameConvert{$line};
        my $newName;
        if (defined $mapped && length $mapped) {
            $newName = $mapped;
        }
        else {
            $newName = $prefix . sprintf('%0*d', $pad_width, $counter);
            $counter++;
        }

        # Record pairs rather than a hash keyed by old name, so repeated
        # header names each keep the new name they were actually given.
        push(@nameLog, [$line, $newName]);
        print {$out_fh} ">$newName\n";
    }
    else {
        print {$out_fh} $line;
    }
}
close($in_fh);

if ($outfile ne "-") {
    # Buffered write errors only surface at close.
    close($out_fh) or die "Error writing $outfile: $!\n";
}

open(my $log_fh, '>', 'nameMap.txt')
    or die "Can't open nameMap.txt for writing: $!\n";
foreach my $pair (@nameLog) {
    print {$log_fh} "$pair->[0]\t$pair->[1]\n";
}
close($log_fh) or die "Error writing nameMap.txt: $!\n";
# readNameSubs($filename)
#
# Loads a name-substitution table from a tab-delimited text file.
#
#   $filename - path of the table; column 1 is the existing name, column 2
#               the replacement name.  Any further columns are ignored.
#
# Returns a hash (as a flat list) mapping existing name => replacement name.
# Lines without both a non-empty first and second column (including blank
# lines) are skipped, so they never produce an entry.
# Dies if the file cannot be opened.
sub readNameSubs {
    my $filename = shift;

    open(my $in_fh, '<', $filename)
        or die "Can't open substitution file $filename: $!\n";

    my %retHash;
    while (my $line = <$in_fh>) {
        # Remove LF or CRLF so a Windows-edited table does not leave "\r"
        # on the end of every replacement name.
        $line =~ s/\r?\n\z//;

        my ($old, $new) = split(/\t/, $line);
        next unless defined $old && length $old;
        next unless defined $new && length $new;

        $retHash{$old} = $new;
    }
    close($in_fh);

    return %retHash;
}
# varcheck()
#
# Validates the command-line options stored in the file-level %opts hash.
# Takes no arguments and returns nothing useful.
#
#   -h given              -> prints the help via usage() and exits.
#   -f missing or empty   -> reports the problem and exits via usage().
#   -f not an existing
#      non-directory path -> reports the problem and exits via usage().
#
# Problems are written to STDERR because STDOUT is the default destination
# for the renamed FASTA output and must not be polluted with diagnostics.
sub varcheck {
    usage() if ($opts{'h'});

    my $fasta  = $opts{'f'};
    my $errors = "";

    if (!defined $fasta || !length $fasta) {
        $errors .= "You have not provided fasta file with the -f flag\n";
    }
    elsif (!-e $fasta || -d _) {
        # "-d _" reuses the stat result from the -e test; a directory
        # exists but cannot be read as a FASTA file.
        $errors .= "Can't open $fasta\n";
    }

    if ($errors ne "") {
        print STDERR "\n$errors";
        # Passes a failure status; usage() always terminates the program.
        usage(1);
    }
}
# usage([$exit_code])
#
# Prints the command-line help and terminates the program.
#
#   $exit_code - optional process exit status; defaults to 0.  Anything that
#                is not a plain non-negative integer is treated as 0.
#
# With an exit status of 0 the help goes to STDOUT (a requested -h); with a
# non-zero status it goes to STDERR so that it does not mix with FASTA
# output being written to STDOUT.  Never returns.
sub usage {
    my $exit_code = shift;
    $exit_code = 0 unless defined $exit_code && $exit_code =~ /\A\d+\z/;

    my $out = $exit_code ? \*STDERR : \*STDOUT;

    # Show only the script's file name, without any leading directory.
    my $scriptName = $0;
    $scriptName =~ s{.*/}{};

    print {$out} "\nusage: perl $scriptName <-f> [-o] [-p -s] [-a] [-h]\n";
    print {$out} <<PRINTTHIS;
Renames a fasta file. If only the -p flag is used, a sequential number will be used.
The old and new names are also written, tab separated, to nameMap.txt.
  -f filename   fasta file to rename (required)
  -p text       specifies the header name prefix. Default = "gene"
  -s filename   substitution file. Tab delim file with existing name in col 1 and new name in col 2.
                If a match is not found, it will revert to using a name with a prefix and a sequential number.
  -a integer    number of digits the sequential number is zero-padded to. Default = 6.
  -o filename   output file name. Default = standard output
  -h            print this help and exit
PRINTTHIS
    exit $exit_code;
}