-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsizeSelectFromFasta.pl
76 lines (60 loc) · 1.26 KB
/
sizeSelectFromFasta.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Std;
# Command-line switches, filled in by getopts below:
#   -f <file>   FASTA file to filter
#   -s <sizes>  sequence length to keep: a single value or a "min-max" range
#   -n <count>  accepted by getopts, but not consulted by the filtering
#               loop below
#   -h          request the usage text (handled in varcheck)
my %opts;
getopts('f:hs:n:',\%opts);
varcheck();

# A plain value such as "24" means min == max; "20-22" is an inclusive range.
my ($lenMin,$lenMax);
if ($opts{'s'} =~ /-/) {
    ($lenMin,$lenMax) = split(/-/, $opts{'s'});
}
else {
    $lenMin = $opts{'s'};
    $lenMax = $opts{'s'};
}

# Three-argument open with a lexical handle: the file name can never be
# misread as an open mode, and $! tells the user why the open failed.
open(my $fasta_fh, '<', $opts{'f'}) or die "Can't open $opts{'f'}: $!";

# The record currently being assembled from the input.
my ($header,$sequence) = ("","");

# Print the pending record when its sequence is non-empty and its length
# lies within [$lenMin, $lenMax]. Shared by the "new header" case and the
# end-of-file flush so both apply exactly the same test.
my $emit_if_in_range = sub {
    my $seqLen = length $sequence;
    return if $seqLen == 0;
    return if $seqLen < $lenMin || $seqLen > $lenMax;
    print "$header\n$sequence\n";
    return;
};

while (my $line = <$fasta_fh>) {
    chomp $line;
    # Files with CRLF line endings keep a "\r" after chomp; drop it so it is
    # neither counted in the sequence length nor echoed into the output.
    $line =~ s/\r\z//;
    next if $line =~ /^#/;
    # A FASTA header starts with ">" in the first column; anchoring keeps a
    # stray ">" inside another line from being treated as a new record.
    if ($line =~ /^>/) {
        $emit_if_in_range->();
        $header = $line;
        $sequence = "";
    }
    else {
        # Sequences may span several lines; join them into one string.
        $sequence .= $line;
    }
}

# The final record has no following header to trigger its output, so it
# must be flushed explicitly once the input is exhausted.
$emit_if_in_range->();

close($fasta_fh);
# Validate the command-line options stored in the file-level %opts hash.
#
# When -h was given, the usage text is shown right away (usage() exits).
# Otherwise every problem found is collected so the user sees all of them
# at once; if there is at least one, the messages are printed followed by
# the usage text, and usage() terminates the program.
#
# Checks performed:
#   -f  must be supplied and must name an existing path
#   -s  must be supplied and must be a single non-negative integer ("24")
#       or two of them joined by a dash ("20-22")
#
# Takes no arguments. Returns nothing; it only returns at all when the
# options passed every check.
sub varcheck {
    usage() if ($opts{'h'});
    my $errors = "";
    # Test for defined/non-empty rather than truthiness so that a literal
    # "0" is not mistaken for a missing value.
    if (!defined $opts{'f'} || $opts{'f'} eq "") {
        $errors .= "-f flag not provided\n";
    }
    elsif (!(-e $opts{'f'})) {
        $errors .= "Can't open $opts{'f'}\n";
    }
    if (!defined $opts{'s'} || $opts{'s'} eq "") {
        $errors .= "-s flag not provided\n";
    }
    elsif ($opts{'s'} !~ /\A\s*[0-9]+\s*(?:-\s*[0-9]+\s*)?\z/) {
        # Reject values such as "20-", "-22" or "abc" here; they would
        # otherwise reach the numeric length comparisons as undef or
        # non-numeric strings.
        $errors .= "-s must be a single size or a min-max range (e.g. 20-22 or 24)\n";
    }
    if ($errors ne "") {
        print "\n$errors";
        usage();
    }
    return;
}
# Print the command-line help and end the program.
#
# The synopsis line shows the script's own file name, taken from $0 with any
# leading directory portion removed. Output goes to the currently selected
# handle via print. Never returns: it finishes with a bare exit, i.e. exit
# status 0.
sub usage{
    # The greedy ".*" followed by "/" consumes everything up to and including
    # the last slash, leaving only the file name.
    (my $program = $0) =~ s/\/?.*\///;

    # Build the whole message first, then emit it in one print.
    my $helpText = join '', "\nusage: perl $program <-f file> <-s size range> [-n read count]\n", <<PRINTTHIS;
Select sequences from a fasta that only correspond to certain sizes
-f Fasta formatted file to filter
-s Sizes to keep (e.g. 20-22 or 24);
-n minimum read count
PRINTTHIS

    print $helpText;
    exit;
}