-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgffExtractor.pl
87 lines (72 loc) · 1.74 KB
/
gffExtractor.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Std;
# Parse the command-line switches into the file-level %opts hash (also read by
# varcheck): -g GFF file, -l gene list, -o output file, -v invert, -c header.
my %opts;
getopts('g:l:o:vc',\%opts);
varcheck();

# Read the gene list directly rather than shelling out to `cat`, so a file
# name containing spaces or shell metacharacters cannot be misinterpreted.
open(my $list_fh, '<', $opts{'l'}) or die "Can't open $opts{'l'}: $!\n";
my @gene_patterns;
while (my $gene = <$list_fh>) {
    # Strip the line ending (LF or CRLF) once, here, instead of on every pass.
    $gene =~ s/\r?\n\z//;
    # A blank entry would otherwise become a pattern that matches every line.
    next unless $gene =~ /\S/;
    # Gene names are literal text, not regexes: escape '.', '|', '+', etc.
    push @gene_patterns, quotemeta($gene);
}
close($list_fh);

# One precompiled alternation replaces the per-line loop over every gene.
# A gene only counts when followed by ',' or ';' or the end of the line
# (LF, CRLF, or a final line with no terminator). Left undefined when the
# list is empty so that nothing is treated as listed.
my $gene_re;
if (@gene_patterns) {
    my $alternation = join('|', @gene_patterns);
    $gene_re = qr/(?:$alternation)(?:[,;\r\n]|\z)/;
}

open(my $gff_fh, '<', $opts{'g'}) or die "Can't open $opts{'g'}: $!\n";

# Output goes to the -o file when one is named; no -o (or "-o -") means STDOUT.
my $out_fh;
if (defined $opts{'o'} && $opts{'o'} ne '' && $opts{'o'} ne '-') {
    open($out_fh, '>', $opts{'o'}) or die "Can't open $opts{'o'}: $!\n";
}
else {
    $out_fh = \*STDOUT;
}

print {$out_fh} "##gff-version\t3\n" if $opts{'c'};

# With -v the selection is inverted: print the lines that name no listed gene.
my $invert = $opts{'v'} ? 1 : 0;

while (my $line = <$gff_fh>) {
    # Skip blank lines and lines beginning with "#", as the usage text states.
    next if $line =~ /\A(?:#|\s*\z)/;

    my $listed = (defined $gene_re && $line =~ $gene_re) ? 1 : 0;
    print {$out_fh} $line if $listed != $invert;
}

close($gff_fh);
# Buffered write errors (e.g. a full disk) only surface when the handle closes.
close($out_fh) or die "Error writing output: $!\n";
# Validates the required command-line options stored in the file-level %opts.
#
# Checks that -g (GFF file) and -l (gene list) were both supplied and that
# each names an existing path. Takes no arguments. Returns quietly when both
# are fine; otherwise writes every problem found to STDERR (STDOUT is the
# default data stream, so diagnostics must not be mixed into it) and hands
# control to usage() with a non-zero status.
sub varcheck {
    my @problems;

    for my $flag ('g', 'l') {
        my $path = $opts{$flag};

        # Test definedness/emptiness rather than truth so that a file that is
        # literally named "0" is not reported as a missing flag.
        if (!defined $path || $path eq '') {
            push @problems, "-$flag flag not provided\n";
        }
        elsif (!(-e $path)) {
            push @problems, "Can't open $path\n";
        }
    }

    return unless @problems;

    print STDERR "\n", @problems;
    usage(1);
}
# Prints the usage summary for this script and terminates the program.
#
# Parameters:
#   $exit_status - optional process exit status; defaults to 0, which keeps
#                  the behaviour of a plain argument-less call. A non-zero
#                  status marks an error exit and sends the text to STDERR
#                  so it cannot end up in piped or redirected data output.
#
# Never returns: always calls exit.
sub usage{
    my ($exit_status) = @_;
    $exit_status = 0 unless defined $exit_status;
    my $out_fh = $exit_status ? \*STDERR : \*STDOUT;

    # Reduce $0 to the bare script name by dropping everything up to and
    # including the last "/".
    my $scriptName = $0;
    $scriptName =~ s/\/?.*\///;

    print {$out_fh} "\nusage: perl $scriptName <-g gff file> <-l gene list> [-o -v -c]\n";
    print {$out_fh} <<PRINTTHIS;
Extracts genes from gff file. Script is necessary because grep is too greedy and
grep -f can't be used to funnel a list into a regex.
Assumes that the gene name is in the note and is followed by one of ";,\\n".
Skips any blank lines or lines beginning in "#".
-o file. Specifies the optional output file. Default output is to STDOUT.
-c (flag). Will print "##gff-version 3" at the head of the file.
-v (flag). Will print all genes EXCEPT for the ones listed in the -l file.
PRINTTHIS
    exit $exit_status;
}