-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgffCleaner.pl
72 lines (49 loc) · 1.36 KB
/
gffCleaner.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Std;
# ---------------------------------------------------------------------------
# Main script body.
#
# Reads the GFF file named by -g, keeps only the feature lines whose type
# column (3rd column) contains "mrna", "cds" or "utr" (case-insensitive),
# removes the Parent attribute from mRNA lines, and writes the result to the
# file named by -o (or to STDOUT when -o is absent or is "-").
# ---------------------------------------------------------------------------
my %opts;
getopts('hg:o:', \%opts);
varcheck();

# Three-argument open with lexical handles: the file name can no longer be
# interpreted as an open mode, and the handles are scoped variables.
open(my $gff_in, '<', $opts{'g'})
    or die "Can't open $opts{'g'}: $!\n";

# "-" (or no -o at all) has always meant STDOUT; three-arg open would treat
# "-" as a literal file name, so that case is handled explicitly.
my $outFile = (defined $opts{'o'} && length $opts{'o'}) ? $opts{'o'} : '-';
my $gff_out;
if ($outFile eq '-') {
    $gff_out = \*STDOUT;
}
else {
    open($gff_out, '>', $outFile)
        or die "Can't open $outFile: $!\n";
}

# NOTE(review): the GFF3 specification writes this directive with a space
# ("##gff-version 3"); the tab is kept here to preserve existing output --
# confirm that downstream parsers accept it.
print {$gff_out} "##gff-version\t3\n";

LINE:
while (my $line = <$gff_in>) {
    chomp $line;

    my @fields = split /\t/, $line;

    # Blank lines, directives and comments have no type column; skipping
    # them avoids "uninitialized value" warnings in the matches below.
    next LINE if @fields < 3;

    my $type = $fields[2];
    next LINE unless $type =~ /mrna|cds|utr/i;

    # Exactly nine columns are written.  Missing trailing columns become
    # empty strings (same output as before, minus the warnings) and any
    # columns beyond the ninth are dropped, as before.
    my @columns = map { defined $_ ? $_ : '' } @fields[0 .. 8];

    if ($type =~ /mrna/i) {
        # Remove the Parent attribute together with the separator that
        # follows it.  The pattern is anchored to an attribute boundary
        # (start of the column or just after ';') so that "parent=" inside
        # another attribute's key or value is left alone.
        $columns[8] =~ s{ (?: \A | (?<=;) ) \s* parent= [^;]* ;? }{}gix;
    }

    print {$gff_out} join("\t", @columns), "\n";
}

close($gff_in);

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the close of the output handle is checked.
close($gff_out) or die "Error writing to $outFile: $!\n";
# Validate the command-line options stored in the file-level %opts hash.
#
# - If -h was given, the usage text is printed and the script ends.
# - Otherwise -g must be present and must name an existing path.
#
# Any problems found are reported on STDERR (STDOUT is the default data
# output stream of this script, so diagnostics must not be mixed into it),
# followed by the usage text, after which the script ends via usage().
# Returns nothing when all checks pass.
sub varcheck {
    usage() if $opts{'h'};

    my @errors;

    # Test for definedness/length rather than truth so that a file literally
    # named "0" is not mistaken for a missing -g option.
    if (!defined $opts{'g'} || !length $opts{'g'}) {
        push @errors, "-g flag not provided\n";
    }
    elsif (!-e $opts{'g'}) {
        push @errors, "Can't open $opts{'g'}\n";
    }

    if (@errors) {
        print STDERR "\n", @errors;
        usage();
    }

    return;
}
# Print the command-line help text to STDOUT and end the script.
#
# The program name shown in the usage line is $0 with any leading directory
# portion removed.  This sub never returns: it always terminates the process
# through exit (default status 0).
sub usage {
    (my $program = $0) =~ s/\/?.*\///;

    print "\nusage: perl $program <-g file> [-o outfile]\n", <<PRINTTHIS;
Converts gff file to gff3 format, retaining only mRNA, CDS and UTR lines.
Functionality highly dependent on what the inital gff file looks like.
-g gff file to convert
-o optional file for output. Defaults to STDOUT
PRINTTHIS

    exit 0;
}