-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathgenerateArtifactFinder.pl
executable file
·108 lines (79 loc) · 3.5 KB
/
generateArtifactFinder.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/perl -w
use strict;
use File::Basename;
use DBI;
if (@ARGV != 2) {
print "\tUsage:\n";
print "\t\t./generateArtifactFinder <snvmix_novel_codon_annot file> <full path to output file>\n";
exit;
}
#CONSTANT
my $EXTENSION = ".txt";
my $JUNCTIONSPLICE = "/share/data/rgoya/scripts/getIntronZone-beta.pl";
#command line args
my $nonsynfile = $ARGV[0];
my $artifactfile = $ARGV[1];
#files generated
my $rootpath = dirname($nonsynfile);
my $basename = basename($nonsynfile, $EXTENSION);
my $genenamefile = $rootpath . "/" . $basename . "_gene.txt";
my $tmpcheckfile = $rootpath . "/" . $basename . "_tmpCheck.txt";
my $tmpresultfile = $rootpath . "/" . $basename . "_tmpResult.txt";
#commands to run
my $juncArtCheck = "$JUNCTIONSPLICE -l $tmpcheckfile > $tmpresultfile";
# open up database to get gene name from ensemble id
#open a database handle
my $dbh = DBI->connect('DBI:mysql:ovmut_51;host=shannon.cluster.bccrc.ca;port=3306', 'ovmut_rw', 'o9v8m7')
or die "Couldn't connect to the database: " . DBI->errstr;
# reusable prepared statement handles
my $gene_handle = $dbh->prepare("SELECT gene_name FROM Gene WHERE ensg_id=?");
# create temperory file for junction splicing check
open (INFO, $nonsynfile);
open (FILE, ">$tmpcheckfile");
open (FILE2, ">$genenamefile");
print FILE "gene_name\tchr\tstart\n";
while (my $line = <INFO>) {
chomp($line);
my @content = split(/\s+/, $line);
my ($chromosome, $position) = split(/:/, $content[0]);
$chromosome =~ s/chr//;
$gene_handle->execute($content[1]);
my $gene_result = $gene_handle->fetchrow();
print FILE join("\t", $gene_result, $chromosome, $position) . "\n";
print FILE2 join(" ", $line, $gene_result) . "\n";
}
close(INFO);
close(FILE);
close(FILE2);
# get junction splicing result
#print $juncArtCheck . "\n";
`$juncArtCheck`;
# join junction splicing result with annot file
open (RESULT, $tmpresultfile);
my %splice_result = ();
print "$tmpresultfile\n";
while (my $newline = <RESULT>) {
if ($newline =~ m/\bConnecting\b|^\#\#SUMMARY$|^\s*$/) {
next;
}
chomp ($newline);
my ($gene, $chr, $pos, $result) = split(/\t/, $newline);
my $key = join ("\t", $gene, $chr, $pos);
$splice_result{$key} = $result;
}
close (RESULT);
#print "size of hash: " . keys (%splice_result) . "\n";
open (MERGE, $genenamefile);
open (FINAL, ">$artifactfile");
while (my $read_content = <MERGE>) {
chomp($read_content);
my @info = split(/\s+/, $read_content);
my ($mut_chr, $mut_pos) = split(/:/, $info[0]);
$mut_chr =~ s/chr//;
my $mut_gene = pop(@info);
my $mut_key = join("\t", $mut_gene, $mut_chr, $mut_pos);
print FINAL join(" ", @info, $splice_result{$mut_key}) . "\n";
}
close (MERGE);
close (FINAL);
`rm $genenamefile $tmpcheckfile $tmpresultfile`;