-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread_counter_II.pl
executable file
·98 lines (84 loc) · 3.46 KB
/
read_counter_II.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/perl
# A script to count the number of overlapping reads for a given window size around a list of given origins
# This has minor changes from read_counter.pl to count reads from all 829 origins of ori_data_1.4.txt
# Input:
# 1. file with list of origin locations (ori_data_1.4.txt)
# 2. window size (even integer in bp)
# 3. prefix for sorted bam file (dsSPD4a, etc. note that an indexed bam file must be located in same directory)
# 4. prefix for associated bam file (dsSPD4b_1, etc.)
# 5. prefix for associated bam file (dsSPD8a, etc.)
# 6. prefix for associated bam file (dsSPD8b, etc.)
# Example: read_counter_II.pl ori_data_1.4.txt 1000 dsSPD4a dsSPD4b_1 dsSPD8a dsSPD8b
# Prerequisites:
# samtools must be installed and binary in PATH
use strict;
use warnings;
# Arguments
my $origin_locs=$ARGV[0];
my $window_size=$ARGV[1];
my $read_map1 = $ARGV[2];
my $read_map2 = $ARGV[3];
my $read_map3 = $ARGV[4];
my $read_map4 = $ARGV[5];
# Variables
my $window_start;
my $window_end;
###BEGIN PROGRAM########################################################################
# Create string to represent summed MCM count over datasets (just use first 6 characters)
$read_map1=~ /.{6}/;
my $read_map_1_2 = $&;
$read_map3=~ /.{6}/;
my $read_map_3_4 = $&;
# Assign mapped read files
my $map_file1 = join("", $read_map1, "_mapped_sorted.bam");
my $map_file2 = join("", $read_map2, "_mapped_sorted.bam");
my $map_file3 = join("", $read_map3, "_mapped_sorted.bam");
my $map_file4 = join("", $read_map4, "_mapped_sorted.bam");
# Open location file and file to print results
open(ORIGINS, $origin_locs);
open(OUT, ">ori_data_1.5.txt");
# Set variables for origin location input file (need to be adjusted depending on file)
my $chr_col = 1;
my $acs_loc_col = 3;
# Skip the header line of location file
my $header = <ORIGINS>;
$header =~ /.+/;
# Add new read count columns to output file
my $new_header = join("\t", $&, "${read_map1}_MCM_count", "${read_map2}_MCM_count", "${read_map_1_2}_MCM_count", "${read_map3}_MCM_count", "${read_map4}_MCM_count", "${read_map_3_4}_MCM_count");
print OUT "$new_header\n";
# Cycle through origin locations outputting read count for each location
while ($_ = <ORIGINS>) {
my @line = split;
# Extract origin location info
my $chr = $line[$chr_col];
my $acs_loc = $line[$acs_loc_col];
# Get read counts overlapping window around acs motif start location
if($acs_loc < ($window_size/2)) {
$window_start = 0;
$window_end = $window_size;
} else {
$window_start = $acs_loc - ($window_size/2);
$window_end = $acs_loc + ($window_size/2);
}
my $mcm_count1 = `samtools view -c $map_file1 chr$chr:$window_start-$window_end`;
$mcm_count1 =~ /.+/;
$mcm_count1 = $&;
my $mcm_count2 = `samtools view -c $map_file2 chr$chr:$window_start-$window_end`;
$mcm_count2 =~ /.+/;
$mcm_count2 = $&;
my $mcm_count_1_2 = $mcm_count1 + $mcm_count2;
my $mcm_count3 = `samtools view -c $map_file3 chr$chr:$window_start-$window_end`;
$mcm_count3 =~ /.+/;
$mcm_count3 = $&;
my $mcm_count4 = `samtools view -c $map_file4 chr$chr:$window_start-$window_end`;
$mcm_count4 =~ /.+/;
$mcm_count4 = $&;
my $mcm_count_3_4 = $mcm_count3 + $mcm_count4;
# Join results
my $mcm_results = join("\t", $mcm_count1, $mcm_count2, $mcm_count_1_2, $mcm_count3, $mcm_count4, $mcm_count_3_4);
# Print mcm_count to file
push(@line, $mcm_results);
my $new_line = join("\t", @line);
print OUT "$new_line\n";
}
close OUT;