-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPseudohaploidGenotypeCalling
119 lines (93 loc) · 4.69 KB
/
PseudohaploidGenotypeCalling
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Description: Call pseudohaploid genotypes in eigenstrat or plink format from .bam files using sequenceTools.
# SequenceTools GitHub: https://github.com/stschiff/sequenceTools
# enter an interactive session (or use pbs script)
qsub -I -l walltime=10:00:00 -l nodes=1:ppn=12 -l mem=32gb
# change directory to where you will be working
cd /your/file/path/genotype_calling
# load software packages
ml gcc/12.1.0
ml gsl/2.7.1
ml openblas
ml samtools/1.17
ml bcftools/1.17
ml htslib
# run samtools mpileup (https://www.htslib.org/doc/samtools-mpileup.html)
# mpileup command anatomy
samtools mpileup [-EB] [-C capQcoef] [-r reg] [-f in.fa] [-l list] [-Q minBaseQ] [-q minMapQ] in.bam [in2.bam [...]]
# to run on a single .bam file
samtools mpileup -R -B -q30 -Q20 -l snp_list -f human_g1k_v37.fasta /your/file/path/data/name.bam > pileup.txt
pileupCaller-linux --randomHaploid --sampleNames [ID##] --samplePopName [Site_ID] -f snp_list -e [file_name] < pileup.txt
# to loop over multiple bam files
data_dir="/your/file/path/data_directory/"
eig_out_root="${data_dir}/pseudohaploid"
bam_files=$(ls ${data_dir}/bams/*bam)
sample_names=""
for f in $bam_files; do
sample_id=$(basename $f | cut -f1 -d "_")
#sample_id=$(echo $f | cut -f12 -d "/" | cut -f2 -d "_")
if [ "$sample_names" = "" ]; then
sample_names=$sample_id
else
sample_names=${sample_names}","$sample_id
fi
done
# to check the list of sample names
echo $sample_names
samtools mpileup -R -B -q30 -Q20 -l /your/file/path/snp_list -f /your/file/path/hs37d5/hs37d5.fa $bam_files > pileup.txt
/your/file/path/bin/pileupCaller-linux --randomHaploid --sampleNames $sample_names --samplePopName [Site_ID] -f /your/file/path/snp_list -e [file_name] < pileup.txt
# merge the output of the genotype calling with other datasets (AADR used as example below)
# Documentation for mergeit: https://github.com/argriffing/eigensoft/blob/master/CONVERTF/README
ml gcc/12.1.0
ml intel/2022.2
ml llvm/14.0.5
ml eigensoft/8.0.0
#######################################################################################################
#cat <<EOT>> parMerge
DIR: /your/file/path/data/AADR/1240K/v54
geno1: DIR/v54.1_1240K_public.geno
snp1: DIR/v54.1_1240K_public.snp
ind1: DIR/v54.1_1240K_public.ind
geno2: /your/file/path/users_projects/hmoots/analyses/genotypecalling/output.geno
snp2: /your/file/path/users_projects/hmoots/analyses/genotypecalling/output.snp
ind2: /your/file/path/users_projects/hmoots/analyses/genotypecalling/output.ind
genooutfilename: output_plus_aadr_merge_1240K_snps.geno
snpoutfilename: output_plus_aadr_merge_1240K_snps.snp
indoutfilename: output_plus_aadr_merge_1240K_snps.ind
hashcheck: NO
EOT
##########
#to run
mergeit -p parMerge
#######################################################################################################
#cat <<EOT>> parMerge
DIR: /your/file/path/users_projects/hmoots/data/AADR/HO/v54
geno1: DIR/v54.1.p1_HO_public.geno
snp1: DIR/v54.1.p1_HO_public.snp
ind1: DIR/v54.1.p1_HO_public.ind
geno2: /your/file/path/users_projects/hmoots/analyses/genotypecalling/output.geno
snp2: /your/file/path/users_projects/hmoots/analyses/genotypecalling/output.snp
ind2: /your/file/path/users_projects/hmoots/analyses/genotypecalling/output.ind
genooutfilename: output_plus_aadr_merge_HO_snps.geno
snpoutfilename: output_plus_aadr_merge_HO_snps.snp
indoutfilename: output_plus_aadr_merge_HO_snps.ind
hashcheck: NO
EOT
##########
#to run
mergeit -p parMerge
#######################################################################################################
#######################################################################################################
#######################################################################################################
#######################################################################################################
#######################################################################################################
#######################################################################################################
qsub -I -l walltime=10:00:00 -l nodes=1:ppn=12 -l mem=32gb
cd /your/file/path/users_projects/analyses/genotypecalling
ml gcc
module load bcftools/1.6.0
module load samtools
module load htslib
bindir="/your/file/path/bin"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${bindir}"/glibc214/glibc-2.14/build"
samtools mpileup -R -B -q30 -Q20 -l /your/file/path/snp_list -f /your/file/path/hs37d5/hs37d5.fa $bam_files > pileup.txt
/your/file/path/bin/pileupCaller-linux --randomHaploid --sampleNames $sample_names --samplePopName [Site_ID] -f /your/file/path/snp_list -e [file_name] < pileup.txt