-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPhaseExtenderOnForLoop_SetA.sh
54 lines (30 loc) · 1.91 KB
/
PhaseExtenderOnForLoop_SetA.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/bin/bash
set -euf pipefail # prevents all the codes from running if a error is hit in any code.
## Run phaseExtension on all the samples using "for loop"
# set the path for "phaseExtender.py" file ; **note: Update path as need be.
phaseEXT=phase-Extender.py
# create empty file to store the output path for each run
echo > data/files_to_merge_SetA_run01.txt
for item in NA07056 NA06989 NA12891 NA12890 NA12875 NA12827 NA06985 NA12763 NA11917 NA12892
do
# Run phaseExtension on the item (aka sample)
phase-extender --input data/SetA/simulated_RBphasedHaplotype_SetA.txt --SOI ${item} --output data/SetA/phased_${item}_SetA_run01 --numHets 25 --lods 5 --writeLOD yes --hapStats yes --addMissingSites no
# also write the path of the output directory for each sample
# so, they can be merged later
echo "data/SetA/phased_${item}_SetA_run01/extended_haplotype_${item}.txt" >> data/files_to_merge_SetA_run01.txt
done
## Now, merge all the haplotype together
# remove the first empty line from the file that store path to the extended haplotype for each sample
echo "$(tail -n +2 data/files_to_merge_SetA_run01.txt)" > data/files_to_merge_SetA_run01.txt
# set the path for "merge_haplotypePandas.py" file ; **note: Update path as need be.
mergeHAP=merge_haplotypePandas.py
# use, a python script to merge the haplotypes together
# we store the file in a new directory "data/SetA_02"
mkdir -p data/SetA_02
python ${mergeHAP} --hapList data/files_to_merge_SetA_run01.txt --output data/SetA_02
# Make a copy and rename the above output file "merged_haplotype.txt" to "phaseExtendedHaplotype_SetA02.txt"
cp data/SetA_02/merged_haplotype.txt data/SetA_02/phaseExtendedHaplotype_SetA_02.txt
rm data/SetA_02/merged_haplotype.txt
## Now, use the "R" script for computing "switch error" statistics.
# file named "SwitchErrorTest_PhaseExtenderSetA.R"
# this compares "truth set" against the "phased_SetA_run01" of sample NA12891