Merge pull request #21 from BenjaminsM/master

Changes from Roan for winged helix and circleci instead of jenkins
molgenis · Mar 31, 2023 · 4a8da6e · 4a8da6e
2 parents 6f2e541 + e35aabf
commit 4a8da6e
Show file tree

Hide file tree

Showing 12 changed files with 248 additions and 40 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -0,0 +1,51 @@
+---
+#
+# CircleCI 2.1 configuration file: installs ShellCheck, then runs the indentation check and ShellCheck.
+# Check https://circleci.com/docs/configuration-reference/ for more details.
+#
+version: 2.1
+orbs: 
+  shellcheck: circleci/[email protected]
+jobs:
+  build:
+    docker:
+      - image: cimg/base:stable
+    working_directory: ~/repo
+    resource_class: small
+    steps:
+      - checkout
+      - run:
+          name: Install ShellCheck
+          command: |
+            set -e
+            set -u
+            set -o pipefail
+            # Check if we are root or need sudo.
+            if [[ ${EUID} == 0 ]]; then
+                export SUDO=''
+            else
+                export SUDO='sudo'
+            fi
+            SHELLCHECK_VERSION=0.8.0
+            wget -qO- "https://github.com/koalaman/shellcheck/releases/download/v${SHELLCHECK_VERSION}/shellcheck-v${SHELLCHECK_VERSION}.linux.x86_64.tar.xz" \
+                | tar -xJf -
+            cd "shellcheck-v${SHELLCHECK_VERSION}/"
+            ${SUDO} cp shellcheck /usr/local/bin/
+      - run:
+          name: Run IndentationCheck
+          shell: /bin/bash
+          command: |
+            set -e
+            set -u
+            set -o pipefail
+            check/indentationcheck.sh
+      - run:
+          name: Run ShellCheck
+          shell: /bin/bash
+          command: |
+            set -e
+            set -u
+            set -o pipefail
+            check/shellcheck.sh
+...
+
diff --git a/Jenkinsfile b/Jenkinsfile
diff --git a/bin/ConcordanceCheck.sh b/bin/ConcordanceCheck.sh
@@ -183,12 +183,8 @@ do
 		touch "${concordanceDir}/logs/${concordanceCheckId}.ConcordanceCheck.started"
 		arrayId=$(sed 1d "${sampleSheet}" | awk 'BEGIN {FS="\t"}{print $1}')
 		arrayVcf="${arrayId}.FINAL.vcf"
-		arrayFileLocation=$(sed 1d "${sampleSheet}" | awk 'BEGIN {FS="\t"}{print $3}')
-		rsync -av --copy-links "${arrayFileLocation}" "${arrayVcfDir}"
 		ngsId=$(sed 1d "${sampleSheet}" | awk 'BEGIN {FS="\t"}{print $2}')
 		ngsVcf="${ngsId}.final.vcf.gz"
-		ngsFileLocation=$(sed 1d "${sampleSheet}" | awk 'BEGIN {FS="\t"}{print $4}')
-		rsync -av --copy-links "${ngsFileLocation}" "${ngsVcfDir}"
 
 		bedType="$(zcat "${ngsVcfDir}/${ngsVcf}" | grep -m 1 -o -P 'intervals=\[[^\]]*.bed\]' | cut -d [ -f2 | cut -d ] -f1)"
 		bedDir="$(dirname "${bedType}")"

diff --git a/bin/ParseDarwinSamplesheet.sh b/bin/ParseDarwinSamplesheet.sh
@@ -46,7 +46,8 @@ function showHelp() {
 	#
 	cat <<EOH
 ======================================================================================================================
-Scripts to make automatically a samplesheet for the concordance check between ngs and array data.
+Scripts to make automatically a samplesheet for the concordance check between ngs and array data and pushes the ngs and 
+array data to the destination machine.
 ngs.vcf should be in /groups/${NGSGROUP}/${PRM_LFS}/concordance/ngs/.
 array.vcf should be in /groups/${ARRAYGROUP}/${PRM_LFS}/concordance/array/.
 
@@ -189,7 +190,7 @@ fi
 #kolom 5: DNA nummer array
 
 mapfile -t sampleSheetsDarwin < <(find "/groups/${GROUP}/${DAT_LFS}/ConcordanceCheckSamplesheets/" -maxdepth 1 -type f -name '*.csv')
-if [[ "${#sampleSheetsDarwin[@]:-0}" -eq '0' ]]
+if [[ "${#sampleSheetsDarwin[@]}" -eq '0' ]]
 then
 	log4Bash 'WARN' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "No sample sheets found @ /groups/${GROUP}/${DAT_LFS}/ConcordanceCheckSamplesheets/: There is nothing to do."
 	trap - EXIT
@@ -213,7 +214,7 @@ else
 		if [ -e "${ngsPath[0]}" ]
 		then
 			mapfile -t ngsVcf < <(find "/groups/${NGSGROUP}/prm0"*"/projects/"*"${projectNGS}"*"/run01/results/variants/" -maxdepth 1 -name "*${dnaNGS}*.vcf.gz")
-			if [[ "${#ngsVcf[@]:-0}" -eq '0' ]]
+			if [[ "${#ngsVcf[@]}" -eq '0' ]]
 			then
 				log4Bash 'WARN' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "/groups/${GROUP}/*prm0*/projects/${projectNGS}*/run*/results/variants/*${dnaNGS}*.vcf.gz NOT FOUND! skipped"
 				continue
@@ -231,7 +232,7 @@ else
 		then
 			mapfile -t arrayVcf < <(find "/groups/${ARRAYGROUP}/prm0"*"/projects/"*"${projectArray}"*"/run01/results/vcf" -maxdepth 1  -name "${dnaArray}*.vcf")
 
-			if [[ "${#arrayVcf[@]:-0}" -eq '0' ]]
+			if [[ "${#arrayVcf[@]}" -eq '0' ]]
 			then
 				log4Bash 'WARN' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "/groups/${ARRAYGROUP}/prm0*/projects/*${projectArray}*/run*/results/vcf/${dnaArray}*.vcf NOT FOUND! skipped"
 			else
@@ -243,6 +244,11 @@ else
 			continue
 		fi
 		host_prm=$(hostname -s)
+
+		#rsync data to tmp
+		rsync -av "${arrayVcf[0]}" "${HOSTNAME_TMP}:/groups/${GROUP}/${TMP_LFS}/concordance/array/"
+		rsync -av "${ngsVcf[0]}" "${HOSTNAME_TMP}:/groups/${GROUP}/${TMP_LFS}/concordance/ngs/"
+
 		# shellcheck disable=SC2029	
 		ssh "${HOSTNAME_TMP}" "echo -e \"data1Id\tdata2Id\tlocation1\tlocation2\n${arrayId}\t${ngsVcfId}\t${host_prm}:${arrayVcf[0]}\t${host_prm}:${ngsVcf[0]}\" > \"/groups/${GROUP}/${TMP_LFS}/concordance/samplesheets/${samplesheetName}.sampleId.txt\""
 		log4Bash 'INFO' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "samplesheet created on ${HOSTNAME_TMP}: /groups/${GROUP}/${TMP_LFS}/concordance/samplesheets/${samplesheetName}.sampleId.txt"

diff --git a/bin/cleanup.sh b/bin/cleanup.sh
@@ -168,7 +168,7 @@ do
 	log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' \
 	"check ${ConcordanceID}, ngsId:${ngsId} and arrayId:${arrayId} ngsDnaNo:${ngsDnaNo}, arrayDnaNo:${arrayDnaNo}"
 
-	if ssh -n "${ATEAMBOTUSER}@${HOSTNAME_PRM}" test -e "/groups/${group}/${PRM_LFS}/concordance/logs/${ConcordanceID}.copyConcordanceCheckData.finished"
+	if test -e "${concordanceDir}/logs/${ConcordanceID}.ConcordanceCheck.finished" 2>/dev/null
 	then
 		log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' \
 			"rm -rf ${concordanceDir}/jobs/${ConcordanceID}.*/ 

diff --git a/config.yml b/config.yml
@@ -0,0 +1,50 @@
+---
+#
+# CircleCI 2.1 configuration file: installs ShellCheck, then runs the indentation check and ShellCheck.
+# Check https://circleci.com/docs/configuration-reference/ for more details.
+#
+version: 2.1
+orbs: 
+  shellcheck: circleci/[email protected]
+jobs:
+  build:
+    docker:
+      - image: cimg/base:stable
+    working_directory: ~/repo
+    resource_class: small
+    steps:
+      - checkout
+      - run:
+          name: Install ShellCheck
+          command: |
+            set -e
+            set -u
+            set -o pipefail
+            # Check if we are root or need sudo.
+            if [[ ${EUID} == 0 ]]; then
+                export SUDO=''
+            else
+                export SUDO='sudo'
+            fi
+            SHELLCHECK_VERSION=0.8.0
+            wget -qO- "https://github.com/koalaman/shellcheck/releases/download/v${SHELLCHECK_VERSION}/shellcheck-v${SHELLCHECK_VERSION}.linux.x86_64.tar.xz" \
+                | tar -xJf -
+            cd "shellcheck-v${SHELLCHECK_VERSION}/"
+            ${SUDO} cp shellcheck /usr/local/bin/
+      - run:
+          name: Run IndentationCheck
+          shell: /bin/bash
+          command: |
+            set -e
+            set -u
+            set -o pipefail
+            check/indentationcheck.sh
+      - run:
+          name: Run ShellCheck
+          shell: /bin/bash
+          command: |
+            set -e
+            set -u
+            set -o pipefail
+            check/shellcheck.sh
+...
diff --git a/etc/ConcordanceCheck.cfg b/etc/ConcordanceCheck.cfg
@@ -1,3 +1,3 @@
-htsLibVersion='HTSlib/1.3.2-foss-2015b'
-compareGenotypeCallsVersion='CompareGenotypeCalls/1.8.1-Java-1.8.0_74'
-bedToolsVersion='BEDTools/2.25.0-foss-2015b'
+htsLibVersion='HTSlib/1.14-foss-2018b'
+compareGenotypeCallsVersion='CompareGenotypeCalls/1.8.1-Java-8-LTS'
+bedToolsVersion='BEDTools/2.30.0-foss-2018b'
diff --git a/etc/betabarrel.cfg b/etc/betabarrel.cfg
@@ -0,0 +1,5 @@
+TMP_LFS='tmp05'
+PRM_LFS='prm05'
+SCR_LFS="${TMP_LFS}"
+DAT_LFS='dat05'
+HOSTNAME_TMP='porch+betabarrel'
diff --git a/etc/copperfist.cfg b/etc/copperfist.cfg
@@ -0,0 +1,5 @@
+TMP_LFS='tmp06'
+PRM_LFS='prm06'
+SCR_LFS="${TMP_LFS}"
+DAT_LFS='dat06'
+HOSTNAME_TMP='porch+copperfist'
diff --git a/etc/uozkh1016.cfg b/etc/uozkh1016.cfg
@@ -0,0 +1,11 @@
+# Settings for uozkh1016 as shell variable assignments.
+# TMP_ROOT_DIAGNOSTICS_DIR expands ${GROUP}, which is not set in this file and has to be defined before these lines are evaluated.
+TMP_LFS='tmp07'
+PRM_LFS='prm07'
+# Double quotes: SCR_LFS must get the value of TMP_LFS, not the literal text ${TMP_LFS}.
+SCR_LFS="${TMP_LFS}"
+HOSTNAME_PRM='localhost'
+DIAGNOSTICS_TMP_LFS='tmp07'
+HOSTNAME_TMP='porch+wingedhelix'
+DAT_LFS='dat07'
+TMP_ROOT_DIAGNOSTICS_DIR="/groups/${GROUP}/${TMP_LFS}/"
diff --git a/etc/wingedhelix.cfg b/etc/wingedhelix.cfg
@@ -0,0 +1,5 @@
+TMP_LFS='tmp07'
+PRM_LFS='prm07'
+SCR_LFS="${TMP_LFS}"
+DAT_LFS='dat07'
+HOSTNAME_TMP='porch+wingedhelix'
diff --git a/scripts/concordanceCheck_array-array.sh b/scripts/concordanceCheck_array-array.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+set -eu
+
+#
+##
+### Array-to-array concordance check: compares every *.vcf in ${concordanceDir} with one index VCF
+### by generating and submitting one Slurm job per input file.
+### Needs an index folder with a file to compare with: ${concordanceDir}/index/${SAMPLE_ID}
+### Data to compare with should be in ${concordanceDir}
+### Creates subfolders automatically
+### data will be bgzipped/tabix'ed
+### only first 100K lines will be selected for comparison
+### output will be in ${concordanceDir}/results/
+##
+#
+
+# No trailing slash: every path below adds its own separator.
+concordanceDir="/groups/umcg-atd/tmp06/concordance/array-array"
+# Placeholder: replace with the file name of the index VCF in ${concordanceDir}/index/.
+SAMPLE_ID="pathToSample"
+
+echo "creating workfolders : results,tmp,samplesheets,jobs,logs and original in ${concordanceDir}"
+# logs is needed too: the generated jobs write their *.ConcordanceCheck.finished marker there.
+mkdir -p "${concordanceDir}/"{results,tmp,samplesheets,jobs,logs,original}
+
+module load HTSlib
+module list
+
+## indexVCF to compare all input files with:
+indexVcf="${concordanceDir}/index/${SAMPLE_ID}"
+if [[ ! -f "${indexVcf}" ]]
+then
+	echo "ERROR: index VCF ${indexVcf} not found; set SAMPLE_ID to a file in ${concordanceDir}/index/." >&2
+	exit 1
+fi
+
+## select first 100K lines for comparison
+head -n 100000 "${indexVcf}" > "${indexVcf}.header100000"
+
+# keep a copy of the complete index file
+cp "${indexVcf}" "${concordanceDir}/original/"
+# get sampleID out of filename: strip the path first and only then everything from the first dot,
+# so a dot in a directory name cannot truncate the ID.
+indexBase=$(basename "${indexVcf}")
+index="${indexBase%%.*}"
+
+#CompareGenotypes needs a bgzipped file
+bgzip -c "${indexVcf}.header100000" > "${indexVcf}.gz"
+tabix -p vcf "${indexVcf}.gz"
+
+for i in "${concordanceDir}/"*".vcf"
+do
+	# Without a match the pattern stays literal; skip it instead of aborting in head.
+	if [[ ! -e "${i}" ]]
+	then
+		echo "WARN: ${i} does not exist (no *.vcf files in ${concordanceDir}?): skipped." >&2
+		continue
+	fi
+
+	# get sampleID without path
+	sampleIDBase=$(basename "${i}")
+	# get sampleID without extension
+	sampleID="${sampleIDBase%%.*}"
+
+	#outputFile prefix
+	concordanceCheckId="${index}_${sampleID}"
+	jobScript="${concordanceDir}/jobs/${concordanceCheckId}.sh"
+
+	#create sampleSheet for CompareGenotypes script
+	sampleSheet="${concordanceDir}/samplesheets/${concordanceCheckId}.sampleId.txt"
+	printf 'data1Id\tdata2Id\tlocation1\tlocation2\n' > "${sampleSheet}"
+	printf '%s\t%s\t%s\t%s\n' "${index}" "${sampleID}" "${indexVcf}" "${i}" >> "${sampleSheet}"
+
+	## select first 100K lines for comparison
+	head -n 100000 "${i}" > "${i}.header100000"
+
+	#CompareGenotypes needs a bgzipped file
+	echo "tabixing ${i}"
+	bgzip -c "${i}.header100000" > "${i}.gz"
+	tabix -p vcf "${i}.gz"
+
+	# create jobs (EOH must stay left aligned, NO INDENTATION!)
+	# \${...} is escaped so it is expanded when the job runs, not while the job script is written.
+	# Java heap (-Xmx) stays below the Slurm --mem request and java.io.tmpdir uses the tmp folder created above.
+	cat << EOH > "${jobScript}"
+#!/bin/bash
+#SBATCH --job-name=Concordance_${concordanceCheckId}
+#SBATCH --output=${concordanceDir}/jobs/${concordanceCheckId}.out
+#SBATCH --error=${concordanceDir}/jobs/${concordanceCheckId}.err
+#SBATCH --time=00:30:00
+#SBATCH --cpus-per-task 1
+#SBATCH --mem 6gb
+#SBATCH --open-mode=append
+#SBATCH --export=NONE
+#SBATCH --get-user-env=60L
+
+set -eu
+	module load CompareGenotypeCalls
+	module load BEDTools
+	module list
+	java -XX:ParallelGCThreads=1 -Djava.io.tmpdir="${concordanceDir}/tmp/" -Xmx5g -jar "\${EBROOTCOMPAREGENOTYPECALLS}/CompareGenotypeCalls.jar" \
+	-d1 "${indexVcf}.gz" \
+	-D1 VCF \
+	-d2 "${i}.gz" \
+	-D2 VCF \
+	-ac \
+	--sampleMap "${sampleSheet}" \
+	-o "${concordanceDir}/tmp/${concordanceCheckId}" \
+	-sva
+
+	echo "moving ${concordanceDir}/tmp/${concordanceCheckId}.sample to ${concordanceDir}/results/"
+	mv "${concordanceDir}/tmp/${concordanceCheckId}.sample" "${concordanceDir}/results/"
+	echo "moving ${concordanceDir}/tmp/${concordanceCheckId}.variants to ${concordanceDir}/results/"
+	mv "${concordanceDir}/tmp/${concordanceCheckId}.variants" "${concordanceDir}/results/"
+
+	echo "finished"
+	if [ -e "${concordanceDir}/logs/${concordanceCheckId}.ConcordanceCheck.started" ]
+	then
+		mv "${concordanceDir}/logs/${concordanceCheckId}.ConcordanceCheck."{started,finished}
+	else
+		touch "${concordanceDir}/logs/${concordanceCheckId}.ConcordanceCheck.finished"
+	fi
+
+	if [ -e "${jobScript}.started" ]
+	then
+		mv "${jobScript}."{started,finished}
+	else
+		touch "${jobScript}.finished"
+	fi
+EOH
+
+	echo "submitting: ${jobScript}"
+	sbatch "${jobScript}"
+
+done
+
+