Skip to content

Commit

Permalink
Merge pull request #21 from BenjaminsM/master
Browse files Browse the repository at this point in the history
Changes from Roan for winged helix and circleci instead of jenkins
  • Loading branch information
pneerincx authored Mar 31, 2023
2 parents 6f2e541 + e35aabf commit 4a8da6e
Show file tree
Hide file tree
Showing 12 changed files with 248 additions and 40 deletions.
51 changes: 51 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
---
#
# CircleCI 2.1 configuration file.
# Defines one job that installs ShellCheck and then runs the repository's
# check scripts (check/indentationcheck.sh and check/shellcheck.sh).
# Check https://circleci.com/docs/configuration-reference/ for more details.
#
version: 2.1
# NOTE(review): this orb is declared, but none of the steps below reference it;
# ShellCheck is installed manually by the "Install ShellCheck" step instead.
orbs:
shellcheck: circleci/[email protected]
jobs:
# Single job: check out the repository, install ShellCheck, run the checks.
build:
docker:
- image: cimg/base:stable
working_directory: ~/repo
resource_class: small
steps:
- checkout
# Downloads the pinned ShellCheck release tarball and copies the binary to /usr/local/bin/.
# NOTE(review): unlike the two steps below, this step sets no explicit 'shell:'
# although its command uses bash syntax ([[ ]] and 'set -o pipefail').
- run:
name: Install ShellCheck
command: |
set -e
set -u
set -o pipefail
# Check if we are root or need sudo.
if [[ ${EUID} == 0 ]]; then
export SUDO=''
else
export SUDO='sudo'
fi
SHELLCHECK_VERSION=0.8.0
wget -qO- "https://github.com/koalaman/shellcheck/releases/download/v${SHELLCHECK_VERSION}/shellcheck-v${SHELLCHECK_VERSION}.linux.x86_64.tar.xz" \
| tar -xJf -
cd "shellcheck-v${SHELLCHECK_VERSION}/"
${SUDO} cp shellcheck /usr/local/bin/
# Runs the repository's indentation check script; the step fails when the script exits non-zero.
- run:
name: Run IndentationCheck
shell: /bin/bash
command: |
set -e
set -u
set -o pipefail
check/indentationcheck.sh
# Runs the repository's ShellCheck wrapper script; the step fails when the script exits non-zero.
- run:
name: Run ShellCheck
shell: /bin/bash
command: |
set -e
set -u
set -o pipefail
check/shellcheck.sh
...

28 changes: 0 additions & 28 deletions Jenkinsfile

This file was deleted.

4 changes: 0 additions & 4 deletions bin/ConcordanceCheck.sh
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,8 @@ do
touch "${concordanceDir}/logs/${concordanceCheckId}.ConcordanceCheck.started"
arrayId=$(sed 1d "${sampleSheet}" | awk 'BEGIN {FS="\t"}{print $1}')
arrayVcf="${arrayId}.FINAL.vcf"
arrayFileLocation=$(sed 1d "${sampleSheet}" | awk 'BEGIN {FS="\t"}{print $3}')
rsync -av --copy-links "${arrayFileLocation}" "${arrayVcfDir}"
ngsId=$(sed 1d "${sampleSheet}" | awk 'BEGIN {FS="\t"}{print $2}')
ngsVcf="${ngsId}.final.vcf.gz"
ngsFileLocation=$(sed 1d "${sampleSheet}" | awk 'BEGIN {FS="\t"}{print $4}')
rsync -av --copy-links "${ngsFileLocation}" "${ngsVcfDir}"

bedType="$(zcat "${ngsVcfDir}/${ngsVcf}" | grep -m 1 -o -P 'intervals=\[[^\]]*.bed\]' | cut -d [ -f2 | cut -d ] -f1)"
bedDir="$(dirname "${bedType}")"
Expand Down
14 changes: 10 additions & 4 deletions bin/ParseDarwinSamplesheet.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ function showHelp() {
#
cat <<EOH
======================================================================================================================
Scripts to make automatically a samplesheet for the concordance check between ngs and array data.
Scripts to make automatically a samplesheet for the concordance check between ngs and array data and pushes the ngs and
array data to the destination machine.
ngs.vcf should be in /groups/${NGSGROUP}/${PRM_LFS}/concordance/ngs/.
array.vcf should be in /groups/${ARRAYGROUP}/${PRM_LFS}/concordance/array/.
Expand Down Expand Up @@ -189,7 +190,7 @@ fi
#kolom 5: DNA nummer array

mapfile -t sampleSheetsDarwin < <(find "/groups/${GROUP}/${DAT_LFS}/ConcordanceCheckSamplesheets/" -maxdepth 1 -type f -name '*.csv')
if [[ "${#sampleSheetsDarwin[@]:-0}" -eq '0' ]]
if [[ "${#sampleSheetsDarwin[@]}" -eq '0' ]]
then
log4Bash 'WARN' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "No sample sheets found @ /groups/${GROUP}/${DAT_LFS}/ConcordanceCheckSamplesheets/: There is nothing to do."
trap - EXIT
Expand All @@ -213,7 +214,7 @@ else
if [ -e "${ngsPath[0]}" ]
then
mapfile -t ngsVcf < <(find "/groups/${NGSGROUP}/prm0"*"/projects/"*"${projectNGS}"*"/run01/results/variants/" -maxdepth 1 -name "*${dnaNGS}*.vcf.gz")
if [[ "${#ngsVcf[@]:-0}" -eq '0' ]]
if [[ "${#ngsVcf[@]}" -eq '0' ]]
then
log4Bash 'WARN' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "/groups/${GROUP}/*prm0*/projects/${projectNGS}*/run*/results/variants/*${dnaNGS}*.vcf.gz NOT FOUND! skipped"
continue
Expand All @@ -231,7 +232,7 @@ else
then
mapfile -t arrayVcf < <(find "/groups/${ARRAYGROUP}/prm0"*"/projects/"*"${projectArray}"*"/run01/results/vcf" -maxdepth 1 -name "${dnaArray}*.vcf")

if [[ "${#arrayVcf[@]:-0}" -eq '0' ]]
if [[ "${#arrayVcf[@]}" -eq '0' ]]
then
log4Bash 'WARN' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "/groups/${ARRAYGROUP}/prm0*/projects/*${projectArray}*/run*/results/vcf/${dnaArray}*.vcf NOT FOUND! skipped"
else
Expand All @@ -243,6 +244,11 @@ else
continue
fi
host_prm=$(hostname -s)

#rsync data to tmp
rsync -av "${arrayVcf[0]}" "${HOSTNAME_TMP}:/groups/${GROUP}/${TMP_LFS}/concordance/array/"
rsync -av "${ngsVcf[0]}" "${HOSTNAME_TMP}:/groups/${GROUP}/${TMP_LFS}/concordance/ngs/"

# shellcheck disable=SC2029
ssh "${HOSTNAME_TMP}" "echo -e \"data1Id\tdata2Id\tlocation1\tlocation2\n${arrayId}\t${ngsVcfId}\t${host_prm}:${arrayVcf[0]}\t${host_prm}:${ngsVcf[0]}\" > \"/groups/${GROUP}/${TMP_LFS}/concordance/samplesheets/${samplesheetName}.sampleId.txt\""
log4Bash 'INFO' "${LINENO}" "${FUNCNAME[0]:-main}" '0' "samplesheet created on ${HOSTNAME_TMP}: /groups/${GROUP}/${TMP_LFS}/concordance/samplesheets/${samplesheetName}.sampleId.txt"
Expand Down
2 changes: 1 addition & 1 deletion bin/cleanup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ do
log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' \
"check ${ConcordanceID}, ngsId:${ngsId} and arrayId:${arrayId} ngsDnaNo:${ngsDnaNo}, arrayDnaNo:${arrayDnaNo}"

if ssh -n "${ATEAMBOTUSER}@${HOSTNAME_PRM}" test -e "/groups/${group}/${PRM_LFS}/concordance/logs/${ConcordanceID}.copyConcordanceCheckData.finished"
if test -e "${concordanceDir}/logs/${ConcordanceID}.ConcordanceCheck.finished" 2>/dev/null
then
log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' \
"rm -rf ${concordanceDir}/jobs/${ConcordanceID}.*/
Expand Down
50 changes: 50 additions & 0 deletions config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
---
#
# CircleCI 2.1 configuration file.
# Defines one job that installs ShellCheck and then runs the repository's
# check scripts (check/indentationcheck.sh and check/shellcheck.sh).
# Check https://circleci.com/docs/configuration-reference/ for more details.
#
# NOTE(review): CircleCI reads its configuration from .circleci/config.yml;
# confirm whether this copy in the repository root is needed.
#
version: 2.1
# NOTE(review): this orb is declared, but none of the steps below reference it;
# ShellCheck is installed manually by the "Install ShellCheck" step instead.
orbs:
shellcheck: circleci/[email protected]
jobs:
# Single job: check out the repository, install ShellCheck, run the checks.
build:
docker:
- image: cimg/base:stable
working_directory: ~/repo
resource_class: small
steps:
- checkout
# Downloads the pinned ShellCheck release tarball and copies the binary to /usr/local/bin/.
# NOTE(review): unlike the two steps below, this step sets no explicit 'shell:'
# although its command uses bash syntax ([[ ]] and 'set -o pipefail').
- run:
name: Install ShellCheck
command: |
set -e
set -u
set -o pipefail
# Check if we are root or need sudo.
if [[ ${EUID} == 0 ]]; then
export SUDO=''
else
export SUDO='sudo'
fi
SHELLCHECK_VERSION=0.8.0
wget -qO- "https://github.com/koalaman/shellcheck/releases/download/v${SHELLCHECK_VERSION}/shellcheck-v${SHELLCHECK_VERSION}.linux.x86_64.tar.xz" \
| tar -xJf -
cd "shellcheck-v${SHELLCHECK_VERSION}/"
${SUDO} cp shellcheck /usr/local/bin/
# Runs the repository's indentation check script; the step fails when the script exits non-zero.
- run:
name: Run IndentationCheck
shell: /bin/bash
command: |
set -e
set -u
set -o pipefail
check/indentationcheck.sh
# Runs the repository's ShellCheck wrapper script; the step fails when the script exits non-zero.
- run:
name: Run ShellCheck
shell: /bin/bash
command: |
set -e
set -u
set -o pipefail
check/shellcheck.sh
...
6 changes: 3 additions & 3 deletions etc/ConcordanceCheck.cfg
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
htsLibVersion='HTSlib/1.3.2-foss-2015b'
compareGenotypeCallsVersion='CompareGenotypeCalls/1.8.1-Java-1.8.0_74'
bedToolsVersion='BEDTools/2.25.0-foss-2015b'
htsLibVersion='HTSlib/1.14-foss-2018b'
compareGenotypeCallsVersion='CompareGenotypeCalls/1.8.1-Java-8-LTS'
bedToolsVersion='BEDTools/2.30.0-foss-2018b'
5 changes: 5 additions & 0 deletions etc/betabarrel.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#
# Settings for betabarrel: names of the dat/prm/tmp LFS folders and the host
# name used to reach the tmp LFS.
#
DAT_LFS='dat05'
PRM_LFS='prm05'
TMP_LFS='tmp05'
# SCR_LFS takes the same value as TMP_LFS (double quotes so the variable is expanded).
SCR_LFS="${TMP_LFS}"
HOSTNAME_TMP='porch+betabarrel'
5 changes: 5 additions & 0 deletions etc/copperfist.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#
# Settings for copperfist: names of the dat/prm/tmp LFS folders and the host
# name used to reach the tmp LFS.
#
DAT_LFS='dat06'
PRM_LFS='prm06'
TMP_LFS='tmp06'
# SCR_LFS takes the same value as TMP_LFS (double quotes so the variable is expanded).
SCR_LFS="${TMP_LFS}"
HOSTNAME_TMP='porch+copperfist'
8 changes: 8 additions & 0 deletions etc/uozkh1016.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#
# Settings for uozkh1016: LFS folder names, host names and the diagnostics
# root directory on the tmp LFS.
#
TMP_LFS='tmp07'
PRM_LFS='prm07'
# Double quotes are required here: with single quotes SCR_LFS would hold the
# literal text ${TMP_LFS} instead of the value of TMP_LFS.
SCR_LFS="${TMP_LFS}"
HOSTNAME_PRM='localhost'
DIAGNOSTICS_TMP_LFS='tmp07'
HOSTNAME_TMP='porch+wingedhelix'
DAT_LFS='dat07'
# GROUP is not defined in this file; its value is taken from whatever shell reads this file.
TMP_ROOT_DIAGNOSTICS_DIR="/groups/${GROUP}/${TMP_LFS}/"
5 changes: 5 additions & 0 deletions etc/wingedhelix.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#
# Settings for wingedhelix: names of the dat/prm/tmp LFS folders and the host
# name used to reach the tmp LFS.
#
DAT_LFS='dat07'
PRM_LFS='prm07'
TMP_LFS='tmp07'
# SCR_LFS takes the same value as TMP_LFS (double quotes so the variable is expanded).
SCR_LFS="${TMP_LFS}"
HOSTNAME_TMP='porch+wingedhelix'
110 changes: 110 additions & 0 deletions scripts/concordanceCheck_array-array.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#!/bin/bash
#
# Array-versus-array concordance check.
#
# Compares one index VCF (${concordanceDir}/index/${SAMPLE_ID}) with every
# *.vcf file located directly in ${concordanceDir}. For each pair this script:
#   * writes a two-line sample sheet to ${concordanceDir}/samplesheets/,
#   * truncates both VCFs to their first 100000 lines, bgzips and tabix-indexes them,
#   * generates a Slurm job script in ${concordanceDir}/jobs/ that runs CompareGenotypeCalls,
#   * submits that job script with sbatch.
# Job results are moved to ${concordanceDir}/results/ by the job itself.
#
# Before running: edit concordanceDir and SAMPLE_ID below.
#
set -eu
set -o pipefail

# No trailing slash, so the paths derived below do not contain '//'.
concordanceDir="/groups/umcg-atd/tmp06/concordance/array-array"
# File name of the index VCF inside ${concordanceDir}/index/.
SAMPLE_ID="pathToSample"

#
##
### Needs an index folder with a file to compare with
### Data to compare with should be in ${concordanceDir}
### Creates subfolders automatically
### data will be bgzipped/tabix'ed
### only first 100K lines will be selected for comparison
### output will be in results ${concordanceDir}/results/
##
#

# logs/ is included because the generated job writes its *.ConcordanceCheck.finished marker there.
echo "creating workfolders : results,tmp,samplesheets,jobs,logs and original in ${concordanceDir}"
mkdir -p "${concordanceDir}/"{results,tmp,samplesheets,jobs,logs,original}

module load HTSlib
module list

## indexVCF to compare all input files with:
indexVcf="${concordanceDir}/index/${SAMPLE_ID}"
if [[ ! -f "${indexVcf}" ]]
then
	echo "FATAL: index VCF ${indexVcf} not found." >&2
	exit 1
fi

## grep first 100K lines for comparison
head -100000 "${indexVcf}" > "${indexVcf}.header100000"

# Keep a copy of the untouched index VCF.
cp "${indexVcf}" "${concordanceDir}/original/"

# get sampleID out of filename: strip the path first, then everything from the first dot,
# so a dot in a directory name cannot truncate the ID.
indexBase="$(basename "${indexVcf}")"
index="${indexBase%%.*}"

#CompareGenotypes needs a bgzipped file
bgzip -c "${indexVcf}.header100000" > "${indexVcf}.gz"
tabix -p vcf "${indexVcf}.gz"

for i in "${concordanceDir}/"*".vcf"
do
	# Without nullglob an unmatched pattern is passed on literally; skip it instead of failing in head.
	if [[ ! -e "${i}" ]]
	then
		echo "WARNING: ${i} does not exist (no *.vcf files in ${concordanceDir}/?); skipped." >&2
		continue
	fi

	# get sampleID without path
	sampleIDBase="$(basename "${i}")"
	# get sampleID without extension
	sampleID="${sampleIDBase%%.*}"

	#outputFile prefix
	concordanceCheckId="${index}_${sampleID}"
	jobScript="${concordanceDir}/jobs/${concordanceCheckId}.sh"

	#create sampleSheet for CompareGenotypes script
	# printf is used instead of echo -e so backslashes in IDs or paths are written literally.
	sampleSheet="${concordanceDir}/samplesheets/${concordanceCheckId}.sampleId.txt"
	printf 'data1Id\tdata2Id\tlocation1\tlocation2\n' > "${sampleSheet}"
	printf '%s\t%s\t%s\t%s\n' "${index}" "${sampleID}" "${indexVcf}" "${i}" >> "${sampleSheet}"

	## grep first 100K lines for comparison
	head -100000 "${i}" > "${i}.header100000"

	#CompareGenotypes needs a bgzipped file
	echo "tabixing ${i}"
	bgzip -c "${i}.header100000" > "${i}.gz"
	tabix -p vcf "${i}.gz"

	#
	# create jobs (the here-document body and EOH must stay left aligned, NO INDENTATION!)
	#
	# Variables are expanded now, while generating the job; only \${...} is left for the job itself.
	# The Java heap (-Xmx) is kept below the Slurm memory request (--mem), and
	# java.io.tmpdir points at the tmp/ folder created above.
	#
	cat << EOH > "${jobScript}"
#!/bin/bash
#SBATCH --job-name=Concordance_${concordanceCheckId}
#SBATCH --output=${concordanceDir}/jobs/${concordanceCheckId}.out
#SBATCH --error=${concordanceDir}/jobs/${concordanceCheckId}.err
#SBATCH --time=00:30:00
#SBATCH --cpus-per-task 1
#SBATCH --mem 6gb
#SBATCH --open-mode=append
#SBATCH --export=NONE
#SBATCH --get-user-env=60L
set -eu
module load CompareGenotypeCalls
module load BEDTools
module list
java -XX:ParallelGCThreads=1 -Djava.io.tmpdir="${concordanceDir}/tmp/" -Xmx5g -jar "\${EBROOTCOMPAREGENOTYPECALLS}/CompareGenotypeCalls.jar" \
-d1 "${indexVcf}.gz" \
-D1 VCF \
-d2 "${i}.gz" \
-D2 VCF \
-ac \
--sampleMap "${sampleSheet}" \
-o "${concordanceDir}/tmp/${concordanceCheckId}" \
-sva
echo "moving ${concordanceDir}/tmp/${concordanceCheckId}.sample to ${concordanceDir}/results/"
mv "${concordanceDir}/tmp/${concordanceCheckId}.sample" "${concordanceDir}/results/"
echo "moving ${concordanceDir}/tmp/${concordanceCheckId}.variants to ${concordanceDir}/results/"
mv "${concordanceDir}/tmp/${concordanceCheckId}.variants" "${concordanceDir}/results/"
echo "finished"
if [ -e "${concordanceDir}/logs/${concordanceCheckId}.ConcordanceCheck.started" ]
then
mv "${concordanceDir}/logs/${concordanceCheckId}.ConcordanceCheck."{started,finished}
else
touch "${concordanceDir}/logs/${concordanceCheckId}.ConcordanceCheck.finished"
fi
mv "${concordanceDir}/jobs/${concordanceCheckId}.sh."{started,finished}
EOH

	# The job renames <job>.sh.started to <job>.sh.finished as its last step,
	# so the marker has to exist before the job is submitted.
	touch "${jobScript}.started"

	echo "submitting: ${jobScript}"
	sbatch "${jobScript}"

done


0 comments on commit 4a8da6e

Please sign in to comment.