From 03176384ea00546fe8fbb57598eb17cebb4307de Mon Sep 17 00:00:00 2001
From: shafin <shafin@google.com>
Date: Tue, 10 Oct 2023 13:56:44 -0700
Subject: [PATCH] Add ONT option to run_deeptio and set `--cpus 0` for
 post_processing.

PiperOrigin-RevId: 572358097
---
 docs/deeptrio-ont-case-study.md | 308 ++++++++++++++++++++++++++++++++
 scripts/run_deeptrio.py         |  14 +-
 scripts/run_deeptrio_test.py    |  97 +++++-----
 3 files changed, 362 insertions(+), 57 deletions(-)
 create mode 100644 docs/deeptrio-ont-case-study.md

diff --git a/docs/deeptrio-ont-case-study.md b/docs/deeptrio-ont-case-study.md
new file mode 100644
index 00000000..15c72f92
--- /dev/null
+++ b/docs/deeptrio-ont-case-study.md
@@ -0,0 +1,308 @@
+# Using DeepTrio for small variant calling from the trio sequenced with ONT R10.4
+
+In this case study, we describe applying [DeepTrio](deeptrio-details.md) to a
+real ONT R10.4 trio. Then we assess the quality of the DeepTrio variant calls
+with `hap.py`. In addition we evaluate a Mendelian violation rate for a merged
+VCF.
+
+To make it faster to run over this case study, we run only on chromosome 20.
+
+## Prepare environment
+
+### Tools
+
+[Docker](https://docs.docker.com/get-docker/) will be used to run DeepTrio and
+[hap.py](https://github.com/illumina/hap.py),
+
+### Download Reference
+
+We will be using GRCh38 for this case study.
+
+```bash
+mkdir -p reference
+
+FTPDIR=ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.15_GRCh38/seqs_for_alignment_pipelines.ucsc_ids
+
+curl ${FTPDIR}/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz | gunzip > reference/GRCh38_no_alt_analysis_set.fasta
+curl ${FTPDIR}/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.fai > reference/GRCh38_no_alt_analysis_set.fasta.fai
+```
+
+### Download Genome in a Bottle Benchmarks
+
+We will benchmark our variant calls against v4.2.1 of the Genome in a Bottle
+small variant benchmarks for HG002, HG003, and HG004 trio.
+
+```bash
+mkdir -p benchmark
+
+FTPDIR=ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/release/AshkenazimTrio
+
+curl ${FTPDIR}/HG002_NA24385_son/NISTv4.2.1/GRCh38/HG002_GRCh38_1_22_v4.2.1_benchmark_noinconsistent.bed > benchmark/HG002_GRCh38_1_22_v4.2.1_benchmark_noinconsistent.bed
+curl ${FTPDIR}/HG002_NA24385_son/NISTv4.2.1/GRCh38/HG002_GRCh38_1_22_v4.2.1_benchmark.vcf.gz > benchmark/HG002_GRCh38_1_22_v4.2.1_benchmark.vcf.gz
+curl ${FTPDIR}/HG002_NA24385_son/NISTv4.2.1/GRCh38/HG002_GRCh38_1_22_v4.2.1_benchmark.vcf.gz.tbi > benchmark/HG002_GRCh38_1_22_v4.2.1_benchmark.vcf.gz.tbi
+
+curl ${FTPDIR}/HG003_NA24149_father/NISTv4.2.1/GRCh38/HG003_GRCh38_1_22_v4.2.1_benchmark_noinconsistent.bed > benchmark/HG003_GRCh38_1_22_v4.2.1_benchmark_noinconsistent.bed
+curl ${FTPDIR}/HG003_NA24149_father/NISTv4.2.1/GRCh38/HG003_GRCh38_1_22_v4.2.1_benchmark.vcf.gz > benchmark/HG003_GRCh38_1_22_v4.2.1_benchmark.vcf.gz
+curl ${FTPDIR}/HG003_NA24149_father/NISTv4.2.1/GRCh38/HG003_GRCh38_1_22_v4.2.1_benchmark.vcf.gz.tbi > benchmark/HG003_GRCh38_1_22_v4.2.1_benchmark.vcf.gz.tbi
+
+curl ${FTPDIR}/HG004_NA24143_mother/NISTv4.2.1/GRCh38/HG004_GRCh38_1_22_v4.2.1_benchmark_noinconsistent.bed > benchmark/HG004_GRCh38_1_22_v4.2.1_benchmark_noinconsistent.bed
+curl ${FTPDIR}/HG004_NA24143_mother/NISTv4.2.1/GRCh38/HG004_GRCh38_1_22_v4.2.1_benchmark.vcf.gz > benchmark/HG004_GRCh38_1_22_v4.2.1_benchmark.vcf.gz
+curl ${FTPDIR}/HG004_NA24143_mother/NISTv4.2.1/GRCh38/HG004_GRCh38_1_22_v4.2.1_benchmark.vcf.gz.tbi > benchmark/HG004_GRCh38_1_22_v4.2.1_benchmark.vcf.gz.tbi
+```
+
+### Download HG002, HG003, and HG004 BAM files
+
+We'll use HG002, HG003, HG004 ONT R10.4 reads. These reads have been
+aligned to the GRCh38_no_alt_analysis reference using minimap2.
+
+```bash
+mkdir -p input
+HTTPDIR=https://storage.googleapis.com/deepvariant/ont-case-study-testdata
+
+curl ${HTTPDIR}/HG002_R104_sup_merged.50x.chr20.bam > input/HG002_R104_sup_merged.50x.chr20.bam
+curl ${HTTPDIR}/HG002_R104_sup_merged.50x.chr20.bam.bai > input/HG002_R104_sup_merged.50x.chr20.bam.bai
+
+curl ${HTTPDIR}/HG003_R104_sup_merged.40x.chr20.bam > input/HG003_R104_sup_merged.40x.chr20.bam
+curl ${HTTPDIR}/HG003_R104_sup_merged.40x.chr20.bam.bai > input/HG003_R104_sup_merged.40x.chr20.bam.bai
+
+curl ${HTTPDIR}/HG004_R104_sup_merged.40x.chr20.bam > input/HG004_R104_sup_merged.40x.chr20.bam
+curl ${HTTPDIR}/HG004_R104_sup_merged.40x.chr20.bam.bai > input/HG004_R104_sup_merged.40x.chr20.bam.bai
+```
+
+## Running DeepTrio with one command
+
+DeepTrio pipeline consists of 4 steps: `make_examples`, `call_variants`,
+`postprocess_variants` and `GLnexus merge`. It is possible to run the first
+three steps with one command using the `run_deeptrio` script. GLnexus
+is run as a separate command.
+
+### Running on a CPU-only machine
+
+```bash
+mkdir -p output
+mkdir -p output/intermediate_results_dir
+
+BIN_VERSION="1.6.0"
+
+sudo apt -y update
+sudo apt-get -y install docker.io
+sudo docker pull google/deepvariant:deeptrio-"${BIN_VERSION}"
+
+time sudo docker run \
+  -v "${PWD}/input":"/input" \
+  -v "${PWD}/output":"/output" \
+  -v "${PWD}/reference":"/reference" \
+  google/deepvariant:deeptrio-"${BIN_VERSION}" \
+  /opt/deepvariant/bin/deeptrio/run_deeptrio \
+  --model_type ONT \
+  --ref /reference/GRCh38_no_alt_analysis_set.fasta \
+  --reads_child /input/HG002_R104_sup_merged.50x.chr20.bam \
+  --reads_parent1 /input/HG003_R104_sup_merged.40x.chr20.bam \
+  --reads_parent2 /input/HG004_R104_sup_merged.40x.chr20.bam \
+  --output_vcf_child /output/HG002.output.vcf.gz \
+  --output_vcf_parent1 /output/HG003.output.vcf.gz \
+  --output_vcf_parent2 /output/HG004.output.vcf.gz \
+  --sample_name_child 'HG002' \
+  --sample_name_parent1 'HG003' \
+  --sample_name_parent2 'HG004' \
+  --num_shards $(nproc) \
+  --intermediate_results_dir /output/intermediate_results_dir \
+  --output_gvcf_child /output/HG002.g.vcf.gz \
+  --output_gvcf_parent1 /output/HG003.g.vcf.gz \
+  --output_gvcf_parent2 /output/HG004.g.vcf.gz \
+  --regions chr20
+```
+
+By specifying `--model_type ONT`, you'll be using a model that is best suited
+for ONT R10.4 Whole Genome Sequencing data.
+
+NOTE: If you want to run each of the steps separately, add `--dry_run=true`
+to the command above to figure out what flags you need in each step. Based on
+the different model types, different flags are needed in the `make_examples`
+step.
+
+`--intermediate_results_dir` flag is optional. By specifying it, the
+intermediate outputs of `make_examples` and `call_variants` stages can be found
+in the directory. After the command, you can find these files in the directory:
+
+```
+call_variants_output_child.tfrecord.gz
+call_variants_output_parent1.tfrecord.gz
+call_variants_output_parent2.tfrecord.gz
+
+gvcf_child.tfrecord-?????-of-?????.gz
+gvcf_parent1.tfrecord-?????-of-?????.gz
+gvcf_parent2.tfrecord-?????-of-?????.gz
+
+make_examples_child.tfrecord-?????-of-?????.gz
+make_examples_parent1.tfrecord-?????-of-?????.gz
+make_examples_parent2.tfrecord-?????-of-?????.gz
+```
+
+For running on GPU machines, or using Singularity instead of Docker, see
+[Quick Start](deepvariant-quick-start.md).
+
+## Merge VCFs using GLnexus
+
+At this step we take all 3 VCFs generated in the previous step and merge them
+using GLnexus.
+
+```bash
+sudo docker pull quay.io/mlin/glnexus:v1.2.7
+
+# bcftools and bgzip are now included in our docker images.
+# You can also install them separately.
+sudo docker run \
+  -v "${PWD}/output":"/output" \
+  quay.io/mlin/glnexus:v1.2.7 \
+  /usr/local/bin/glnexus_cli \
+  --config DeepVariant_unfiltered \
+  /output/HG002.g.vcf.gz \
+  /output/HG003.g.vcf.gz \
+  /output/HG004.g.vcf.gz \
+  | sudo docker run -i google/deepvariant:deeptrio-"${BIN_VERSION}" \
+    bcftools view - \
+  | sudo docker run -i google/deepvariant:deeptrio-"${BIN_VERSION}" \
+    bgzip -c > output/HG002_trio_merged.vcf.gz
+```
+
+After completion of GLnexus command we should have a new merged VCF file in the
+output directory.
+
+```
+HG002_trio_merged.vcf.gz
+```
+
+## Benchmark on chr20
+
+### Calculate Mendelian Violation rate
+
+```bash
+sudo docker pull realtimegenomics/rtg-tools
+
+sudo docker run \
+  -v "${PWD}/input":"/input" \
+  -v "${PWD}/reference":"/reference" \
+  realtimegenomics/rtg-tools format \
+  -o /reference/GRCh38_no_alt_analysis_set.sdf "/reference/GRCh38_no_alt_analysis_set.fasta"
+
+FILE="reference/trio.ped"
+cat <<EOM >$FILE
+#PED format pedigree
+#
+#fam-id/ind-id/pat-id/mat-id: 0=unknown
+#sex: 1=male; 2=female; 0=unknown
+#phenotype: -9=missing, 0=missing; 1=unaffected; 2=affected
+#
+#fam-id ind-id pat-id mat-id sex phen
+1 HG002 HG003 HG004 1 0
+1 HG003 0 0 1 0
+1 HG004 0 0 2 0
+EOM
+
+sudo docker run \
+-v "${PWD}/input":"/input" \
+-v "${PWD}/reference":"/reference" \
+-v "${PWD}/output":"/output" \
+realtimegenomics/rtg-tools mendelian \
+-i "/output/HG002_trio_merged.vcf.gz" \
+-o "/output/HG002_trio_annotated.output.vcf.gz" \
+--pedigree=/reference/trio.ped \
+-t /reference/GRCh38_no_alt_analysis_set.sdf \
+| tee output/deepvariant.input_rtg_output.txt
+```
+
+As a result we should get the following output:
+
+```bash
+Checking: /output/HG002_trio_merged.vcf.gz
+Family: [HG003 + HG004] -> [HG002]
+164 non-pass records were skipped
+Concordance HG002: F:142343/143808 (98.98%)  M:144330/146059 (98.82%)  F+M:135630/138870 (97.67%)
+Sample HG002 has less than 99.0 concordance with both parents. Check for incorrect pedigree or sample mislabelling.
+0/168963 (0.00%) records did not conform to expected call ploidy
+156632/168963 (92.70%) records were variant in at least 1 family member and checked for Mendelian constraints
+16308/156632 (10.41%) records had indeterminate consistency status due to incomplete calls
+4099/156632 (2.62%) records contained a violation of Mendelian constraints
+```
+
+### Benchmark variant calls against 4.2.1 truth set with hap.py
+
+```bash
+mkdir -p happy
+
+sudo docker pull jmcdani20/hap.py:v0.3.12
+
+sudo docker run \
+  -v "${PWD}/benchmark":"/benchmark" \
+  -v "${PWD}/input":"/input" \
+  -v "${PWD}/output":"/output" \
+  -v "${PWD}/reference":"/reference" \
+  -v "${PWD}/happy:/happy" \
+  jmcdani20/hap.py:v0.3.12 /opt/hap.py/bin/hap.py \
+  /benchmark/HG002_GRCh38_1_22_v4.2.1_benchmark.vcf.gz \
+  /output/HG002.output.vcf.gz \
+  -f /benchmark/HG002_GRCh38_1_22_v4.2.1_benchmark_noinconsistent.bed \
+  -r /reference/GRCh38_no_alt_analysis_set.fasta \
+  -o /happy/HG002.output \
+  --engine=vcfeval \
+  --pass-only \
+  -l chr20
+
+sudo docker run \
+  -v "${PWD}/benchmark":"/benchmark" \
+  -v "${PWD}/input":"/input" \
+  -v "${PWD}/output":"/output" \
+  -v "${PWD}/reference":"/reference" \
+  -v "${PWD}/happy:/happy" \
+  jmcdani20/hap.py:v0.3.12 /opt/hap.py/bin/hap.py \
+  /benchmark/HG003_GRCh38_1_22_v4.2.1_benchmark.vcf.gz \
+  /output/HG003.output.vcf.gz \
+  -f /benchmark/HG003_GRCh38_1_22_v4.2.1_benchmark_noinconsistent.bed \
+  -r /reference/GRCh38_no_alt_analysis_set.fasta \
+  -o /happy/HG003.output \
+  --engine=vcfeval \
+  --pass-only \
+  -l chr20
+
+sudo docker run \
+  -v "${PWD}/benchmark":"/benchmark" \
+  -v "${PWD}/input":"/input" \
+  -v "${PWD}/output":"/output" \
+  -v "${PWD}/reference":"/reference" \
+  -v "${PWD}/happy:/happy" \
+  jmcdani20/hap.py:v0.3.12 /opt/hap.py/bin/hap.py \
+  /benchmark/HG004_GRCh38_1_22_v4.2.1_benchmark.vcf.gz \
+  /output/HG004.output.vcf.gz \
+  -f /benchmark/HG004_GRCh38_1_22_v4.2.1_benchmark_noinconsistent.bed \
+  -r /reference/GRCh38_no_alt_analysis_set.fasta \
+  -o /happy/HG004.output \
+  --engine=vcfeval \
+  --pass-only \
+  -l chr20
+```
+
+```
+Benchmarking Summary for HG002:
+Benchmarking Summary:
+Type Filter  TRUTH.TOTAL  TRUTH.TP  TRUTH.FN  QUERY.TOTAL  QUERY.FP  QUERY.UNK  FP.gt  FP.al  METRIC.Recall  METRIC.Precision  METRIC.Frac_NA  METRIC.F1_Score  TRUTH.TOTAL.TiTv_ratio  QUERY.TOTAL.TiTv_ratio  TRUTH.TOTAL.het_hom_ratio  QUERY.TOTAL.het_hom_ratio
+INDEL    ALL        11256      9916      1340        20944      1295       9272    478    519       0.880952          0.889051        0.442704         0.884983                     NaN                     NaN                   1.561710                   2.346610
+INDEL   PASS        11256      9916      1340        20944      1295       9272    478    519       0.880952          0.889051        0.442704         0.884983                     NaN                     NaN                   1.561710                   2.346610
+  SNP    ALL        71333     71199       134       100632        84      29325     59     22       0.998121          0.998822        0.291408         0.998472                2.314904                1.795903                   1.715978                   1.935631
+  SNP   PASS        71333     71199       134       100632        84      29325     59     22       0.998121          0.998822        0.291408         0.998472                2.314904                1.795903                   1.715978                   1.935631
+
+Benchmarking Summary for HG003:
+Benchmarking Summary:
+Type Filter  TRUTH.TOTAL  TRUTH.TP  TRUTH.FN  QUERY.TOTAL  QUERY.FP  QUERY.UNK  FP.gt  FP.al  METRIC.Recall  METRIC.Precision  METRIC.Frac_NA  METRIC.F1_Score  TRUTH.TOTAL.TiTv_ratio  QUERY.TOTAL.TiTv_ratio  TRUTH.TOTAL.het_hom_ratio  QUERY.TOTAL.het_hom_ratio
+INDEL    ALL        10628      9044      1584        20580      1348       9720    571    506       0.850960          0.875875        0.472303         0.863238                     NaN                     NaN                   1.748961                   2.292998
+INDEL   PASS        10628      9044      1584        20580      1348       9720    571    506       0.850960          0.875875        0.472303         0.863238                     NaN                     NaN                   1.748961                   2.292998
+  SNP    ALL        70166     70043       123        99648       138      29480     53     32       0.998247          0.998033        0.295841         0.998140                2.296566                1.833087                   1.883951                   1.994315
+  SNP   PASS        70166     70043       123        99648       138      29480     53     32       0.998247          0.998033        0.295841         0.998140                2.296566                1.833087                   1.883951                   1.994315
+
+Benchmarking Summary for HG004:
+Type Filter  TRUTH.TOTAL  TRUTH.TP  TRUTH.FN  QUERY.TOTAL  QUERY.FP  QUERY.UNK  FP.gt  FP.al  METRIC.Recall  METRIC.Precision  METRIC.Frac_NA  METRIC.F1_Score  TRUTH.TOTAL.TiTv_ratio  QUERY.TOTAL.TiTv_ratio  TRUTH.TOTAL.het_hom_ratio  QUERY.TOTAL.het_hom_ratio
+INDEL    ALL        11000      9480      1520        20898      1271       9674    511    526       0.861818          0.886761        0.462915         0.874111                     NaN                     NaN                   1.792709                   2.374125
+INDEL   PASS        11000      9480      1520        20898      1271       9674    511    526       0.861818          0.886761        0.462915         0.874111                     NaN                     NaN                   1.792709                   2.374125
+  SNP    ALL        71659     71563        96       101494        74      29834     31     40       0.998660          0.998967        0.293948         0.998814                2.310073                1.846227                   1.878340                   1.838233
+  SNP   PASS        71659     71563        96       101494        74      29834     31     40       0.998660          0.998967        0.293948         0.998814                2.310073                1.846227                   1.878340                   1.838233
+```
diff --git a/scripts/run_deeptrio.py b/scripts/run_deeptrio.py
index ea48f02c..5ca75f6a 100644
--- a/scripts/run_deeptrio.py
+++ b/scripts/run_deeptrio.py
@@ -536,7 +536,7 @@ def _make_examples_command(
     special_args['vsc_min_fraction_indels'] = 0.12
     special_args['alt_aligned_pileup'] = 'diff_channels'
     special_args['add_hp_channel'] = True
-    special_args['min_mapping_quality'] = 1
+    special_args['min_mapping_quality'] = 5
     special_args['track_ref_reads'] = True
     special_args['pileup_image_height_child'] = (
         DEEP_TRIO_ONT_PILEUP_HEIGHT_CHILD
@@ -551,6 +551,7 @@ def _make_examples_command(
     special_args['parse_sam_aux_fields'] = True
     special_args['discard_non_dna_regions'] = True
     special_args['max_reads_for_dynamic_bases_per_region'] = 200
+    special_args['max_reads_per_partition'] = 500
     kwargs = _update_kwargs_with_warning(kwargs, special_args)
 
   if _MODEL_TYPE.value == 'WES':
@@ -643,6 +644,7 @@ def postprocess_variants_command(
   command.extend(['--ref', '"{}"'.format(ref)])
   command.extend(['--infile', '"{}"'.format(infile)])
   command.extend(['--outfile', '"{}"'.format(outfile)])
+  command.extend(['--cpus 0'])
   if nonvariant_site_tfrecord_path is not None:
     command.extend([
         '--nonvariant_site_tfrecord_path',
@@ -866,9 +868,11 @@ def create_all_commands(intermediate_results_dir):
   # helps to better distribute the work between shards.
 
   candidate_partition_modes = [None]
-  # In DeepTrio PacBio mode, we always enable use_candidate_partition because
-  # otherwise it can run out of memory.
-  if _USE_CANDIDATE_PARTITION.value or _MODEL_TYPE.value == 'PACBIO':
+  # In DeepTrio PacBio and ONT mode, we always enable use_candidate_partition
+  # because otherwise it can run out of memory.
+  use_candidate_partition = False
+  if _USE_CANDIDATE_PARTITION.value or _MODEL_TYPE.value in ['PACBIO', 'ONT']:
+    use_candidate_partition = True
     candidate_partition_modes = [
         CandidatePartitionCommand.SWEEP,
         CandidatePartitionCommand.CANDIDATE_PARTITION_INFERENCE,
@@ -891,7 +895,7 @@ def create_all_commands(intermediate_results_dir):
                 intermediate_results_dir, _NUM_SHARDS.value
             ),
             candidate_partition_mode=candidate_partition_mode
-            if _USE_CANDIDATE_PARTITION.value
+            if use_candidate_partition
             else None,
             # kwargs:
             gvcf=nonvariant_site_tfrecord_path,
diff --git a/scripts/run_deeptrio_test.py b/scripts/run_deeptrio_test.py
index 377e551f..7a5f5499 100644
--- a/scripts/run_deeptrio_test.py
+++ b/scripts/run_deeptrio_test.py
@@ -128,6 +128,7 @@ def test_call_variants_postprocess_variants_commands(self, model_type):
             '--infile '
             '"/tmp/deeptrio_tmp_output/call_variants_output_child.tfrecord.gz" '
             '--outfile "your_vcf_child" '
+            '--cpus 0 '
             '--nonvariant_site_tfrecord_path '
             '"/tmp/deeptrio_tmp_output/gvcf_child.tfrecord@64.gz" '
             '--gvcf_outfile "your_gvcf_child"'
@@ -139,7 +140,8 @@ def test_call_variants_postprocess_variants_commands(self, model_type):
             'time /opt/deepvariant/bin/postprocess_variants --ref "your_ref"'
             ' --infile'
             ' "/tmp/deeptrio_tmp_output/call_variants_output_parent1.tfrecord.gz"'
-            ' --outfile "your_vcf_parent1" --nonvariant_site_tfrecord_path'
+            ' --outfile "your_vcf_parent1" --cpus 0'
+            ' --nonvariant_site_tfrecord_path'
             ' "/tmp/deeptrio_tmp_output/gvcf_parent1.tfrecord@64.gz"'
             ' --gvcf_outfile "your_gvcf_parent1"'
         ),
@@ -150,7 +152,8 @@ def test_call_variants_postprocess_variants_commands(self, model_type):
             'time /opt/deepvariant/bin/postprocess_variants --ref "your_ref"'
             ' --infile'
             ' "/tmp/deeptrio_tmp_output/call_variants_output_parent2.tfrecord.gz"'
-            ' --outfile "your_vcf_parent2" --nonvariant_site_tfrecord_path'
+            ' --outfile "your_vcf_parent2" --cpus 0'
+            ' --nonvariant_site_tfrecord_path'
             ' "/tmp/deeptrio_tmp_output/gvcf_parent2.tfrecord@64.gz"'
             ' --gvcf_outfile "your_gvcf_parent2"'
         ),
@@ -227,6 +230,7 @@ def test_duo_call_variants_postprocess_variants_commands(
             '--infile '
             '"/tmp/deeptrio_tmp_output/call_variants_output_child.tfrecord.gz" '
             '--outfile "your_vcf_child" '
+            '--cpus 0 '
             '--nonvariant_site_tfrecord_path '
             '"/tmp/deeptrio_tmp_output/gvcf_child.tfrecord@64.gz" '
             '--gvcf_outfile "your_gvcf_child"'
@@ -238,7 +242,8 @@ def test_duo_call_variants_postprocess_variants_commands(
             'time /opt/deepvariant/bin/postprocess_variants --ref "your_ref"'
             ' --infile'
             ' "/tmp/deeptrio_tmp_output/call_variants_output_parent1.tfrecord.gz"'
-            ' --outfile "your_vcf_parent1" --nonvariant_site_tfrecord_path'
+            ' --outfile "your_vcf_parent1" --cpus 0'
+            ' --nonvariant_site_tfrecord_path'
             ' "/tmp/deeptrio_tmp_output/gvcf_parent1.tfrecord@64.gz"'
             ' --gvcf_outfile "your_gvcf_parent1"'
         ),
@@ -271,21 +276,8 @@ def test_duo_call_variants_postprocess_variants_commands(
           + '--pileup_image_height_parent "100" ',
       ),
       (
-          'PACBIO',
-          False,
-          '--add_hp_channel --alt_aligned_pileup "diff_channels" '
-          + '--discard_non_dna_regions '
-          + '--gvcf "/tmp/deeptrio_tmp_output/gvcf.tfrecord@64.gz" '
-          + '--max_reads_for_dynamic_bases_per_region "200" '
-          + '--min_mapping_quality "1" '
-          + '--parse_sam_aux_fields --partition_size "25000" --phase_reads '
-          + '--pileup_image_height_child "60" '
-          + '--pileup_image_height_parent "40" --pileup_image_width "199" '
-          + '--norealign_reads --sort_by_haplotypes '
-          + '--track_ref_reads '
-          + '--vsc_min_fraction_indels "0.12" ',
-      ),
-      (
+          # For pacbio candidate paritionning is turned on by default.
+          # Currently, there is no way to disable it.
           'PACBIO',
           True,
           '--add_hp_channel --alt_aligned_pileup "diff_channels" '
@@ -343,6 +335,8 @@ def test_make_examples_commands_with_types(
     FLAGS.output_gvcf_parent1 = 'your_gvcf_parent1'
     FLAGS.output_gvcf_parent2 = 'your_gvcf_parent2'
     FLAGS.num_shards = 64
+    if model_type == 'PACBIO':
+      use_candidate_partition = True
     make_examples_command_index = 1 if use_candidate_partition else 0
     commands, _, _ = self._create_all_commands_and_check_stdout()
     self.assertEqual(
@@ -463,6 +457,8 @@ def test_make_examples_commands_with_candidate_partition(
       (
           'PACBIO',
           '--add_hp_channel --alt_aligned_pileup "diff_channels" '
+          + '--candidate_positions '
+          + '"/tmp/deeptrio_tmp_output/candidate_positions@64" '
           + '--discard_non_dna_regions '
           + '--gvcf "/tmp/deeptrio_tmp_output/gvcf.tfrecord@64.gz" '
           + '--max_reads_for_dynamic_bases_per_region "200" '
@@ -491,8 +487,12 @@ def test_duo_make_examples_commands_with_types(
     FLAGS.output_gvcf_parent1 = 'your_gvcf_parent1'
     FLAGS.num_shards = 64
     commands, _, _ = self._create_all_commands_and_check_stdout()
+    use_candidate_partition = False
+    if model_type == 'PACBIO':
+      use_candidate_partition = True
+    make_examples_command_index = 1 if use_candidate_partition else 0
     self.assertEqual(
-        commands[0],
+        commands[make_examples_command_index],
         'time seq 0 63 '
         '| parallel -q --halt 2 --line-buffer '
         '/opt/deepvariant/bin/deeptrio/make_examples '
@@ -511,44 +511,36 @@ def test_duo_make_examples_commands_with_types(
       (
           None,
           (
-              '--add_hp_channel '
-              '--alt_aligned_pileup "diff_channels" '
-              '--discard_non_dna_regions '
-              '--gvcf "/tmp/deeptrio_tmp_output/gvcf.tfrecord@64.gz" '
-              '--max_reads_for_dynamic_bases_per_region "200" '
-              '--min_mapping_quality "1" '
-              '--parse_sam_aux_fields '
-              '--partition_size "25000" '
-              '--phase_reads '
-              '--pileup_image_height_child "60" '
-              '--pileup_image_height_parent "40" '
-              '--pileup_image_width "199" '
-              '--norealign_reads '
-              '--sort_by_haplotypes '
-              '--track_ref_reads '
-              '--vsc_min_fraction_indels "0.12" '
+              '--add_hp_channel --alt_aligned_pileup "diff_channels"'
+              ' --candidate_positions'
+              ' "/tmp/deeptrio_tmp_output/candidate_positions@64"'
+              ' --discard_non_dna_regions --gvcf'
+              ' "/tmp/deeptrio_tmp_output/gvcf.tfrecord@64.gz"'
+              ' --max_reads_for_dynamic_bases_per_region "200"'
+              ' --min_mapping_quality "1" --parse_sam_aux_fields'
+              ' --partition_size "10000" --phase_reads'
+              ' --pileup_image_height_child "60" --pileup_image_height_parent'
+              ' "40" --pileup_image_width "199" --norealign_reads'
+              ' --sort_by_haplotypes --track_ref_reads'
+              ' --vsc_min_fraction_indels "0.12" '
           ),
           None,
       ),
       (
           'alt_aligned_pileup="rows",vsc_min_fraction_indels=0.03',
           (
-              '--add_hp_channel '
-              '--alt_aligned_pileup "rows" '
-              '--discard_non_dna_regions '
-              '--gvcf "/tmp/deeptrio_tmp_output/gvcf.tfrecord@64.gz" '
-              '--max_reads_for_dynamic_bases_per_region "200" '
-              '--min_mapping_quality "1" '
-              '--parse_sam_aux_fields '
-              '--partition_size "25000" '
-              '--phase_reads '
-              '--pileup_image_height_child "60" '
-              '--pileup_image_height_parent "40" '
-              '--pileup_image_width "199" '
-              '--norealign_reads '
-              '--sort_by_haplotypes '
-              '--track_ref_reads '
-              '--vsc_min_fraction_indels "0.03" '
+              '--add_hp_channel --alt_aligned_pileup "rows"'
+              ' --candidate_positions'
+              ' "/tmp/deeptrio_tmp_output/candidate_positions@64"'
+              ' --discard_non_dna_regions --gvcf'
+              ' "/tmp/deeptrio_tmp_output/gvcf.tfrecord@64.gz"'
+              ' --max_reads_for_dynamic_bases_per_region "200"'
+              ' --min_mapping_quality "1" --parse_sam_aux_fields'
+              ' --partition_size "10000" --phase_reads'
+              ' --pileup_image_height_child "60" --pileup_image_height_parent'
+              ' "40" --pileup_image_width "199" --norealign_reads'
+              ' --sort_by_haplotypes --track_ref_reads'
+              ' --vsc_min_fraction_indels "0.03" '
           ),
           # Because PacBio uses candidate_sweep, make_examples got run twice.
           (
@@ -589,7 +581,7 @@ def test_pacbio_args_overwrite(
     self.assertEqual(
         commands[0],
         'time seq 0 63 | parallel -q --halt 2 --line-buffer '
-        '/opt/deepvariant/bin/deeptrio/make_examples --mode calling '
+        '/opt/deepvariant/bin/deeptrio/make_examples --mode candidate_sweep '
         '--ref "your_ref" --reads_parent1 "your_bam_parent1" '
         '--reads_parent2 "your_bam_parent2" '
         '--reads "your_bam_child" '
@@ -751,6 +743,7 @@ def test_postprocess_variants_extra_args(
         '--infile '
         '"/tmp/deeptrio_tmp_output/call_variants_output_child.tfrecord.gz" '
         '--outfile "your_vcf_child" '
+        '--cpus 0 '
         '--nonvariant_site_tfrecord_path '
         '"/tmp/deeptrio_tmp_output/gvcf_child.tfrecord@64.gz" '
         '--gvcf_outfile "your_gvcf_child" '