Skip to content

Commit

Permalink
Automated g4 rollback of changelist 564884573
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 565106180
  • Loading branch information
kishwarshafin authored and copybara-github committed Sep 13, 2023
1 parent 5b54841 commit 3f35db0
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 88 deletions.
8 changes: 0 additions & 8 deletions deepvariant/postprocess_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -1331,10 +1331,6 @@ def main(argv=()):
)
if FLAGS.outfile.endswith('.gz'):
build_index(FLAGS.outfile, use_csi)
if _SOMATIC_VARIANTS_PATH.value and FLAGS.somatic_variants_path.endswith(
'.gz'
):
build_index(FLAGS.somatic_variants_path, use_csi)
logging.info(
'VCF creation took %s minutes', (time.time() - start_time) / 60
)
Expand Down Expand Up @@ -1366,10 +1362,6 @@ def main(argv=()):
)
if FLAGS.outfile.endswith('.gz'):
build_index(FLAGS.outfile, use_csi)
if FLAGS.somatic_variants_path and FLAGS.somatic_variants_path.endswith(
'.gz'
):
build_index(FLAGS.somatic_variants_path, use_csi)
if FLAGS.gvcf_outfile.endswith('.gz'):
build_index(FLAGS.gvcf_outfile, use_csi)
logging.info(
Expand Down
66 changes: 21 additions & 45 deletions scripts/run_deepsomatic.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,17 +90,8 @@
'compatible with --ref.'
),
)
_OUTPUT_NON_SOMATIC_VCF = flags.DEFINE_string(
'output_non_somatic_vcf',
None,
'Required. Path where we should write VCF file containing non-somatic'
' variants.',
)
_OUTPUT_SOMATIC_VCF = flags.DEFINE_string(
'output_somatic_vcf',
None,
'Required. Path where we should write VCF file containing somatic'
' variants.',
_OUTPUT_VCF = flags.DEFINE_string(
'output_vcf', None, 'Required. Path where we should write VCF file.'
)
# Optional flags.
_DRY_RUN = flags.DEFINE_boolean(
Expand Down Expand Up @@ -218,6 +209,11 @@
),
)

# Optional flags for postprocess_variants.
_OUTPUT_GVCF = flags.DEFINE_string(
'output_gvcf', None, 'Optional. Path where we should write gVCF file.'
)

# Optional flags for vcf_stats_report.
_VCF_STATS_REPORT = flags.DEFINE_boolean(
'vcf_stats_report',
Expand All @@ -227,16 +223,8 @@
'statistics about the output VCF.'
),
)
_SOMATIC_REPORT_TITLE = flags.DEFINE_string(
'somatic_report',
None,
(
'Optional. Title for the VCF stats report (HTML).'
'If not provided, the title will be the sample name.'
),
)
_NON_SOMATIC_REPORT_TITLE = flags.DEFINE_string(
'non_somatic_report',
_REPORT_TITLE = flags.DEFINE_string(
'report_title',
None,
(
'Optional. Title for the VCF stats report (HTML).'
Expand Down Expand Up @@ -445,16 +433,14 @@ def postprocess_variants_command(
ref: str,
infile: str,
outfile: str,
somatic_outfile: str,
extra_args: str,
**kwargs,
**kwargs
) -> Tuple[str, Optional[str]]:
"""Returns a postprocess_variants (command, logfile) for subprocess."""
command = ['time', '/opt/deepvariant/bin/postprocess_variants']
command.extend(['--ref', '"{}"'.format(ref)])
command.extend(['--infile', '"{}"'.format(infile)])
command.extend(['--outfile', '"{}"'.format(outfile)])
command.extend(['--somatic_variants_path', '"{}"'.format(somatic_outfile)])
# Extend the command with all items in kwargs and extra_args.
kwargs = _update_kwargs_with_warning(kwargs, _extra_args_to_dict(extra_args))
command = _extend_command_by_args_dict(command, kwargs)
Expand Down Expand Up @@ -571,8 +557,12 @@ def create_all_commands_and_logfiles(intermediate_results_dir: str,
check_flags()
commands = []
# make_examples_somatic
# This will always be none as GVCFs are not supported in somatic mode
nonvariant_site_tfrecord_path = None
if _OUTPUT_GVCF.value is not None:
nonvariant_site_tfrecord_path = os.path.join(
intermediate_results_dir,
'gvcf.tfrecord@{}.gz'.format(_NUM_SHARDS.value),
)

examples = os.path.join(
intermediate_results_dir,
Expand Down Expand Up @@ -633,34 +623,26 @@ def create_all_commands_and_logfiles(intermediate_results_dir: str,
postprocess_variants_command(
ref=_REF.value,
infile=call_variants_output,
outfile=_OUTPUT_NON_SOMATIC_VCF.value,
somatic_outfile=_OUTPUT_SOMATIC_VCF.value,
outfile=_OUTPUT_VCF.value,
extra_args=_POSTPROCESS_VARIANTS_EXTRA_ARGS.value,
nonvariant_site_tfrecord_path=nonvariant_site_tfrecord_path,
gvcf_outfile=_OUTPUT_GVCF.value,
)
)

# vcf_stats_report
if _VCF_STATS_REPORT.value:
# Report for non-somatic variants
commands.append(
vcf_stats_report_command(
vcf_path=_OUTPUT_NON_SOMATIC_VCF.value,
title=_NON_SOMATIC_REPORT_TITLE.value,
)
)
commands.append(
vcf_stats_report_command(
vcf_path=_OUTPUT_SOMATIC_VCF.value,
title=_SOMATIC_REPORT_TITLE.value,
vcf_path=_OUTPUT_VCF.value, title=_REPORT_TITLE.value
)
)

# runtime-by-region
if _LOGGING_DIR.value and _RUNTIME_REPORT.value:
commands.append(
runtime_by_region_vis_command(
runtime_by_region_path, title=_SOMATIC_REPORT_TITLE.value
runtime_by_region_path, title=_REPORT_TITLE.value
)
)

Expand All @@ -672,14 +654,8 @@ def main(_):
print('DeepSomatic: DeepVariant version {}'.format(DEEP_VARIANT_VERSION))
return

for flag_key in [
'model_type',
'ref',
'reads_tumor',
'reads_normal',
'output_non_somatic_vcf',
'output_somatic_vcf',
]:
for flag_key in ['model_type', 'ref', 'reads_tumor', 'reads_normal',
'output_vcf']:
if FLAGS.get_flag_value(flag_key, None) is None:
sys.stderr.write('--{} is required.\n'.format(flag_key))
sys.stderr.write('Pass --helpshort or --helpfull to see help on flags.\n')
Expand Down
85 changes: 50 additions & 35 deletions scripts/run_deepsomatic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,19 @@ def test_basic_commands(self, model_type, use_keras_model):
FLAGS.ref = 'your_ref'
FLAGS.reads_tumor = 'your_tumor_bam'
FLAGS.reads_normal = 'your_normal_bam'
FLAGS.output_somatic_vcf = 'your_somatic_vcf'
FLAGS.output_non_somatic_vcf = 'your_non_somatic_vcf'
FLAGS.output_vcf = 'your_vcf'
FLAGS.output_gvcf = 'your_gvcf'
FLAGS.num_shards = 64
FLAGS.customized_model = '/opt/models/wgs/model.ckpt'
FLAGS.use_keras_model = use_keras_model
commands = run_deepsomatic.create_all_commands_and_logfiles(
'/tmp/deepsomatic_tmp_output', used_in_test=True
)

extra_args_plus_gvcf = (
'--gvcf "/tmp/deepsomatic_tmp_output/gvcf.tfrecord@64.gz"'
)

self.assertEqual(
commands[0][0],
'time seq 0 63 | parallel -q --halt 2 --line-buffer'
Expand All @@ -67,10 +72,11 @@ def test_basic_commands(self, model_type, use_keras_model):
' "your_normal_bam" --examples'
' "/tmp/deepsomatic_tmp_output/make_examples_somatic.tfrecord@64.gz"'
' --channels "insert_size"'
' %s'
' --vsc_max_fraction_indels_for_non_target_sample "0.5"'
' --vsc_max_fraction_snps_for_non_target_sample "0.5"'
' --vsc_min_fraction_indels "0.05" --vsc_min_fraction_snps "0.029"'
' --task {}',
' --task {}' % (extra_args_plus_gvcf),
)
call_variants_bin = (
'call_variants_keras' if use_keras_model else 'call_variants'
Expand All @@ -92,8 +98,10 @@ def test_basic_commands(self, model_type, use_keras_model):
'--ref "your_ref" '
'--infile '
'"/tmp/deepsomatic_tmp_output/call_variants_output.tfrecord.gz" '
'--outfile "your_non_somatic_vcf" '
'--somatic_variants_path "your_somatic_vcf"'
'--outfile "your_vcf" '
'--gvcf_outfile "your_gvcf" '
'--nonvariant_site_tfrecord_path '
'"/tmp/deepsomatic_tmp_output/gvcf.tfrecord@64.gz"'
),
)

Expand All @@ -107,8 +115,8 @@ def test_sample_name_command(self, sample_name_tumor, sample_name_normal):
FLAGS.ref = 'your_ref'
FLAGS.reads_tumor = 'your_tumor_bam'
FLAGS.reads_normal = 'your_normal_bam'
FLAGS.output_somatic_vcf = 'your_somatic_vcf'
FLAGS.output_non_somatic_vcf = 'your_non_somatic_vcf'
FLAGS.output_vcf = 'your_vcf'
FLAGS.output_gvcf = None
FLAGS.num_shards = 64
FLAGS.regions = None
FLAGS.sample_name_tumor = sample_name_tumor
Expand Down Expand Up @@ -158,8 +166,7 @@ def test_sample_name_command(self, sample_name_tumor, sample_name_normal):
'--ref "your_ref" '
'--infile '
'"/tmp/deepsomatic_tmp_output/call_variants_output.tfrecord.gz" '
'--outfile "your_non_somatic_vcf" '
'--somatic_variants_path "your_somatic_vcf"',
'--outfile "your_vcf"'
)

@parameterized.parameters(
Expand All @@ -184,8 +191,8 @@ def test_make_examples_extra_args_boolean(
FLAGS.ref = 'your_ref'
FLAGS.reads_tumor = 'your_tumor_bam'
FLAGS.reads_normal = 'your_normal_bam'
FLAGS.output_somatic_vcf = 'your_somatic_vcf'
FLAGS.output_non_somatic_vcf = 'your_non_somatic_vcf'
FLAGS.output_vcf = 'your_vcf'
FLAGS.output_gvcf = 'your_gvcf'
FLAGS.num_shards = 64
FLAGS.make_examples_extra_args = make_examples_extra_args
FLAGS.customized_model = '/opt/models/wgs/model.ckpt'
Expand All @@ -200,7 +207,8 @@ def test_make_examples_extra_args_boolean(
' "your_normal_bam" --examples'
' "/tmp/deepsomatic_tmp_output/make_examples_somatic.tfrecord@64.gz"'
' --channels "insert_size"'
' %s'
' --gvcf'
' "/tmp/deepsomatic_tmp_output/gvcf.tfrecord@64.gz" %s'
' --vsc_max_fraction_indels_for_non_target_sample "0.5"'
' --vsc_max_fraction_snps_for_non_target_sample "0.5"'
' --vsc_min_fraction_indels "0.05" --vsc_min_fraction_snps "0.029"'
Expand All @@ -213,26 +221,30 @@ def test_logging_dir(self):
FLAGS.ref = 'your_ref'
FLAGS.reads_tumor = 'your_tumor_bam'
FLAGS.reads_normal = 'your_normal_bam'
FLAGS.output_somatic_vcf = 'your_somatic_vcf'
FLAGS.output_non_somatic_vcf = 'your_non_somatic_vcf'
FLAGS.output_vcf = 'your_vcf'
FLAGS.output_gvcf = 'your_gvcf'
FLAGS.num_shards = 64
FLAGS.logging_dir = '/tmp/deepsomatic_tmp_output/LOGDIR'
FLAGS.customized_model = '/opt/models/wgs/model.ckpt'
commands = run_deepsomatic.create_all_commands_and_logfiles(
'/tmp/deepsomatic_tmp_output', used_in_test=True
)

self.assertEqual(
commands[0][0],
'time seq 0 63 | parallel -q --halt 2 --line-buffer'
' /opt/deepvariant/bin/make_examples_somatic --mode calling'
' --ref "your_ref" --reads_tumor "your_tumor_bam"'
' --reads_normal "your_normal_bam" --examples'
' "/tmp/deepsomatic_tmp_output/make_examples_somatic.tfrecord@64.gz"'
' --channels "insert_size"'
' --vsc_max_fraction_indels_for_non_target_sample "0.5"'
' --vsc_max_fraction_snps_for_non_target_sample "0.5"'
' --vsc_min_fraction_indels "0.05" --vsc_min_fraction_snps "0.029"'
' --task {}',
(
'time seq 0 63 | parallel -q --halt 2 --line-buffer'
' /opt/deepvariant/bin/make_examples_somatic --mode calling --ref'
' "your_ref" --reads_tumor "your_tumor_bam" --reads_normal'
' "your_normal_bam" --examples'
' "/tmp/deepsomatic_tmp_output/make_examples_somatic.tfrecord@64.gz"'
' --channels "insert_size" --gvcf'
' "/tmp/deepsomatic_tmp_output/gvcf.tfrecord@64.gz"'
' --vsc_max_fraction_indels_for_non_target_sample "0.5"'
' --vsc_max_fraction_snps_for_non_target_sample "0.5"'
' --vsc_min_fraction_indels "0.05" --vsc_min_fraction_snps "0.029"'
' --task {}'
),
)

@parameterized.parameters(
Expand All @@ -246,8 +258,8 @@ def test_make_examples_regions(self, regions, expected_args):
FLAGS.ref = 'your_ref'
FLAGS.reads_tumor = 'your_tumor_bam'
FLAGS.reads_normal = 'your_normal_bam'
FLAGS.output_somatic_vcf = 'your_somatic_vcf'
FLAGS.output_non_somatic_vcf = 'your_non_somatic_vcf'
FLAGS.output_vcf = 'your_vcf'
FLAGS.output_gvcf = 'your_gvcf'
FLAGS.num_shards = 64
FLAGS.regions = regions
FLAGS.customized_model = '/opt/models/wgs/model.ckpt'
Expand All @@ -263,7 +275,8 @@ def test_make_examples_regions(self, regions, expected_args):
' "your_normal_bam" --examples'
' "/tmp/deepsomatic_tmp_output/make_examples_somatic.tfrecord@64.gz"'
' --channels "insert_size"'
' %s'
' --gvcf'
' "/tmp/deepsomatic_tmp_output/gvcf.tfrecord@64.gz" %s'
' --vsc_max_fraction_indels_for_non_target_sample "0.5"'
' --vsc_max_fraction_snps_for_non_target_sample "0.5"'
' --vsc_min_fraction_indels "0.05" --vsc_min_fraction_snps "0.029"'
Expand All @@ -278,8 +291,8 @@ def test_make_examples_extra_args_invalid(self):
FLAGS.ref = 'your_ref'
FLAGS.reads_tumor = 'your_tumor_bam'
FLAGS.reads_normal = 'your_normal_bam'
FLAGS.output_somatic_vcf = 'your_somatic_vcf'
FLAGS.output_non_somatic_vcf = 'your_non_somatic_vcf'
FLAGS.output_vcf = 'your_vcf'
FLAGS.output_gvcf = 'your_gvcf'
FLAGS.num_shards = 64
FLAGS.make_examples_extra_args = 'keep_secondary_alignments'
FLAGS.customized_model = '/opt/models/wgs/model.ckpt'
Expand Down Expand Up @@ -309,8 +322,8 @@ def test_call_variants_extra_args(
FLAGS.ref = 'your_ref'
FLAGS.reads_tumor = 'your_tumor_bam'
FLAGS.reads_normal = 'your_normal_bam'
FLAGS.output_somatic_vcf = 'your_somatic_vcf'
FLAGS.output_non_somatic_vcf = 'your_non_somatic_vcf'
FLAGS.output_vcf = 'your_vcf'
FLAGS.output_gvcf = 'your_gvcf'
FLAGS.num_shards = 64
FLAGS.call_variants_extra_args = call_variants_extra_args
FLAGS.customized_model = '/opt/models/wgs/model.ckpt'
Expand Down Expand Up @@ -338,8 +351,8 @@ def test_postprocess_variants_extra_args(
FLAGS.ref = 'your_ref'
FLAGS.reads_tumor = 'your_tumor_bam'
FLAGS.reads_normal = 'your_normal_bam'
FLAGS.output_somatic_vcf = 'your_somatic_vcf'
FLAGS.output_non_somatic_vcf = 'your_non_somatic_vcf'
FLAGS.output_vcf = 'your_vcf'
FLAGS.output_gvcf = 'your_gvcf'
FLAGS.num_shards = 64
FLAGS.postprocess_variants_extra_args = postprocess_variants_extra_args
FLAGS.customized_model = '/opt/models/wgs/model.ckpt'
Expand All @@ -353,8 +366,10 @@ def test_postprocess_variants_extra_args(
'--ref "your_ref" '
'--infile '
'"/tmp/deepsomatic_tmp_output/call_variants_output.tfrecord.gz" '
'--outfile "your_non_somatic_vcf" '
'--somatic_variants_path "your_somatic_vcf" '
'--outfile "your_vcf" '
'--gvcf_outfile "your_gvcf" '
'--nonvariant_site_tfrecord_path '
'"/tmp/deepsomatic_tmp_output/gvcf.tfrecord@64.gz" '
'%s' % expected_args,
)

Expand Down

0 comments on commit 3f35db0

Please sign in to comment.