Skip to content

Commit

Permalink
fixup! 🚧 Use population-based weighted sampling for Asia builds
Browse files Browse the repository at this point in the history
  • Loading branch information
victorlin committed Jun 7, 2024
1 parent f68c86f commit 88c1397
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 14 deletions.
14 changes: 7 additions & 7 deletions nextstrain_profiles/nextstrain-gisaid/builds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ subsampling:
# Early focal samples for Asia
asia_early:
group_by: "country year month"
- group_by_weights: "data/country_population_weights.tsv"
+ group_by_weights: "defaults/population_weights.tsv"
max_sequences: 700
max_date: "--max-date 1M"
exclude: "--exclude-where 'region!=Asia'"
Expand All @@ -199,7 +199,7 @@ subsampling:
# Recent focal samples for Asia
asia_recent:
group_by: "country year month"
- group_by_weights: "data/country_population_weights.tsv"
+ group_by_weights: "defaults/population_weights.tsv"
max_sequences: 2800
min_date: "--min-date 1M"
exclude: "--exclude-where 'region!=Asia'"
Expand All @@ -220,7 +220,7 @@ subsampling:
# Early focal samples for Asia
asia_early:
group_by: "country year month"
- group_by_weights: "data/country_population_weights.tsv"
+ group_by_weights: "defaults/population_weights.tsv"
max_sequences: 700
max_date: "--max-date 2M"
exclude: "--exclude-where 'region!=Asia'"
Expand All @@ -233,7 +233,7 @@ subsampling:
# Recent focal samples for Asia
asia_recent:
group_by: "country year month"
- group_by_weights: "data/country_population_weights.tsv"
+ group_by_weights: "defaults/population_weights.tsv"
max_sequences: 2800
min_date: "--min-date 2M"
exclude: "--exclude-where 'region!=Asia'"
Expand All @@ -254,7 +254,7 @@ subsampling:
# Early focal samples for Asia
asia_early:
group_by: "country year month"
- group_by_weights: "data/country_population_weights.tsv"
+ group_by_weights: "defaults/population_weights.tsv"
max_sequences: 700
max_date: "--max-date 6M"
exclude: "--exclude-where 'region!=Asia'"
Expand All @@ -267,7 +267,7 @@ subsampling:
# Recent focal samples for Asia
asia_recent:
group_by: "country year month"
- group_by_weights: "data/country_population_weights.tsv"
+ group_by_weights: "defaults/population_weights.tsv"
max_sequences: 2800
min_date: "--min-date 6M"
exclude: "--exclude-where 'region!=Asia'"
Expand All @@ -286,7 +286,7 @@ subsampling:
# Focal samples for Asia
asia:
group_by: "country year month"
- group_by_weights: "data/country_population_weights.tsv"
+ group_by_weights: "defaults/population_weights.tsv"
max_sequences: 3500
exclude: "--exclude-where 'region!=Asia'"
# Contextual samples from the rest of the world
Expand Down
14 changes: 7 additions & 7 deletions nextstrain_profiles/nextstrain-open/builds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ subsampling:
# Early focal samples for Asia
asia_early:
group_by: "country year month"
- group_by_weights: "data/country_population_weights.tsv"
+ group_by_weights: "defaults/population_weights.tsv"
max_sequences: 700
max_date: "--max-date 1M"
exclude: "--exclude-where 'region!=Asia'"
Expand All @@ -199,7 +199,7 @@ subsampling:
# Recent focal samples for Asia
asia_recent:
group_by: "country year month"
- group_by_weights: "data/country_population_weights.tsv"
+ group_by_weights: "defaults/population_weights.tsv"
max_sequences: 2800
min_date: "--min-date 1M"
exclude: "--exclude-where 'region!=Asia'"
Expand All @@ -220,7 +220,7 @@ subsampling:
# Early focal samples for Asia
asia_early:
group_by: "country year month"
- group_by_weights: "data/country_population_weights.tsv"
+ group_by_weights: "defaults/population_weights.tsv"
max_sequences: 700
max_date: "--max-date 2M"
exclude: "--exclude-where 'region!=Asia'"
Expand All @@ -233,7 +233,7 @@ subsampling:
# Recent focal samples for Asia
asia_recent:
group_by: "country year month"
- group_by_weights: "data/country_population_weights.tsv"
+ group_by_weights: "defaults/population_weights.tsv"
max_sequences: 2800
min_date: "--min-date 2M"
exclude: "--exclude-where 'region!=Asia'"
Expand All @@ -254,7 +254,7 @@ subsampling:
# Early focal samples for Asia
asia_early:
group_by: "country year month"
- group_by_weights: "data/country_population_weights.tsv"
+ group_by_weights: "defaults/population_weights.tsv"
max_sequences: 700
max_date: "--max-date 6M"
exclude: "--exclude-where 'region!=Asia'"
Expand All @@ -267,7 +267,7 @@ subsampling:
# Recent focal samples for Asia
asia_recent:
group_by: "country year month"
- group_by_weights: "data/country_population_weights.tsv"
+ group_by_weights: "defaults/population_weights.tsv"
max_sequences: 2800
min_date: "--min-date 6M"
exclude: "--exclude-where 'region!=Asia'"
Expand All @@ -286,7 +286,7 @@ subsampling:
# Focal samples for Asia
asia:
group_by: "country year month"
- group_by_weights: "data/country_population_weights.tsv"
+ group_by_weights: "defaults/population_weights.tsv"
max_sequences: 3500
exclude: "--exclude-where 'region!=Asia'"
# Contextual samples from the rest of the world
Expand Down
1 change: 1 addition & 0 deletions workflow/snakemake_rules/main_workflow.smk
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ rule subsample:
"benchmarks/subsample_{build_name}_{subsample}.txt"
params:
group_by = _get_specific_subsampling_setting("group_by", optional=True),
+ # FIXME: pull this from config.files.population_weights?
group_by_weights = _get_specific_subsampling_setting("group_by_weights", optional=True),
sequences_per_group = _get_specific_subsampling_setting("seq_per_group", optional=True),
subsample_max_sequences = _get_specific_subsampling_setting("max_sequences", optional=True),
Expand Down

0 comments on commit 88c1397

Please sign in to comment.