Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

edits related to production pipeline #114

Merged
merged 8 commits into from
Jul 18, 2024
32 changes: 16 additions & 16 deletions workflows/config/snakemake/production_config_HomSap.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,32 +10,32 @@
"num_samples_per_population": [100],
},
{"id":"OutOfAfricaArchaicAdmixture_5R19",
"num_samples_per_population": 3*[100],
"num_samples_per_population": [100, 100, 100],
}
]
"genetic_map": "HapMapII_GRCh38"
"chrm_list": "chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr20, chr21, chr22"
"chrm_list": "chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22"
"dfe_list": ["none", "Gamma_K17"]
"annotation_list": ["all_sites", "ensembl_havana_104_exons", "ensembl_havana_104_exons"]
"mask_file": "workflows/masks/HapmapII_GRCh38.mask.bed"
"stairway_annot_mask" : "" # set this or any of the below to 'none' to skip annot masking
"msmc_annot_mask" : ""
"gone_annot_mask" : ""
"smcpp_annot_mask" : ""
"msmc_annot_mask": ""
"gone_annot_mask": ""
"smcpp_annot_mask": ""

# slim settings
"slim_scaling_factor": 1
"slim_burn_in": 10

# n(t) specific configs
"methods" : ["stairwayplot", "gone", "smcpp", "msmc"]
"num_sampled_genomes_msmc" : [6]
"num_msmc_iterations" : 20
"gone_phase" : 1 # 0 for pseudohaploid, 1 for phased, 2 for unknown phase
"gone_max_snps" : 50000 # default=50000
"gone_threads" : 8
"gone_num_gens" : 2000 # default=2000
"gone_num_bins" : 400 # default=400
"methods": ["stairwayplot", "gone", "smcpp", "msmc"]
"num_sampled_genomes_msmc": [6]
"num_msmc_iterations": 20
"gone_phase": 1 # 0 for pseudohaploid, 1 for phased, 2 for unknown phase
"gone_max_snps": 50000 # default=50000
"gone_threads": 8
"gone_num_gens": 2000 # default=2000
"gone_num_bins": 400 # default=400


# exe paths
Expand All @@ -44,6 +44,6 @@
"dfe_alpha_data_path_1": "ext/dfe-alpha-release-2.16/data"
"dfe_alpha_data_path_2": "three-epoch"
"grapes_exec": "ext/grapes/multi_grapes"
"msmc_exec" : "ext/msmc2/build/release/msmc2"
"stairwayplot_code" : "ext/stairwayplot/swarmops.jar"
"gone_code" : "ext/GONE/Linux"
"msmc_exec": "ext/msmc2/build/release/msmc2"
"stairwayplot_code": "ext/stairwayplot/swarmops.jar"
"gone_code": "ext/GONE/Linux"
40 changes: 18 additions & 22 deletions workflows/config/snakemake/production_config_PhoSin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,36 +6,32 @@
# species-specific settings
"species": "PhoSin"
"demo_models": [
{"id":"Constant",
"num_samples_per_population": [100],
},
{"id":"Vaquita2Epoch_1R22",
"num_samples_per_population": 3*[100],
}
{"id": "Constant", "num_samples_per_population": [100]},
{"id": "Vaquita2Epoch_1R22", "num_samples_per_population": [100]}
]
"genetic_map": null
"chrm_list": "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21"
"chrm_list": "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21"
"dfe_list": ["none", "Gamma_R22"]
"annotation_list": ["all_sites", "Phocoena_sinus.mPhoSin1.pri.110_exons"]
"mask_file": "workflows/masks/PhoSin_fake.mask.bed"
"stairway_annot_mask" : "" # set this or any of the below to 'none' to skip annot masking
"msmc_annot_mask" : ""
"gone_annot_mask" : ""
"smcpp_annot_mask" : ""
"stairway_annot_mask": "" # set this or any of the below to 'none' to skip annot masking
"msmc_annot_mask": ""
"gone_annot_mask": ""
"smcpp_annot_mask": ""

# slim settings
"slim_scaling_factor": 1
"slim_burn_in": 10

# n(t) specific configs
"methods" : ["stairwayplot", "gone", "smcpp", "msmc"]
"num_sampled_genomes_msmc" : [6]
"num_msmc_iterations" : 20
"gone_phase" : 1 # 0 for pseudohaploid, 1 for phased, 2 for unknown phase
"gone_max_snps" : 50000 # default=50000
"gone_threads" : 8
"gone_num_gens" : 2000 # default=2000
"gone_num_bins" : 400 # default=400
"methods": ["stairwayplot", "gone", "smcpp", "msmc"]
"num_sampled_genomes_msmc": [6]
"num_msmc_iterations": 20
"gone_phase": 1 # 0 for pseudohaploid, 1 for phased, 2 for unknown phase
"gone_max_snps": 50000 # default=50000
"gone_threads": 8
"gone_num_gens": 2000 # default=2000
"gone_num_bins": 400 # default=400


# exe paths
Expand All @@ -44,6 +40,6 @@
"dfe_alpha_data_path_1": "ext/dfe-alpha-release-2.16/data"
"dfe_alpha_data_path_2": "three-epoch"
"grapes_exec": "ext/grapes/multi_grapes"
"msmc_exec" : "ext/msmc2/build/release/msmc2"
"stairwayplot_code" : "ext/stairwayplot/swarmops.jar"
"gone_code" : "ext/GONE/Linux"
"msmc_exec": "ext/msmc2/build/release/msmc2"
"stairwayplot_code": "ext/stairwayplot/swarmops.jar"
"gone_code": "ext/GONE/Linux"
22 changes: 9 additions & 13 deletions workflows/simulation.snake
Original file line number Diff line number Diff line change
Expand Up @@ -64,28 +64,24 @@ rule simulation:
run:
if wildcards.demog == 'Constant':
model = stdpopsim.PiecewiseConstantSize(species.population_size)
mutation_rate = 1.29e-08 # where is this from?
#samples = model.get_samples(*demo_sample_size_dict[wildcards.demog])
mutation_rate = species.genome.mean_mutation_rate
else:
model = species.get_demographic_model(wildcards.demog)
mutation_rate = model.mutation_rate
#samples = model.get_samples(*demo_sample_size_dict[wildcards.demog]) # YRI, CEU, CHB
samples = {f"{model.populations[i].name}": m for i, m in enumerate(demo_sample_size_dict[wildcards.demog])}
genetic_map_id = config["genetic_map"]
contig = species.get_contig(wildcards.chrms, genetic_map=genetic_map_id)
if wildcards.dfes != "none":
# Load dfe only if provided
dfe = species.get_dfe(wildcards.dfes)
if wildcards.annots == "all_sites":
# Adding selection to the whole contig
contig.add_dfe(intervals=np.array([[0, int(contig.length)]]), DFE=dfe)
elif wildcards.annots == "none":
contig = species.get_contig(wildcards.chrms, genetic_map=genetic_map_id)
else:
## Adding selection only on the annotated (exon) regions
annot = species.get_annotations(wildcards.annots)
annot_intervals = annot.get_chromosome_annotations(wildcards.chrms)
contig.add_dfe(intervals=annot_intervals, DFE=dfe)
if wildcards.annots == "all_sites":
# Adding selection to the whole contig
contig.add_dfe(intervals=np.array([[0, int(contig.length)]]), DFE=dfe)
else:
## Adding selection only on the annotated (exon) regions
annot = species.get_annotations(wildcards.annots)
annot_intervals = annot.get_chromosome_annotations(wildcards.chrms)
contig.add_dfe(intervals=annot_intervals, DFE=dfe)
Comment on lines +82 to +89
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can someone check this? I was getting a "`dfe` is referenced before it is defined" error. This logic resolves that, but I want to make sure it is otherwise correct.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this looks good to me


contig.mutation_rate = mutation_rate
engine = stdpopsim.get_engine("slim")
Expand Down
Loading