-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add test config files for hg38 and mm10 #20
- Loading branch information
Showing
2 changed files
with
176 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
|
||
##### RESOURCES ##### | ||
mem: '32000' | ||
threads: 2 | ||
partition: 'shortq' | ||
|
||
##### GENERAL ##### | ||
project_name: hg38test #MyATACproject # name of the project/dataset | ||
result_path: results/hg38test/ #/path/to/results/ # path to the output folder | ||
annotation: test/hg38test/hg38test_atacseq_pipeline_annotation.csv #/path/to/sample_annotation.csv # path to annotation file, specified in config/README.md | ||
|
||
##### PROCESSING ##### | ||
|
||
# genome in the project | ||
genome: hg38 | ||
|
||
##### REPORT ##### | ||
|
||
# sample(!) specific annotation columns of interest from the annotation sheet (note: unit specific annotations are not retained) | ||
# first entry is used to color UCSC Genome Browser tracks | ||
annot_columns: ['pass_qc','read_type','organism'] # (optional) can be empty [""]. must be columns in the annotation sheet | ||
|
||
##### QUANTIFICATION ##### | ||
# coverage calculation with bedtools for QC report | ||
tss_slop: 2000 | ||
noise_lower: 100 | ||
|
||
# determination of consensus regions using (py)bedtools | ||
slop_extension: 250 | ||
|
||
##### ANNOTATION ##### | ||
# region annotation parameters for UROPA with gencode_gtf reference | ||
|
||
# assumed size of transcription start sites (TSS) | ||
tss_size: 100 | ||
|
||
# assumed TSS proximal distance upstream | ||
proximal_size_up: 1000 | ||
|
||
# assumed TSS proximal distance downstream | ||
proximal_size_dn: 500 | ||
|
||
# distal distance | ||
distal_size: 10000 | ||
|
||
|
||
##### RESOURCES ##### | ||
# specify paths to resource files required by the pipeline | ||
# download resources for the GRCm38 (mm10) assembly of the mouse genome: https://zenodo.org/records/6344322 | ||
# download resources for the GRCh38 (hg38) assembly of the human genome: https://zenodo.org/records/6344174 | ||
|
||
# (nextera) adapter fasta (.fa) file and/or nucleotide adapter sequence of the used ATAC-seq protocol (used by Bowtie2 if provided) | ||
# if not available/applicable leave empty: "" | ||
adapter_fasta: resources/atacseq_pipeline/hg38/atacseq/nextera_adapters.fa | ||
adapter_sequence: GTCTCGTGGGCTCGG | ||
|
||
# indices for Bowtie2 | ||
bowtie2_index: resources/atacseq_pipeline/hg38/indices_for_Bowtie2/hg38 | ||
|
||
# chromosome lengths for a given genome | ||
chromosome_sizes: resources/atacseq_pipeline/hg38/atacseq/hg38.chromSizes | ||
|
||
# blacklisted regions from ENCODE as .bed files | ||
blacklisted_regions: resources/atacseq_pipeline/hg38/atacseq/hg38-ENCODE_blacklist.sorted.v3.bed | ||
|
||
# complement to the blacklisted regions as .bed files | ||
whitelisted_regions: resources/atacseq_pipeline/hg38/atacseq/hg38-ENCODE_whitelist.sorted.v3.bed | ||
|
||
# .bed files, from e.g., gencode (hg38) or CCDS (mm10) | ||
unique_tss: resources/atacseq_pipeline/hg38/atacseq/hg38_gencode_tss_unique.sorted.bed | ||
|
||
# regulatory build regulatory features chromosomes only from e.g., Ensembl | ||
regulatory_regions: resources/atacseq_pipeline/hg38/homo_sapiens.GRCh38.Regulatory_Build.regulatory_features.chromosomes_only.20161111.bed | ||
|
||
# length of genomes as integer | ||
genome_size: 2747877777 | ||
|
||
# abbreviation of mitochondria chromosome | ||
mitochondria_name: chrM | ||
|
||
# genomes as .fa files | ||
genome_fasta: resources/atacseq_pipeline/hg38/hg38.fa | ||
|
||
# gencode .gtf files (generated by running /workflow/scripts/parse_reg_build_file.py on gff.gz files from Ensembl http://www.ensembl.org/) | ||
gencode_gtf: resources/atacseq_pipeline/hg38/gencode.v38.basic.annotation.gtf | ||
|
||
# regulatory build regulatory features .gtf files (generated by running /workflow/scripts/parse_reg_build_file.py on gff.gz files from Ensembl http://www.ensembl.org/) | ||
regulatory_build_gtf: resources/atacseq_pipeline/hg38/homo_sapiens.GRCh38.Regulatory_Build.regulatory_features.20210107.gtf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
|
||
##### RESOURCES ##### | ||
mem: '32000' | ||
threads: 2 | ||
partition: 'shortq' | ||
|
||
##### GENERAL ##### | ||
project_name: mm10test #MyATACproject # name of the project/dataset | ||
result_path: results/mm10test/ #/path/to/results/ # path to the output folder | ||
annotation: test/mm10test/mm10test_atacseq_pipeline_annotation.csv #/path/to/sample_annotation.csv # path to annotation file, specified in config/README.md | ||
|
||
##### PROCESSING ##### | ||
|
||
# genome in the project | ||
genome: mm10 | ||
|
||
##### REPORT ##### | ||
|
||
# sample(!) specific annotation columns of interest from the annotation sheet (note: unit specific annotations are not retained) | ||
# first entry is used to color UCSC Genome Browser tracks | ||
annot_columns: ['pass_qc','read_type','organism'] # (optional) can be empty [""]. must be columns in the annotation sheet | ||
|
||
##### QUANTIFICATION ##### | ||
# coverage calculation with bedtools for QC report | ||
tss_slop: 2000 | ||
noise_lower: 100 | ||
|
||
# determination of consensus regions using (py)bedtools | ||
slop_extension: 250 | ||
|
||
##### ANNOTATION ##### | ||
# region annotation parameters for UROPA with gencode_gtf reference | ||
|
||
# assumed size of transcription start sites (TSS) | ||
tss_size: 100 | ||
|
||
# assumed TSS proximal distance upstream | ||
proximal_size_up: 1000 | ||
|
||
# assumed TSS proximal distance downstream | ||
proximal_size_dn: 500 | ||
|
||
# distal distance | ||
distal_size: 10000 | ||
|
||
|
||
##### RESOURCES ##### | ||
# specify paths to resource files required by the pipeline | ||
# download resources for the GRCm38 (mm10) assembly of the mouse genome: https://zenodo.org/records/6344322 | ||
# download resources for the GRCh38 (hg38) assembly of the human genome: https://zenodo.org/records/6344174 | ||
|
||
# (nextera) adapter fasta (.fa) file and/or nucleotide adapter sequence of the used ATAC-seq protocol (used by Bowtie2 if provided) | ||
# if not available/applicable leave empty: "" | ||
adapter_fasta: resources/atacseq_pipeline/mm10/atacseq/nextera_adapters.fa | ||
adapter_sequence: GTCTCGTGGGCTCGG | ||
|
||
# indices for Bowtie2 | ||
bowtie2_index: resources/atacseq_pipeline/mm10/indices_for_Bowtie2/mm10 | ||
|
||
# chromosome lengths for a given genome | ||
chromosome_sizes: resources/atacseq_pipeline/mm10/atacseq/mm10.chromSizes | ||
|
||
# blacklisted regions from ENCODE as .bed files | ||
blacklisted_regions: resources/atacseq_pipeline/mm10/atacseq/mm10-ENCODE_blacklist.sorted.v2.bed | ||
|
||
# complement to the blacklisted regions as .bed files | ||
whitelisted_regions: resources/atacseq_pipeline/mm10/atacseq/mm10-ENCODE_whitelist.sorted.v2.bed | ||
|
||
# .bed files, from e.g., gencode (hg38) or CCDS (mm10) | ||
unique_tss: resources/atacseq_pipeline/mm10/atacseq/mm10_CCDS_tss_unique.bed | ||
|
||
# regulatory build regulatory features chromosomes only from e.g., Ensembl | ||
regulatory_regions: resources/atacseq_pipeline/mm10/mus_musculus.GRCm38.Regulatory_Build.regulatory_features.20161111.sorted.bed | ||
|
||
# length of genomes as integer | ||
genome_size: 2407883318 | ||
|
||
# abbreviation of mitochondria chromosome | ||
mitochondria_name: chrM | ||
|
||
# genomes as .fa files | ||
genome_fasta: resources/atacseq_pipeline/mm10/mm10.fa | ||
|
||
# gencode .gtf files (generated by running /workflow/scripts/parse_reg_build_file.py on gff.gz files from Ensembl http://www.ensembl.org/) | ||
gencode_gtf: resources/atacseq_pipeline/mm10/gencode.vM25.chr_patch_hapl_scaff.basic.annotation.gtf | ||
|
||
# regulatory build regulatory features .gtf files (generated by running /workflow/scripts/parse_reg_build_file.py on gff.gz files from Ensembl http://www.ensembl.org/) | ||
regulatory_build_gtf: resources/atacseq_pipeline/mm10/mus_musculus.GRCm38.Regulatory_Build.regulatory_features.20180516.gtf |