Skip to content

Commit

Permalink
3.6.0
Browse files Browse the repository at this point in the history
  • Loading branch information
tdayris committed Jun 7, 2024
1 parent c11c939 commit 1633792
Show file tree
Hide file tree
Showing 16 changed files with 232 additions and 123 deletions.
5 changes: 0 additions & 5 deletions .test/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,31 +42,26 @@ all: report.txt
echo "Process over"

report.txt: pipeline.txt
mamba activate test-snakemake-wrappers && \
snakemake -s ../workflow/Snakefile \
--report report.zip 2>&1 | tee report.txt


pipeline.txt: linter_info.txt
mamba activate test-snakemake-wrappers && \
snakemake -s ../workflow/Snakefile \
--cores 7 --profile local \
--software-deployment-method apptainer conda \
2>&1 | tee pipeline.txt


format.txt:
mamba activate test-snakemake-wrappers && \
snakefmt $(snakefiles) 2>&1 | tee format.txt


black.txt:
mamba activate test-snakemake-wrappers && \
black $(python_scripts) 2>&1 | tee black.txt


linter_info.txt: format.txt black.txt
mamba activate test-snakemake-wrappers && \
snakemake -s ../workflow/Snakefile --lint 2>&1 | tee linter_info.txt

.PHONY: clean
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# 3.6.0

## Features:

* Separate resources among subdirectories holding genome names
* Snakemake up to 8.13.0
* Snakemake wrappers up to 3.12.0

# 3.5.0

## Features:
Expand Down
18 changes: 9 additions & 9 deletions workflow/reports/material_methods.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,20 +46,20 @@ usage, and results can be found on the `Snakemake workflow`_ page.
.. _Snakemake: https://snakemake.readthedocs.io
.. _Github: https://github.com/tdayris/fair_genome_indexer
.. _`Snakemake workflow`: https://snakemake.github.io/snakemake-workflow-catalog?usage=tdayris/fair_genome_indexer
.. _Picard: https://snakemake-wrappers.readthedocs.io/en/v3.10.2/wrappers/picard/createsequencedictionary.html
.. _Samtools: https://snakemake-wrappers.readthedocs.io/en/v3.10.2/wrappers/samtools/faidx.html
.. _Picard: https://snakemake-wrappers.readthedocs.io/en/v3.12.0/wrappers/picard/createsequencedictionary.html
.. _Samtools: https://snakemake-wrappers.readthedocs.io/en/v3.12.0/wrappers/samtools/faidx.html
.. _Agat: https://agat.readthedocs.io/en/latest/index.html
.. _Pyroe: https://snakemake-wrappers.readthedocs.io/en/v3.10.2/wrappers/pyroe/idtoname.html
.. _Pyroe: https://snakemake-wrappers.readthedocs.io/en/v3.12.0/wrappers/pyroe/idtoname.html
.. _Pyfaidx: https://github.com/mdshw5/pyfaidx
.. _GFFRead: https://snakemake-wrappers.readthedocs.io/en/v3.10.2/wrappers/gffread.html
.. _XSV: https://snakemake-wrappers.readthedocs.io/en/v3.10.2/wrappers/xsv.html
.. _BCFTools: https://snakemake-wrappers.readthedocs.io/en/v3.10.2/wrappers/bcftools/filter.html
.. _Tabix: https://snakemake-wrappers.readthedocs.io/en/v3.10.2/wrappers/tabix/index.html
.. _GFFRead: https://snakemake-wrappers.readthedocs.io/en/v3.12.0/wrappers/gffread.html
.. _XSV: https://snakemake-wrappers.readthedocs.io/en/v3.12.0/wrappers/xsv.html
.. _BCFTools: https://snakemake-wrappers.readthedocs.io/en/v3.12.0/wrappers/bcftools/filter.html
.. _Tabix: https://snakemake-wrappers.readthedocs.io/en/v3.12.0/wrappers/tabix/index.html
.. _`Boyle-Lab's Github`: https://github.com/Boyle-Lab/Blacklist
.. _BEDTools: https://snakemake-wrappers.readthedocs.io/en/v3.10.2/wrappers/bedtools/merge.html
.. _BEDTools: https://snakemake-wrappers.readthedocs.io/en/v3.12.0/wrappers/bedtools/merge.html
.. _UCSC: https://genome.ucsc.edu/FAQ/FAQformat.html

:Authors:
Thibault Dayris

:Version: 3.5.0 of 05/22/2024
:Version: 3.6.0 of 06/07/2024
28 changes: 16 additions & 12 deletions workflow/reports/results.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,25 @@ files are present:

reference/
├── blacklist
   | └── XXX.merged.bed
   | └── XXX
   | └── XXX.merged.bed
├── variants
   | ├── XXX.all.vcf.gz
   | └── XXX.all.vcf.gz.tbi
   | └── XXX
   | ├── XXX.all.vcf.gz
   | └── XXX.all.vcf.gz.tbi
├── sequences
   | ├── XXX.cdna.fasta
   | ├── XXX.cdna.fasta.fai
   | ├── XXX.dna.dict
   | ├── XXX.dna.fasta
   | └── XXX.dna.fasta.fai
   | └── XXX
   | ├── XXX.cdna.fasta
   | ├── XXX.cdna.fasta.fai
   | ├── XXX.dna.dict
   | ├── XXX.dna.fasta
   | └── XXX.dna.fasta.fai
└── annotation
   ├── XXX.id_to_gene.tsv
├── XXX.t2g.tsv
├── XXX.genePred
└── XXX.gtf
   └── XXX
   ├── XXX.id_to_gene.tsv
├── XXX.t2g.tsv
├── XXX.genePred
└── XXX.gtf


+-------------------+-----------------------------+
Expand Down
102 changes: 71 additions & 31 deletions workflow/rules/agat.smk
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,22 @@ Gustave Roussy computing cluster (Flamingo) reports:
"""


rule fair_genome_indexer_agat_config:
rule fair_genome_indexer_agat_config_gtf:
output:
yaml=temp("tmp/fair_genome_indexer_agat_config/config.yaml"),
yaml=temp("tmp/fair_genome_indexer_agat_config/gtf.yaml"),
threads: 1
resources:
mem_mb=lambda wildcards, attempt: 280 + (100 * attempt),
runtime=lambda wildcards, attempt: 2 * attempt,
disk_mb=1,
tmpdir=tmp,
log:
"logs/fair_genome_indexer_agat_config.log",
"logs/fair_genome_indexer_agat_config/gtf.log",
benchmark:
"benchmark/fair_genome_indexer_agat_config.tsv"
"benchmark/fair_genome_indexer_agat_config/gtf.tsv"
params:
config=lookup_config(
dpath="params/fair_genome_indexer_agat_config",
dpath="params/fair_genome_indexer_agat_config_gtf",
default={
"output_format": "GTF",
"gff_output_version": 3,
Expand Down Expand Up @@ -60,6 +60,46 @@ rule fair_genome_indexer_agat_config:
"../scripts/agat_config.py"


use rule fair_genome_indexer_agat_config_gtf as fair_genome_indexer_agat_config_gff with:
output:
yaml=temp("tmp/fair_genome_indexer_agat_config/gff3.yaml"),
log:
"logs/fair_genome_indexer_agat_config/gff.log",
benchmark:
"benchmark/fair_genome_indexer_agat_config/gff.tsv"
params:
config=lookup_config(
dpath="params/fair_genome_indexer_agat_config_gff",
default={
"output_format": "GFF",
"gff_output_version": 3,
"gtf_output_version": "relax",
"verbose": 1,
"progress_bar": False,
"log": False,
"debug": False,
"tabix": False,
"merge_loci": False,
"throw_fasta": False,
"force_gff_input_version": 0,
"create_l3_for_l2_orphan": True,
"locus_tag": ["locus_tag", "gene_id"],
"prefix_new_id": "nbis",
"check_sequential": True,
"check_l2_linked_to_l3": True,
"check_l1_linked_to_l2": True,
"remove_orphan_l1": True,
"check_all_level3_locations": True,
"check_cds": True,
"check_exons": True,
"check_utrs": True,
"check_all_level2_locations": True,
"check_all_level1_locations": True,
"check_identical_isoforms": True,
},
),


"""
Fix classical GTF/GFF format errors in Ensembl/Gencode files.
Expand All @@ -75,11 +115,11 @@ for grch38

rule fair_genome_indexer_agat_convert_sp_gff2gtf:
input:
gtf="tmp/fair_genome_indexer_get_genome_gtf_annotation/{species}.{build}.{release}.gtf",
config="tmp/fair_genome_indexer_agat_config/config.yaml",
gtf="tmp/fair_genome_indexer_get_genome_gtf_annotation/{species}.{build}.{release}.{gxf}",
config="tmp/fair_genome_indexer_agat_config/{gxf}.yaml",
output:
gtf=temp(
"tmp/fair_genome_indexer_agat_convert_sp_gff2gtf/{species}.{build}.{release}.format.gtf"
"tmp/fair_genome_indexer_agat_convert_sp_gff2gtf/{species}.{build}.{release}.format.{gxf}"
),
threads: 1
resources:
Expand All @@ -89,9 +129,9 @@ rule fair_genome_indexer_agat_convert_sp_gff2gtf:
shadow:
"minimal"
log:
"logs/fair_genome_indexer_agat_convert_sp_gff2gtf/{species}.{build}.{release}.log",
"logs/fair_genome_indexer_agat_convert_sp_gff2gtf/{species}.{build}.{release}/{gxf}.log",
benchmark:
"benchmark/fair_genome_indexer_agat_convert_sp_gff2gtf/{species}.{build}.{release}.tsv"
"benchmark/fair_genome_indexer_agat_convert_sp_gff2gtf/{species}.{build}.{release}/{gxf}.tsv"
params:
extra=lookup_config(
dpath="params/fair_genome_indexer_agat_convert_sp_gff2gtf", default=""
Expand All @@ -116,17 +156,17 @@ for grch38

rule fair_genome_indexer_agat_sp_filter_feature_by_attribute_value:
input:
gtf="tmp/fair_genome_indexer_agat_convert_sp_gff2gtf/{species}.{build}.{release}.format.gtf",
config="tmp/fair_genome_indexer_agat_config/config.yaml",
gtf="tmp/fair_genome_indexer_agat_convert_sp_gff2gtf/{species}.{build}.{release}.format.{gxf}",
config="tmp/fair_genome_indexer_agat_config/{gxf}.yaml",
output:
gtf=temp(
"tmp/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value/{species}.{build}.{release}.filtered.gtf"
"tmp/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value/{species}.{build}.{release}.filtered.{gxf}"
),
discarded=temp(
"tmp/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value/{species}.{build}.{release}.features_discarded.txt"
"tmp/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value/{species}.{build}.{release}.{gxf}.features_discarded.txt"
),
report=temp(
"tmp/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value/{species}.{build}.{release}.feaures_report.txt"
"tmp/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value/{species}.{build}.{release}.{gxf}.feaures_report.txt"
),
threads: 1
resources:
Expand All @@ -136,9 +176,9 @@ rule fair_genome_indexer_agat_sp_filter_feature_by_attribute_value:
shadow:
"minimal"
log:
"logs/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value/{species}.{build}.{release}.log",
"logs/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value/{species}.{build}.{release}/{gxf}.log",
benchmark:
"benchmark/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value/{species}.{build}.{release}.tsv"
"benchmark/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value/{species}.{build}.{release}/{gxf}.tsv"
params:
extra=lookup_config(
dpath="params/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value",
Expand Down Expand Up @@ -168,25 +208,25 @@ rule fair_genome_indexer_agat_sq_filter_feature_from_fasta:
lookup_config(
dpath="params/fair_genome_indexer/agat/select_feature_by_attribute_value",
),
then="tmp/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value/{species}.{build}.{release}.filtered.gtf",
otherwise="tmp/fair_genome_indexer_agat_convert_sp_gff2gtf/{species}.{build}.{release}.format.gtf",
then="tmp/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value/{species}.{build}.{release}.filtered.{gxf}",
otherwise="tmp/fair_genome_indexer_agat_convert_sp_gff2gtf/{species}.{build}.{release}.format.{gxf}",
),
fasta=lambda wildcards: get_dna_fasta(wildcards),
fasta_index=lambda wildcards: get_dna_fai(wildcards),
config="tmp/fair_genome_indexer_agat_config/config.yaml",
config="tmp/fair_genome_indexer_agat_config/{gxf}.yaml",
output:
gtf="reference/annotation/{species}.{build}.{release}.gtf",
gtf="reference/annotation/{species}.{build}.{release}/{species}.{build}.{release}.{gxf}",
threads: 1
resources:
mem_mb=lambda wildcards, attempt: 750 + (200 * attempt),
runtime=lambda wildcards, attempt: 15 * attempt,
runtime=lambda wildcards, attempt: 35 * attempt,
tmpdir=tmp,
shadow:
"minimal"
log:
"logs/fair_genome_indexer_agat_sq_filter_feature_from_fasta/{species}.{build}.{release}.log",
"logs/fair_genome_indexer_agat_sq_filter_feature_from_fasta/{species}.{build}.{release}/{gxf}.log",
benchmark:
"benchmark/fair_genome_indexer_agat_sq_filter_feature_from_fasta/{species}.{build}.{release}.tsv"
"benchmark/fair_genome_indexer_agat_sq_filter_feature_from_fasta/{species}.{build}.{release}/{gxf}.tsv"
params:
extra=lookup_config(
dpath="params/fair_genome_indexer_agat_sq_filter_feature_from_fasta",
Expand All @@ -213,26 +253,26 @@ for grch38
use rule fair_genome_indexer_agat_sp_filter_feature_by_attribute_value as fair_genome_indexer_agat_sp_filter_feature_by_attribute_value_cdna with:
input:
gtf=lambda wildcards: get_gtf(wildcards),
config="tmp/fair_genome_indexer_agat_config/config.yaml",
config="tmp/fair_genome_indexer_agat_config/{gxf}.yaml",
output:
gtf=temp(
"tmp/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value_cdna/{species}.{build}.{release}.cdna.gtf"
"tmp/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value_cdna/{species}.{build}.{release}.cdna.{gxf}"
),
discarded=temp(
"tmp/agat/{species}.{build}.{release}.cdna.feature_discarded.txt"
"tmp/agat/{species}.{build}.{release}.{gxf}.cdna.feature_discarded.txt"
),
report=temp(
"tmp/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value_cdna/{species}.{build}.{release}.cdna.feaures_report.txt"
"tmp/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value_cdna/{species}.{build}.{release}.{gxf}.cdna.feaures_report.txt"
),
threads: 1
resources:
mem_mb=lambda wildcards, attempt: 8_000 + (2_000 * attempt),
runtime=lambda wildcards, attempt: 15 * attempt,
runtime=lambda wildcards, attempt: 35 * attempt,
tmpdir=tmp,
log:
"logs/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value_cdna/{species}.{build}.{release}.log",
"logs/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value_cdna/{species}.{build}.{release}.{gxf}.log",
benchmark:
"benchmark/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value_cdna/{species}.{build}.{release}.tsv"
"benchmark/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value_cdna/{species}.{build}.{release}.{gxf}.tsv"
params:
extra=lookup_config(
dpath="params/fair_genome_indexer_agat_sp_filter_feature_by_attribute_value",
Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/bcftools_filter_dbsnp.smk
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ rule fair_genome_indexer_bcftools_filter_non_canonical_chrom:
"tmp/fair_genome_indexer_pyfaidx_fasta_dict_to_bed/{species}.{build}.{release}.dna.bed"
),
output:
"reference/variants/{species}.{build}.{release}.all.vcf.gz",
"reference/variants/{species}.{build}.{release}/{species}.{build}.{release}.all.vcf.gz",
threads: 2
resources:
mem_mb=lambda wildcards, attempt: 750 + (250 * attempt),
Expand Down
Loading

0 comments on commit 1633792

Please sign in to comment.