Updated MAG lablogs and utils.py #334

Merged 4 commits on Sep 11, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -69,6 +69,7 @@ Code contributions to the new version:
- Added labels to services.json and updated bioinfo_doc.py and jinja_template_delivery.j2 so that software versions data is displayed in the delivery pdf [#330](https://github.com/BU-ISCIII/buisciii-tools/pull/330).
- Updated several templates (singularity images, outdated paths, improvements, etc) [#331](https://github.com/BU-ISCIII/buisciii-tools/pull/331)
- Added permissions fixing after running scratch_copy, as well as a new fix-permissions module in the tools [#332](https://github.com/BU-ISCIII/buisciii-tools/pull/332).
- Updated MAG lablogs and utils.py [#334](https://github.com/BU-ISCIII/buisciii-tools/pull/334).

### Modules

@@ -1,6 +1,13 @@
# SETUP INPUT SAMPLE SHEET
ln -s ../00-reads .
ln -s ../samples_id.txt .

# Setup samplesheet
echo "sample,group,short_reads_1,short_reads_2,long_reads" > samplesheet.csv
cat samples_id.txt | while read in; do
echo "${in},,00-reads/${in}_R1.fastq.gz,00-reads/${in}_R2.fastq.gz,"
done >> samplesheet.csv

#module load Nextflow
#module load singularity

@@ -20,7 +27,7 @@ export NXF_OPTS="-Xms500M -Xmx4G"

nextflow run /data/bi/pipelines/nf-core-mag/nf-core-mag-3.0.3/3_0_3/main.nf \\
-c ../../DOC/mag.config \\
-    --input '00-reads/*_R{1,2}.fastq.gz' \\
+    --input samplesheet.csv \\
--outdir $(date '+%Y%m%d')_mag \\
--kraken2_db /data/bi/references/kraken/minikraken_8GB_20200312.tgz \\
--skip_busco --skip_spades --skip_spadeshybrid --skip_megahit --skip_prodigal --skip_binning \\
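The lablog above builds the new nf-core/mag samplesheet from `samples_id.txt` with a shell loop. A minimal Python sketch of the same transformation, using hypothetical placeholder sample IDs (`SAMPLE01`, `SAMPLE02` are not from the PR), makes the expected CSV layout explicit:

```python
# Sketch of the samplesheet the lablog's shell loop generates.
# SAMPLE01/SAMPLE02 are hypothetical IDs standing in for samples_id.txt entries.
samples = ["SAMPLE01", "SAMPLE02"]

header = "sample,group,short_reads_1,short_reads_2,long_reads"
# One row per sample: group and long_reads columns are left empty,
# short reads point into the 00-reads directory symlinked by the lablog.
rows = [
    f"{s},,00-reads/{s}_R1.fastq.gz,00-reads/{s}_R2.fastq.gz,"
    for s in samples
]
samplesheet = "\n".join([header] + rows)
print(samplesheet)
```

Each row leaves `group` and `long_reads` blank, which matches the trailing commas the `echo` in the lablog emits.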
3 changes: 1 addition & 2 deletions bu_isciii/templates/mag/ANALYSIS/ANALYSIS03_MAG_ALL/lablog
@@ -10,7 +10,6 @@ done >> samplesheet.csv

scratch_dir=$(echo $PWD | sed "s/\/data\/bi\/scratch_tmp/\/scratch/g")


cat <<EOF > mag_all.sbatch
#!/bin/sh
#SBATCH --ntasks 1
@@ -24,7 +23,7 @@ cat <<EOF > mag_all.sbatch
# module load Nextflow/23.10.0 singularity
export NXF_OPTS="-Xms500M -Xmx8G"

-nextflow run /data/bi/pipelines/nf-core-mag/nf-core-mag-2.5.3/workflow/main.nf \\
+nextflow run /data/bi/pipelines/nf-core-mag/nf-core-mag-3.0.3/3_0_3/main.nf \\
-c ../../DOC/mag.config \\
-profile singularity \\
--input samplesheet.csv \\
3 changes: 2 additions & 1 deletion bu_isciii/templates/sftp_user.json
@@ -53,5 +53,6 @@
"mvmoneo": ["SpainUDP"],
"bbaladron": ["SpainUDP"],
"bioinfoadm": ["test"],
-"s.varona": ["misc"]
+"s.varona": ["misc"],
+"nlabiod": ["Labarbovirus"]
}
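The sftp_user.json change touches two lines because JSON forbids dangling commas: the previously last entry needs a trailing comma once a new entry follows it. A quick check of the edited fragment (reproduced from the diff above) confirms it parses:

```python
import json

# Last two entries of sftp_user.json after the PR's edit: the comma after
# "misc" is required now that "nlabiod" follows it.
fragment = '{"s.varona": ["misc"], "nlabiod": ["Labarbovirus"]}'
users = json.loads(fragment)
print(users["nlabiod"])
```

Omitting the added comma (or leaving one after the final entry) would raise `json.JSONDecodeError`.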
15 changes: 6 additions & 9 deletions bu_isciii/utils.py
@@ -319,18 +319,15 @@ def uncompress_targz_directory(tar_name, directory):
return


-def get_md5(file):
+def get_md5(file_path, chunk_size=1 * 1024 * 1024 * 1024):  # 1 GB
     """
     Given a file, open it and digest to get the md5
     NOTE: might be troublesome when infile is too big
     Based on:
     https://www.quickprogrammingtips.com/python/how-to-calculate-md5-hash-of-a-file-in-python.html
     """
-    with open(file, "rb") as infile:
-        infile = infile.read()
-        file_md5 = hashlib.md5(infile).hexdigest()
-
-    return file_md5
+    hash_md5 = hashlib.md5()
+    with open(file_path, "rb") as f:
+        for chunk in iter(lambda: f.read(chunk_size), b""):
+            hash_md5.update(chunk)
+    return hash_md5.hexdigest()
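The rewritten `get_md5` streams the file in fixed-size chunks instead of reading it fully into memory, so large files no longer blow up RAM. A self-contained sketch of the same approach (with a 1 MB chunk size for the demo rather than the PR's 1 GB default) that checks the chunked digest against a one-shot digest:

```python
import hashlib
import tempfile

def get_md5(file_path, chunk_size=1024 * 1024):
    """Chunked MD5, mirroring the PR's approach (1 MB chunks for the demo)."""
    hash_md5 = hashlib.md5()
    with open(file_path, "rb") as f:
        # iter() with a sentinel calls f.read(chunk_size) until it returns b"".
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()

# Write a file that spans several chunks, then compare against a one-shot digest.
with tempfile.NamedTemporaryFile(delete=False) as tmp:
    tmp.write(b"x" * (3 * 1024 * 1024 + 17))
    path = tmp.name

with open(path, "rb") as f:
    one_shot = hashlib.md5(f.read()).hexdigest()
print(get_md5(path) == one_shot)
```

Chunked and one-shot digests are identical because MD5 is an incremental hash: `update()` calls can split the input at any boundary.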


def ask_date(previous_date=None, posterior_date=None, initial_year=2010):