BU-ISCIII · Shettland · Jan 30, 2025 · Jan 29, 2025 · Jan 29, 2025 · Jan 29, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,30 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.5.0dev] - 2025-0X-XX : https://github.com/BU-ISCIII/relecov-tools/releases/tag/XXX
+
+### Credits
+
+Code contributions to the release:
+
+- [Pablo Mata](https://github.com/shettland)
+
+### Modules
+
+#### Added enhancements
+
+- Added more robust datatype handling in the utils.py read_csv_file_return_dict() method [#379](https://github.com/BU-ISCIII/relecov-tools/pull/379)
+
+#### Fixes
+
+#### Changed
+
+- Temporarily changed bioinfo_config 'quality_control' requirement to false [#379](https://github.com/BU-ISCIII/relecov-tools/pull/379)
+
+#### Removed
+
+### Requirements
+
 ## [1.4.0] - 2025-01-27 : https://github.com/BU-ISCIII/relecov-tools/releases/tag/v1.4.0
 
 ### Credits

diff --git a/relecov_tools/conf/bioinfo_config.json b/relecov_tools/conf/bioinfo_config.json
@@ -27,7 +27,7 @@
             "fn": "quality_control_report(?:_\\d{8})?\\.tsv",
             "sample_col_idx": 1,
             "header_row_idx": 1,
-            "required": true,
+            "required": false,
             "function": null,
             "multiple_samples": true,
             "split_by_batch": true,

diff --git a/relecov_tools/utils.py b/relecov_tools/utils.py
@@ -21,7 +21,7 @@
 from tabulate import tabulate
 import openpyxl.utils
 import openpyxl.styles
-
+import pandas as pd
 
 log = logging.getLogger(__name__)
 
@@ -142,39 +142,24 @@ def excel_date_to_num(date):
         return None
 
 
-def read_csv_file_return_dict(file_name, sep=None, key_position=None):
-    """Read csv or tsv file, according to separator, and return a dictionary
+def read_csv_file_return_dict(file_name, sep=None, key_position=0):
+    """Read csv or tsv file, according to separator (sep), and return a dictionary
     where the main key is the first column, if key position is None otherwise
     the index value of the key position is used as key. If sep is None then
-    try to assert a separator automaticallly depending on file extension.
+    try to assert a separator automatically depending on file extension.
     """
-    try:
-        with open(file_name, "r") as fh:
-            lines = fh.readlines()
-    except FileNotFoundError:
-        raise
     if sep is None:
         file_extension = os.path.splitext(file_name)[1]
         extdict = {".csv": ",", ".tsv": "\t", ".tab": "\t"}
         # Use space as a default separator, None would also be valid
         sep = extdict.get(file_extension, " ")
-    heading = lines[0].strip().split(sep)
-    if len(heading) == 1:
-        return {"ERROR": "not valid format"}
-    file_data = {}
-    for line in lines[1:]:
-        line_s = line.strip().split(sep)
-        if key_position is None:
-            file_data[line_s[0]] = {}
-            for idx in range(1, len(heading)):
-                file_data[line_s[0]][heading[idx]] = line_s[idx]
-        else:
-            file_data[line_s[key_position]] = {}
-            for idx in range(len(heading)):
-                if idx == key_position:
-                    continue
-                file_data[line_s[key_position]][heading[idx]] = line_s[idx]
-
+    try:
+        # Read all columns as strings to avoid parsing IDs as float, but try to infer datatypes afterwards
+        file_df = pd.read_csv(file_name, sep=sep, dtype="string").convert_dtypes()
+    except FileNotFoundError:
+        raise
+    key_column = file_df.columns[key_position]
+    file_data = file_df.set_index(key_column).to_dict(orient="index")
     return file_data