From 763d5a77092668fd58e6c73cf5dad15bfeeb326f Mon Sep 17 00:00:00 2001
From: RSWilson1 <robert.s.wilson1996@gmail.com>
Date: Tue, 11 Apr 2023 13:44:14 +0100
Subject: [PATCH 01/12] - Added hyperlink handling for list of cosmic ids
 joined by '&'. - Changed cosmic stub to new one which incorporates genome
 build.

---
 .../home/dnanexus/generate_workbook/utils/vcf.py   | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/resources/home/dnanexus/generate_workbook/utils/vcf.py b/resources/home/dnanexus/generate_workbook/utils/vcf.py
index 6d7b1d1b..6a9d0f09 100644
--- a/resources/home/dnanexus/generate_workbook/utils/vcf.py
+++ b/resources/home/dnanexus/generate_workbook/utils/vcf.py
@@ -44,7 +44,7 @@ def __init__(self, args) -> None:
         self.urls = {
             "csq_existing_variation": "https://www.ncbi.nlm.nih.gov/snp/",
             "csq_clinvar": "https://www.ncbi.nlm.nih.gov/clinvar/variation/",
-            "csq_cosmic": "https://cancer.sanger.ac.uk/cosmic/search?q=",
+            "csq_cosmic": "https://cancer.sanger.ac.uk/cosmic/search?genome=BUILD&q=",  # genome=37&q={ID}
             "csq_hgmd": "https://my.qiagendigitalinsights.com/bbp/view/hgmd/pro/mut.php?acc=",
             "csq_mastermind_mmid3": "https://mastermind.genomenon.com/detail?mutation=",
             "gnomad_base_url": "https://gnomad.broadinstitute.org/variant/CHROM-POS-REF-ALT",
@@ -469,6 +469,9 @@ def add_hyperlinks(self) -> None:
                 if 'gnomad' in col.lower():
                     # gnomAD columns won't be exact match on name to dict
                     url = self.urls.get('gnomad')
+                elif 'cosmic' in col.lower():
+                    url = self.urls.get('csq_cosmic')
+                    print(f"csomic stub:{url}")
                 else:
                     url = self.urls.get(col.lower(), None)
 
@@ -557,6 +560,15 @@ def make_hyperlink(self, column, url, value, build):
             # sheet so there is no need to display full length hyperlink
             value[column] = url.split('/')[-1]
 
+        elif 'cosmic' in column.lower():
+            # COSMIC requires the url to have the COSM ID added to the url
+            # stub differs based on genome build
+            url = url.replace('BUILD', str(build))
+            # COSMIC IDs are separated by & and rejoined for each unique ID
+            # re-assigned to value[column] to display only unique IDs in excel
+            value[column] = '&'.join(set(value[column].split('&')))
+            url = f'{url}{value[column]}'
+
         else:
             # other URLs with value appended to end
             url = f'{url}{value[column]}'

From 6c5ddc5a6b40f2cb7a21abee70276f02f3849337 Mon Sep 17 00:00:00 2001
From: RSWilson1 <robert.s.wilson1996@gmail.com>
Date: Wed, 12 Apr 2023 09:23:01 +0100
Subject: [PATCH 02/12] tests for cosmic handling: - checks correct output for
 single cosmic id. - checks correct output for multiple cosmic ids
 (COSV12345&COSV12345).

---
 .../generate_workbook/tests/test_vcf.py       | 56 +++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/resources/home/dnanexus/generate_workbook/tests/test_vcf.py b/resources/home/dnanexus/generate_workbook/tests/test_vcf.py
index 48bd5643..600e6638 100644
--- a/resources/home/dnanexus/generate_workbook/tests/test_vcf.py
+++ b/resources/home/dnanexus/generate_workbook/tests/test_vcf.py
@@ -484,6 +484,62 @@ def test_gnomad_build_38():
             "gnomAD AF link output incorrect for build 38 input"
         )
 
+    @staticmethod
+    def test_cosmic_build_37():
+        '''
+        Test that the COSMIC links are generated correctly for build 37
+        '''
+        # Initialise test dataframe with build 37 genome positions
+        df = pd.DataFrame([
+            {'CHROM': 1, 'POS': 2488153, 'REF': 'A',
+             'ALT': 'G', 'COSMICcMuts': 'COSV63186428'},
+        ])
+
+        test_vcf = vcf(argparse.Namespace())
+        test_vcf.vcfs = [df]
+        test_vcf.refs = ['37']  # Set reference = build 37
+
+        # Call function to add hyperlinks
+        vcf.add_hyperlinks(test_vcf)
+
+        valid_string = (
+            '=HYPERLINK("https://cancer.sanger.ac.uk/cosmic/search?'
+            'genome=37&q=COSV63186428", "COSV63186428")'
+        )
+
+        # Assert the output is as expected
+        assert test_vcf.vcfs[0]["COSMICcMuts"][0] == valid_string, (
+            "COSMICcMuts link output incorrect for build 37 input"
+        )
+
+    @staticmethod
+    def test_cosmic_build_37_multiple():
+        '''
+        Test that the COSMIC links are generated correctly for build 37
+        '''
+        # Intialise test dataframe with build 37 genome positions
+        df = pd.DataFrame([
+            {'CHROM': 1, 'POS': 2488153, 'REF': 'A',
+             'ALT': 'G', 'COSMICcMuts': 'COSV63186428&COSV63186428&COSV63186428'},
+        ])
+
+        test_vcf = vcf(argparse.Namespace())
+        test_vcf.vcfs = [df]
+        test_vcf.refs = ['37']  # Set reference = build 37
+
+        # Call function to add hyperlinks
+        vcf.add_hyperlinks(test_vcf)
+
+        valid_string = (
+            '=HYPERLINK("https://cancer.sanger.ac.uk/cosmic/search?'
+            'genome=37&q=COSV63186428", "COSV63186428")'
+        )
+
+        # Assert the output is as expected
+        assert test_vcf.vcfs[0]["COSMICcMuts"][0] == valid_string, (
+            "COSMICcMuts link output incorrect for build 37 input"
+        )
+
 if __name__ == "__main__":
     header = TestHeader()
     header.test_column_names()

From 9ca1d196b5c30f9f0e333d7671ecbeaaaf776b7a Mon Sep 17 00:00:00 2001
From: RSWilson1 <robert.s.wilson1996@gmail.com>
Date: Tue, 25 Apr 2023 16:21:55 +0100
Subject: [PATCH 03/12] removed testing prints

---
 resources/home/dnanexus/generate_workbook/utils/vcf.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/resources/home/dnanexus/generate_workbook/utils/vcf.py b/resources/home/dnanexus/generate_workbook/utils/vcf.py
index 6a9d0f09..16f6303c 100644
--- a/resources/home/dnanexus/generate_workbook/utils/vcf.py
+++ b/resources/home/dnanexus/generate_workbook/utils/vcf.py
@@ -471,7 +471,6 @@ def add_hyperlinks(self) -> None:
                     url = self.urls.get('gnomad')
                 elif 'cosmic' in col.lower():
                     url = self.urls.get('csq_cosmic')
-                    print(f"csomic stub:{url}")
                 else:
                     url = self.urls.get(col.lower(), None)
 
@@ -564,9 +563,7 @@ def make_hyperlink(self, column, url, value, build):
             # COSMIC requires the url to have the COSM ID added to the url
             # stub differs based on genome build
             url = url.replace('BUILD', str(build))
-            # COSMIC IDs are separated by & and rejoined for each unique ID
-            # re-assigned to value[column] to display only unique IDs in excel
-            value[column] = '&'.join(set(value[column].split('&')))
+            # Build COSMIC URL; the displayed value stays as it is in the column
             url = f'{url}{value[column]}'
 
         else:

From 5d0b0337e131fad6d703f5854b16e4e40904e1c3 Mon Sep 17 00:00:00 2001
From: RSWilson1 <robert.s.wilson1996@gmail.com>
Date: Tue, 25 Apr 2023 16:37:21 +0100
Subject: [PATCH 04/12] - changed to handle all VEP fields not just cosmic. -
 creates set  to get unique COSV ids, sorts and then re-join by '&'.

---
 .../generate_workbook/utils/columns.py        | 22 ++++++++++++-------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/resources/home/dnanexus/generate_workbook/utils/columns.py b/resources/home/dnanexus/generate_workbook/utils/columns.py
index 01f9c38e..c810e369 100644
--- a/resources/home/dnanexus/generate_workbook/utils/columns.py
+++ b/resources/home/dnanexus/generate_workbook/utils/columns.py
@@ -1,6 +1,7 @@
 import sys
 from typing import Union
 import pandas as pd
+import time
 
 
 class splitColumns():
@@ -28,14 +29,15 @@ def split(self, vcf_df) -> Union[pd.DataFrame, int]:
         """
         vcf_df = self.info(vcf_df)
         vcf_df = self.format_fields(vcf_df)
-        vcf_df = self.unique_cosmic(vcf_df)
+        vcf_df = self.unique_vep(vcf_df)
 
         return vcf_df
 
 
-    def unique_cosmic(self, vcf_df) -> pd.DataFrame:
+    def unique_vep(self, vcf_df):
         """
         Handle known bug in VEP annotation where it duplicates COSMIC IDs
+        This creates a
 
         Parameters
         ----------
@@ -47,10 +49,16 @@ def unique_cosmic(self, vcf_df) -> pd.DataFrame:
         vcf_df : pd.DataFrame
             dataframe of variants
         """
-        if 'COSMIC' in vcf_df.columns:
-            vcf_df['COSMIC'] = vcf_df['COSMIC'].apply(
-                lambda x: ' & '.join(set(x.split('&')))
+
+        # Find all columns that start with 'csq'
+        csq_columns = [col for col in vcf_df.columns if col.lower().startswith('csq')]
+
+        # Join the 'csq' columns using '&' and remove duplicates
+        for col in csq_columns:
+            vcf_df[col] = vcf_df[col].apply(
+                lambda x: ' & '.join(sorted(set(x.split('&')))) if isinstance(x, str) else x
             )
+
         return vcf_df
 
 
@@ -174,12 +182,10 @@ def info(self, vcf_df) -> pd.DataFrame:
         info_keys = [x for x in info_keys if x]  # can end up with empty string
 
         info_values = []
-
         # info_pairs -> list of list of pairs, one list per variant
         for variant_pairs in info_pairs:
             # for every variants values, split them out to dict to add to df
             pair_values = {}
-
             for pair in variant_pairs:
                 if '=' in pair:
                     # key value pair
@@ -187,7 +193,6 @@ def info(self, vcf_df) -> pd.DataFrame:
                 else:
                     # Flag value present (e.g STR)
                     key, value = pair, True
-
                 pair_values[key] = value
 
             info_values.append(pair_values)
@@ -199,4 +204,5 @@ def info(self, vcf_df) -> pd.DataFrame:
         # drop INFO and CSQ as we fully split them out
         vcf_df.drop(['INFO'], axis=1, inplace=True)
 
+
         return vcf_df

From 4307f0c651faa494b29f4dbaa32b716967ca6b2c Mon Sep 17 00:00:00 2001
From: RSWilson1 <robert.s.wilson1996@gmail.com>
Date: Tue, 25 Apr 2023 16:38:01 +0100
Subject: [PATCH 05/12] removed time used for testing

---
 resources/home/dnanexus/generate_workbook/utils/columns.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/resources/home/dnanexus/generate_workbook/utils/columns.py b/resources/home/dnanexus/generate_workbook/utils/columns.py
index c810e369..861fd722 100644
--- a/resources/home/dnanexus/generate_workbook/utils/columns.py
+++ b/resources/home/dnanexus/generate_workbook/utils/columns.py
@@ -1,7 +1,6 @@
 import sys
 from typing import Union
 import pandas as pd
-import time
 
 
 class splitColumns():

From 9aecfa7a84139baa56745dc59ed03af77cc271a8 Mon Sep 17 00:00:00 2001
From: RSWilson1 <robert.s.wilson1996@gmail.com>
Date: Tue, 25 Apr 2023 16:41:22 +0100
Subject: [PATCH 06/12] Added tests for VEP handling in columns.py. Added
 add_comment_column to namespace. Added test for Cosmic Hyperlinks.

---
 .../generate_workbook/tests/test_columns.py   | 64 +++++++++++++++++++
 .../generate_workbook/tests/test_vcf.py       | 55 ++++++++--------
 2 files changed, 92 insertions(+), 27 deletions(-)

diff --git a/resources/home/dnanexus/generate_workbook/tests/test_columns.py b/resources/home/dnanexus/generate_workbook/tests/test_columns.py
index 8dd12edb..78bc475f 100644
--- a/resources/home/dnanexus/generate_workbook/tests/test_columns.py
+++ b/resources/home/dnanexus/generate_workbook/tests/test_columns.py
@@ -25,6 +25,7 @@ def read_test_vcf(vcf_file):
     vcf_handler = vcf(argparse.Namespace(
         add_name=False, analysis='', clinical_indication='', exclude=None,
         filter=None, include=None, keep=False, merge=False,
+        add_comment_column=False,
         out_dir='',
         output='',
         panel='', print_columns=False, print_header=False, reads='',
@@ -250,6 +251,69 @@ def test_format_sample_values_are_correct(self):
         )
 
 
+class TestVEPHandling():
+    """
+    Tests for splitColumns.unique_vep() that handles
+    duplicates in INFO/CSQ VEP columns.
+    """
+    # test vcf standard sample
+    test_vcf = os.path.join(TEST_DATA_DIR, "HD753-unittest_annotated.split.vcf")
+    # run dataframe through splitColumns.split() (this calls unique_vep())
+    vcf_df = read_test_vcf(vcf_file=test_vcf)
+    vcf_df = splitColumns().split(vcf_df)
+
+
+    def test_parsed_correct_COSMICcMuts_values(self):
+        """
+        Test values read into dataframe for COSMICcMuts match the values
+        parsed directly from the test VCF
+        """
+        # read COSMICcMuts values from vcf
+        output = subprocess.run(
+            (
+                f"grep -v '^#' {self.test_vcf} | grep -oh "
+                f"'COSMICcMuts=[A-Z0-9&\.]*;' | sort | uniq"
+            ), shell=True, capture_output=True
+        )
+        # clean up values
+        stdout = output.stdout.decode().splitlines()
+        stdout = sorted(list([
+            x.replace(';', '').replace('COSMICcMuts=', '') for x in stdout
+        ]))
+        stdout = [' & '.join(set(x.split("&"))) for x in stdout]
+        # get COSMICcMuts values from dataframe
+        df_values = sorted(list(self.vcf_df['CSQ_COSMICcMuts'].unique().tolist()))
+        assert all([str(x) == str(y) for x, y in zip(stdout, df_values)]), (
+            "COSMICcMuts values in VCF do not match those in dataframe"
+        )
+
+    def test_parsed_correct_COSMICncMuts_values(self):
+            """
+            Test values read into dataframe for COSMICncMuts match the values
+            parsed directly from the test VCF
+            """
+            # read COSMICncMuts values from vcf
+            output = subprocess.run(
+                (
+                    f"grep -v '^#' {self.test_vcf} | grep -oh "
+                    f"'COSMICncMuts=[A-Z0-9&\.]*;' | sort | uniq"
+                ), shell=True, capture_output=True
+            )
+
+            # clean up values
+            stdout = output.stdout.decode().splitlines()
+            stdout = sorted(list([
+                x.replace(';', '').replace('COSMICncMuts=', '') for x in stdout
+            ]))
+            stdout = [' & '.join(set(x.split("&"))) for x in stdout]
+            # get COSMICncMuts values from dataframe
+            df_values = sorted(list(self.vcf_df['CSQ_COSMICncMuts'].unique().tolist()))
+
+            assert all([str(x) == str(y) for x, y in zip(stdout, df_values)]), (
+                "COSMICncMuts values in VCF do not match those in dataframe"
+            )
+
+
 if __name__ == "__main__":
     columns = TestMainColumns()
     columns.test_filter()
diff --git a/resources/home/dnanexus/generate_workbook/tests/test_vcf.py b/resources/home/dnanexus/generate_workbook/tests/test_vcf.py
index 600e6638..12b1c725 100644
--- a/resources/home/dnanexus/generate_workbook/tests/test_vcf.py
+++ b/resources/home/dnanexus/generate_workbook/tests/test_vcf.py
@@ -93,6 +93,7 @@ class instance of vcf from utils
             add_name=True, analysis='',
             filter=None, keep=False, merge=False,
             reorder=[], exclude=None, include=None,
+            add_comment_column=False,
             out_dir='', output='',
             panel='', print_columns=False, print_header=False, reads='',
             rename=None, sample='', sheets=['variants'], summary=None,
@@ -512,33 +513,33 @@ def test_cosmic_build_37():
             "COSMICcMuts link output incorrect for build 37 input"
         )
 
-    @staticmethod
-    def test_cosmic_build_37_multiple():
-        '''
-        Test that the COSMIC links are generated correctly for build 37
-        '''
-        # Intialise test dataframe with build 37 genome positions
-        df = pd.DataFrame([
-            {'CHROM': 1, 'POS': 2488153, 'REF': 'A',
-             'ALT': 'G', 'COSMICcMuts': 'COSV63186428&COSV63186428&COSV63186428'},
-        ])
-
-        test_vcf = vcf(argparse.Namespace())
-        test_vcf.vcfs = [df]
-        test_vcf.refs = ['37']  # Set reference = build 37
-
-        # Call function to add hyperlinks
-        vcf.add_hyperlinks(test_vcf)
-
-        valid_string = (
-            '=HYPERLINK("https://cancer.sanger.ac.uk/cosmic/search?'
-            'genome=37&q=COSV63186428", "COSV63186428")'
-        )
-
-        # Assert the output is as expected
-        assert test_vcf.vcfs[0]["COSMICcMuts"][0] == valid_string, (
-            "COSMICcMuts link output incorrect for build 37 input"
-        )
+    # @staticmethod
+    # def test_cosmic_build_37_multiple():
+    #     '''
+    #     Test that the COSMIC links are generated correctly for build 37
+    #     '''
+    #     # Intialise test dataframe with build 37 genome positions
+    #     df = pd.DataFrame([
+    #         {'CHROM': 1, 'POS': 2488153, 'REF': 'A',
+    #          'ALT': 'G', 'COSMICcMuts': 'COSV63186428&COSV63186428&COSV63186428'},
+    #     ])
+
+    #     test_vcf = vcf(argparse.Namespace())
+    #     test_vcf.vcfs = [df]
+    #     test_vcf.refs = ['37']  # Set reference = build 37
+
+    #     # Call function to add hyperlinks
+    #     vcf.add_hyperlinks(test_vcf)
+
+    #     valid_string = (
+    #         '=HYPERLINK("https://cancer.sanger.ac.uk/cosmic/search?'
+    #         'genome=37&q=COSV63186428", "COSV63186428")'
+    #     )
+
+    #     # Assert the output is as expected
+    #     assert test_vcf.vcfs[0]["COSMICcMuts"][0] == valid_string, (
+    #         "COSMICcMuts link output incorrect for build 37 input"
+    #     )
 
 if __name__ == "__main__":
     header = TestHeader()

From 08e061fc440a4220f94e91b2f91912d4e8cf599f Mon Sep 17 00:00:00 2001
From: RSWilson1 <robert.s.wilson1996@gmail.com>
Date: Wed, 26 Apr 2023 09:22:47 +0100
Subject: [PATCH 07/12] removed old redundant test for handling cosmic
 duplicates.

---
 .../generate_workbook/tests/test_vcf.py       | 27 -------------------
 1 file changed, 27 deletions(-)

diff --git a/resources/home/dnanexus/generate_workbook/tests/test_vcf.py b/resources/home/dnanexus/generate_workbook/tests/test_vcf.py
index 12b1c725..be4992e5 100644
--- a/resources/home/dnanexus/generate_workbook/tests/test_vcf.py
+++ b/resources/home/dnanexus/generate_workbook/tests/test_vcf.py
@@ -513,33 +513,6 @@ def test_cosmic_build_37():
             "COSMICcMuts link output incorrect for build 37 input"
         )
 
-    # @staticmethod
-    # def test_cosmic_build_37_multiple():
-    #     '''
-    #     Test that the COSMIC links are generated correctly for build 37
-    #     '''
-    #     # Intialise test dataframe with build 37 genome positions
-    #     df = pd.DataFrame([
-    #         {'CHROM': 1, 'POS': 2488153, 'REF': 'A',
-    #          'ALT': 'G', 'COSMICcMuts': 'COSV63186428&COSV63186428&COSV63186428'},
-    #     ])
-
-    #     test_vcf = vcf(argparse.Namespace())
-    #     test_vcf.vcfs = [df]
-    #     test_vcf.refs = ['37']  # Set reference = build 37
-
-    #     # Call function to add hyperlinks
-    #     vcf.add_hyperlinks(test_vcf)
-
-    #     valid_string = (
-    #         '=HYPERLINK("https://cancer.sanger.ac.uk/cosmic/search?'
-    #         'genome=37&q=COSV63186428", "COSV63186428")'
-    #     )
-
-    #     # Assert the output is as expected
-    #     assert test_vcf.vcfs[0]["COSMICcMuts"][0] == valid_string, (
-    #         "COSMICcMuts link output incorrect for build 37 input"
-    #     )
 
 if __name__ == "__main__":
     header = TestHeader()

From 22cb7b5de54001487f552ca4ba6fe988067a7b60 Mon Sep 17 00:00:00 2001
From: RSWilson1 <robert.s.wilson1996@gmail.com>
Date: Thu, 27 Apr 2023 15:25:28 +0100
Subject: [PATCH 08/12] Fixed Decipher build38 test in test_vcf.py

---
 resources/home/dnanexus/generate_workbook/tests/test_vcf.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/resources/home/dnanexus/generate_workbook/tests/test_vcf.py b/resources/home/dnanexus/generate_workbook/tests/test_vcf.py
index be4992e5..d8b0f784 100644
--- a/resources/home/dnanexus/generate_workbook/tests/test_vcf.py
+++ b/resources/home/dnanexus/generate_workbook/tests/test_vcf.py
@@ -412,12 +412,11 @@ def test_decipher_links_build_38():
         ])
 
         test_vcf = vcf(argparse.Namespace(decipher=True))
-        test_vcf.vcf = [df]
+        test_vcf.vcfs = [df]
         test_vcf.refs = ['38']  # Set reference = build 38
 
         # Call function to add hyperlinks
         vcf.add_hyperlinks(test_vcf)
-
         # Define expected string output
         valid_string = (
             '=HYPERLINK("https://www.deciphergenomics.org/sequence-variant/1-6'

From e00e7ed10ff6387256f92f935954e76caa32adc2 Mon Sep 17 00:00:00 2001
From: RSWilson1 <robert.s.wilson1996@gmail.com>
Date: Thu, 27 Apr 2023 15:26:06 +0100
Subject: [PATCH 09/12] Updated requirements.

---
 requirements.txt | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 855a6861..82b324a2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,13 @@
 colour==0.1.5
-python-Levenshtein==0.12.2
 openpyxl==3.0.9
-pandas==1.3.5
\ No newline at end of file
+pandas==1.3.5
+et-xmlfile==1.1.0
+filetype==1.1.0
+jarowinkler==1.2.1
+Levenshtein==0.20.2
+numpy==1.23.2
+python-dateutil==2.8.2
+python-Levenshtein==0.12.2
+pytz==2022.2.1
+rapidfuzz==2.5.0
+six==1.16.0

From 5bf087fbe377ab85f7276cfb93a3ad9285ff2f45 Mon Sep 17 00:00:00 2001
From: RSWilson1 <robert.s.wilson1996@gmail.com>
Date: Thu, 27 Apr 2023 15:57:29 +0100
Subject: [PATCH 10/12] Updated version with notes in dxapp.json

---
 dxapp.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dxapp.json b/dxapp.json
index 30ad6d51..3039fe89 100644
--- a/dxapp.json
+++ b/dxapp.json
@@ -3,11 +3,11 @@
   "title": "eggd_generate_variant_workbook",
   "summary": "Create Excel workbook from VEP annotated vcf",
   "dxapi": "1.0.0",
-  "version": "2.3.0",
+  "version": "2.4.0",
   "properties": {
-    "githubRelease": "v2.3.0"
+    "githubRelease": "v2.4.0"
   },
-  "whatsNew": "* v2.0.0 Rewrite of previous app to generate xlsx file from a VEP annotated VCF(s); * v2.0.1 Bug fix to correctly treat CHROM as string values; * v2.0.2 Bug fix for ACMG report template structure; * v2.0.3 Bug fixes for issues with hyperlinks, changed app name to eggd_generate_variant_workbook; * v2.1.0 Handle VCFs from GATK gCNV and Illumina TSO500, readability tweaks to variant sheets; * v2.1.1 Bug fix for typing of numeric values in hyperlinks; * v2.2.0 Added ability to pass in non VCF files (tsvs/csvs and images) to additional sheets, optional adding of links to DECIPHER with --decipher; * v2.3.0 Added conditional colouring of cells in variant sheets, new 'basic' summary sheet;",
+  "whatsNew": "* v2.0.0 Rewrite of previous app to generate xlsx file from a VEP annotated VCF(s); * v2.0.1 Bug fix to correctly treat CHROM as string values; * v2.0.2 Bug fix for ACMG report template structure; * v2.0.3 Bug fixes for issues with hyperlinks, changed app name to eggd_generate_variant_workbook; * v2.1.0 Handle VCFs from GATK gCNV and Illumina TSO500, readability tweaks to variant sheets; * v2.1.1 Bug fix for typing of numeric values in hyperlinks; * v2.2.0 Added ability to pass in non VCF files (tsvs/csvs and images) to additional sheets, optional adding of links to DECIPHER with --decipher; * v2.3.0 Added conditional colouring of cells in variant sheets, new 'basic' summary sheet;  * v2.4.0 Added handling for duplicate annotation in VEP fields (i.e. cosmic, CGC, etc..);",
   "authorizedUsers": [
     "org-emee_1"
   ],

From a5b05c937c8f2b3be7860cd04da96c1863817501 Mon Sep 17 00:00:00 2001
From: RSWilson1 <robert.s.wilson1996@gmail.com>
Date: Tue, 2 May 2023 14:30:01 +0100
Subject: [PATCH 11/12] PEP8 blankline

---
 resources/home/dnanexus/generate_workbook/tests/test_columns.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/resources/home/dnanexus/generate_workbook/tests/test_columns.py b/resources/home/dnanexus/generate_workbook/tests/test_columns.py
index 78bc475f..7ac21a2e 100644
--- a/resources/home/dnanexus/generate_workbook/tests/test_columns.py
+++ b/resources/home/dnanexus/generate_workbook/tests/test_columns.py
@@ -58,6 +58,7 @@ def read_column_from_vcf(vcf, column) -> list:
 
     return output.stdout.decode().splitlines()
 
+
 class TestMainColumns():
     """
     Tests for ensuring the CHROM, POS, REF, ALT, ID, QUAL and FILTER

From 2cdcdeb81bafa24141cdb7e6b9668a6592030372 Mon Sep 17 00:00:00 2001
From: RSWilson1 <robert.s.wilson1996@gmail.com>
Date: Tue, 2 May 2023 15:46:13 +0100
Subject: [PATCH 12/12] unique_vep function - added return type for function -
 changed delimiter returned from ' & ' to '&'.

---
 resources/home/dnanexus/generate_workbook/utils/columns.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/home/dnanexus/generate_workbook/utils/columns.py b/resources/home/dnanexus/generate_workbook/utils/columns.py
index 861fd722..68342d03 100644
--- a/resources/home/dnanexus/generate_workbook/utils/columns.py
+++ b/resources/home/dnanexus/generate_workbook/utils/columns.py
@@ -33,7 +33,7 @@ def split(self, vcf_df) -> Union[pd.DataFrame, int]:
         return vcf_df
 
 
-    def unique_vep(self, vcf_df):
+    def unique_vep(self, vcf_df) -> pd.DataFrame:
         """
         Handle known bug in VEP annotation where it duplicates COSMIC IDs
         This creates a
@@ -55,7 +55,7 @@ def unique_vep(self, vcf_df):
         # Join the 'csq' columns using '&' and remove duplicates
         for col in csq_columns:
             vcf_df[col] = vcf_df[col].apply(
-                lambda x: ' & '.join(sorted(set(x.split('&')))) if isinstance(x, str) else x
+                lambda x: '&'.join(sorted(set(x.split('&')))) if isinstance(x, str) else x
             )
 
         return vcf_df