Skip to content

Commit

Permalink
add missing columns test
Browse files Browse the repository at this point in the history
  • Loading branch information
kjwinfield committed Jan 6, 2025
1 parent e9e23b1 commit f79404c
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 7 deletions.
26 changes: 19 additions & 7 deletions resources/home/dnanexus/make_workbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,17 +411,29 @@ def add_epic_data(self):
family_id = self.wgs_data['family_id']
# Only run if there are only parents and proband
if self.other_relation is False:
required_cols = [
"WGS Referral ID",
"External Specimen Identifier",
"Specimen Identifier",
"Patient Stated Gender",
"Year of Birth"
]
# Read in csv as df, using only relevant columns
df = pd.read_csv(
self.args.epic_clarity,
usecols=[
"WGS Referral ID",
"External Specimen Identifier",
"Specimen Identifier",
"Patient Stated Gender",
"Year of Birth"
]
usecols=lambda x: x in required_cols
)

# Check that required columns are present in Epic extract
missing_columns = set(required_cols) - set(df.columns)

if missing_columns:
raise ValueError(
"EPIC Clarity extract is missing required column(s): "
f"{missing_columns}. Please amend extract, or run again "
"without it."
)

# Filter df to only have rows with the family ID for this case
fam_df = df.loc[df['WGS Referral ID'] == family_id]

Expand Down
18 changes: 18 additions & 0 deletions resources/home/dnanexus/tests/test_make_workbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class TestWorkbook():
}
}
wgs_data = {
"family_id": "r12345",
"interpretation_request_data": {
"json_request": {
"pedigree": {
Expand Down Expand Up @@ -80,6 +81,23 @@ def test_get_penetrance(self):
excel.get_penetrance(self)
assert self.summary_content[(3,2)] == "complete, incomplete"

@mock.patch('pandas.read_csv')
def test_epic_extract_with_incorrect_column_names_raises_error(
    self, pd_read_csv_mock
):
    '''
    Check that add_epic_data raises a ValueError when the Epic Clarity
    extract does not contain all of the required columns.

    pandas.read_csv is patched so that no file is read; it returns a
    dataframe lacking the "Specimen Identifier" and
    "External Specimen Identifier" columns.
    '''
    # Build a Namespace *instance*. Assigning attributes onto the
    # argparse.Namespace class itself would leak epic_clarity into
    # every other Namespace created during the test session.
    self.args = argparse.Namespace(epic_clarity=None)
    self.other_relation = False
    # This should error as required Specimen Identifier cols are missing
    mock_df = pd.DataFrame(
        {
            "Year of Birth": [1937, 1975],
            "Patient Stated Gender": [1, 2],
            "WGS Referral ID": ["r12345", "r67890"],
        }
    )
    pd_read_csv_mock.return_value = mock_df
    # Match on the message so an unrelated ValueError cannot make this
    # test pass by accident
    with pytest.raises(ValueError, match="missing required column"):
        excel.add_epic_data(self)


class TestInterpretationService():
'''
Expand Down

0 comments on commit f79404c

Please sign in to comment.