Skip to content

Commit

Permalink
Added matching sex column
Browse files Browse the repository at this point in the history
  • Loading branch information
Aisha-D committed Mar 16, 2021
1 parent 7cdddab commit 54f7dc8
Showing 1 changed file with 40 additions and 0 deletions.
40 changes: 40 additions & 0 deletions resources/home/dnanexus/reformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,44 @@ def Predict_Sex(data, f_cutoff, m_cutoff):

return data

def Matching_Sexes(data):
    """Adds a column stating whether reported and predicted sex match

    Args:
        data (pandas data frame): output from {sample}.somalier.samples.tsv,
            containing the original_pedigree_sex and Predicted_Sex columns
    Returns:
        data (pandas data frame): New dataframe with the same rows and
            index as the input plus a Match_Sexes column holding the
            string 'true' or 'false' for each sample
    """
    # Compare the two columns row by row. The result is stored as a
    # lowercase string ('true' / 'false') rather than a Python boolean.
    matches = [
        str(reported == predicted).lower()
        for reported, predicted in zip(
            data.original_pedigree_sex, data.Predicted_Sex
        )
    ]

    # assign() attaches the values positionally and keeps the index of
    # the input. Concatenating a separately built dataframe along
    # axis=1 would instead align on index labels and pad with NaN rows
    # whenever the input does not carry a default 0..n-1 index.
    return data.assign(Match_Sexes=matches)

def main():

Expand All @@ -152,6 +190,8 @@ def main():

data = Predict_Sex(data, f_cutoff, m_cutoff)

data = Matching_Sexes(data)

# replace over existing file
data.to_csv(
'Multiqc_' + args.input_data,
Expand Down

0 comments on commit 54f7dc8

Please sign in to comment.