-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcount_fraction_correct.py
43 lines (37 loc) · 2.38 KB
/
count_fraction_correct.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import pandas as pd
import numpy as np
from read_all_scores import add_score_categories, mut_types
# Inputs to summarize, as (csv_path, backrub_steps) pairs. The second element
# is compared against the ScoreMethodID column to pick the rows to analyze and
# is echoed in the report as the number of backrub steps.
input_data = [
    ('data/zemu_control-69aa526-noglypivot-id_50.csv.gz', 8),
    ('data/zemu_1.2-60000_rscript_simplified-t14-id_50.csv.gz', 35000),
    # Alternative inputs, currently disabled:
    # ('data/zemu_1.2-60000_rscript_validated-t14-id_50.csv.gz', 35000),
    # ('data/zemu_control-69aa526-id_50.csv.gz', 8),
]
def _print_prediction_counts(header, selected):
    """Print how the predicted scores of ``selected`` are distributed.

    header   -- %-format string with a single ``%d`` slot; it is filled with
                the number of rows in ``selected`` and printed first.
    selected -- DataFrame with a ``total`` column holding the predicted score
                that is compared against the REU thresholds below.

    One count is printed per threshold, followed by a blank line. The
    categories intentionally overlap (e.g. ``<= -1`` is a subset of ``< 0``),
    so the counts are not expected to sum to ``len(selected)``.
    """
    predicted = selected['total']
    print(header % len(selected))
    categories = (
        ('Number of the above predicted to be <= -1REU: %d', predicted <= -1.0),
        ('Number of the above predicted to be < 0 REU: %d', predicted < 0.0),
        ('Number of the above predicted to be > 0 REU: %d', predicted > 0.0),
        ('Number of the above predicted to be 0 <= DDG < 1: %d',
         (predicted >= 0.0) & (predicted < 1.0)),
        ('Number of the above predicted to be >= 1 REU: %d', predicted >= 1.0),
    )
    for message, mask in categories:
        # Summing a boolean mask counts the rows that satisfy the condition.
        print(message % int(mask.sum()))
    # print('') emits an empty line under both Python 2 and Python 3
    # (print() would output "()" under Python 2).
    print('')


for csv_path, backrub_steps in input_data:
    print(csv_path)

    # Parse the file once, then keep only the rows scored with the requested
    # ScoreMethodID and drop exact duplicate rows.
    df = add_score_categories(pd.read_csv(csv_path))
    df = df.loc[df['ScoreMethodID'] == backrub_steps].drop_duplicates()

    for mut_type in ('complete', 's2l'):
        by_type = df.loc[df['MutType'] == mut_type]
        if mut_type == 'complete':
            # Sanity check: the complete set is expected to hold 1240 rows.
            assert len(by_type) == 1240, (
                'expected 1240 rows in the complete set of %s, found %d'
                % (csv_path, len(by_type))
            )
        print('%s subset (n=%d, backrub steps=%d)'
              % (mut_types[mut_type], len(by_type), backrub_steps))

        # Rows whose experimental ddG is at or below -1.0.
        _print_prediction_counts(
            'Number of mutations with exp. ddG <= -1.0: %d',
            by_type.loc[by_type['ExperimentalDDG'] <= -1.0],
        )
        # Rows whose experimental ddG is at or above 1.0.
        _print_prediction_counts(
            'Number of mutations with exp. ddG >= 1.0: %d',
            by_type.loc[by_type['ExperimentalDDG'] >= 1.0],
        )