-
Notifications
You must be signed in to change notification settings - Fork 93
/
pe_data_directory_features.py
102 lines (86 loc) · 4.11 KB
/
pe_data_directory_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
"""Extract LIEF features from PE files"""
from h2oaicore.transformer_utils import CustomTransformer
import datatable as dt
import numpy as np
class PEDataDirectoryFeatures(CustomTransformer):
    """Turn a column of PE file paths into numeric data-directory features.

    Each input value is treated as a path: the file is read, parsed with
    ``lief.PE.parse`` and, for every entry in the binary's
    ``data_directories``, two features are emitted (its ``size`` and its
    ``rva``).  A file that cannot be read or parsed yields an all-zero row
    instead of aborting the whole transform.
    """

    _unsupervised = True
    _modules_needed_by_name = ['lief==0.14.1']
    _regression = True
    _binary = True
    _multiclass = True
    _is_reproducible = True
    _parallel_task = True  # if enabled, params_base['n_jobs'] will be >= 1 (adaptive to system), otherwise 1
    _can_use_gpu = True  # if enabled, will use special job scheduler for GPUs
    _can_use_multi_gpu = True  # if enabled, can get access to multiple GPUs for single transformer (experimental)
    _numeric_output = True

    # Directory names used to build the all-zero fallback row, in output order.
    # NOTE(review): hard-coded list; confirm it matches the names that
    # data_directory_features() derives from the pinned lief version.
    _FALLBACK_DIRECTORY_NAMES = (
        'EXPORT_TABLE',
        'IMPORT_TABLE',
        'RESOURCE_TABLE',
        'EXCEPTION_TABLE',
        'CERTIFICATE_TABLE',
        'BASE_RELOCATION_TABLE',
        'DEBUG',
        'ARCHITECTURE',
        'GLOBAL_PTR',
        'TLS_TABLE',
        'LOAD_CONFIG_TABLE',
        'BOUND_IMPORT',
        'IAT',
        'DELAY_IMPORT_DESCRIPTOR',
        'CLR_RUNTIME_HEADER',
    )

    @staticmethod
    def get_default_properties():
        """Return the column requirements: exactly one text column."""
        return dict(col_type="text", min_cols=1, max_cols=1, relative_importance=1)

    @staticmethod
    def do_acceptance_test():
        """Return False, i.e. opt out of the acceptance test."""
        return False

    def fit_transform(self, X: dt.Frame, y: np.array = None):
        """Delegate to ``transform``; ``y`` is ignored and nothing is fitted."""
        return self.transform(X)

    def load_pe(self, file_path):
        """Read the file at ``file_path`` in binary mode and return a bytearray."""
        with open(file_path, 'rb') as f:
            return bytearray(f.read())

    def data_directory_features(self, lief_binary):
        """Build the feature dict for one parsed binary.

        Args:
            lief_binary: object exposing ``data_directories``, an iterable of
                entries with ``type``, ``size`` and ``rva`` attributes.

        Returns:
            dict mapping ``Data_Directory_<NAME>_size`` and
            ``Data_Directory_<NAME>_virtual_address`` to the entry's ``size``
            and ``rva``, where ``<NAME>`` is the last dot-separated part of
            ``str(entry.type)``.
        """
        features = {}
        for data_directory in lief_binary.data_directories:
            # Take the trailing segment so the member name is used whether the
            # enum prints as "Prefix.NAME" or with a longer qualified prefix.
            name = str(data_directory.type).rsplit(".", 1)[-1]
            features[f'Data_Directory_{name}_size'] = data_directory.size
            features[f'Data_Directory_{name}_virtual_address'] = data_directory.rva
        return features

    def _zero_features(self):
        """Return the fallback row: every known directory feature set to 0."""
        features = {}
        for name in self._FALLBACK_DIRECTORY_NAMES:
            features[f'Data_Directory_{name}_size'] = 0
            features[f'Data_Directory_{name}_virtual_address'] = 0
        return features

    def get_data_directory_features(self, file_path):
        """Extract data-directory features for the PE file at ``file_path``.

        Returns the dict produced by ``data_directory_features`` on success,
        or the all-zero fallback dict if reading, parsing or feature
        extraction raises.
        """
        # Imported lazily, as in the original; lief is the package listed in
        # _modules_needed_by_name.
        import lief

        try:
            pe_bytez = self.load_pe(file_path)
            lief_binary = lief.PE.parse(list(pe_bytez))
            return self.data_directory_features(lief_binary)
        except Exception:
            # Deliberate best-effort: one bad file must not fail the batch.
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            return self._zero_features()

    def transform(self, X: dt.Frame):
        """Compute data-directory features for every path in X's first column.

        Also records the resulting column names in
        ``self._output_feature_names`` and ``self._feature_desc``.

        Returns:
            pandas.DataFrame with one row per input row.
        """
        import pandas as pd

        file_paths = X.to_pandas().values[:, 0]
        ret_df = pd.DataFrame(
            [self.get_data_directory_features(path) for path in file_paths]
        )
        self._output_feature_names = ret_df.columns.to_list()
        self._feature_desc = self._output_feature_names
        return ret_df