-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathentropy.py
38 lines (32 loc) · 962 Bytes
/
entropy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python
'''
Calculate Shannon Entropy (min bits per byte-character)
original source: https://www.kennethghartman.com/calculate-file-entropy/
'''
__version__ = '0.1'
__description__ = 'Calculate Shannon Entropy for given file'
import sys
import math
import collections
def main():
    '''
    Script entry point.

    Delegates to entropy(), which reads the target file name from the
    command line and prints its report.
    '''
    entropy()
def entropy(path=None):
    '''
    Print a report on, and return, the Shannon entropy of a file.

    The entropy is measured in bits per byte: each distinct byte value x
    occurring with relative frequency p(x) contributes -p(x) * log2(p(x)).

    path: file to analyse. When omitted, the first command-line argument
          (sys.argv[1]) is used, so existing no-argument calls still work.

    Returns the entropy: 0 for an empty file, otherwise a float between
    0.0 (one repeated byte value) and 8.0 (all 256 values equally likely).

    Raises IndexError if path is omitted and no command-line argument was
    given, and OSError if the file cannot be opened or read.
    '''
    if path is None:
        path = sys.argv[1]

    print('Opening file {}...'.format(path))
    with open(path, 'rb') as f:
        data = f.read()
    file_size = len(data)
    # print() rather than a bare `print`: under Python 3 the bare name is a
    # no-op expression and the intended blank line is never emitted.
    print()
    print('File size in bytes: {:,d}'.format(file_size))

    # calculate the frequency of each byte value in the file
    print('Calculating Shannon entropy of file. Please wait...')
    # Counter consumes the bytes object directly (iterating bytes yields
    # ints), so no intermediate list of the whole file is needed.
    counter = collections.Counter(data)
    result = 0
    for count in counter.values():
        # count is always > 0, so log2 is defined; an empty file never
        # enters the loop, so file_size is never a zero divisor.
        p_x = count / file_size
        result -= p_x * math.log2(p_x)

    print('Shannon entropy: {}'.format(result))
    print()
    return result
# Only run the calculation when executed as a script, not when imported.
if __name__ == '__main__':
    main()