Small Python script to calculate a file's entropy, normalized by the number of distinct symbols in the input. It reads its input from stdin.
#!/usr/bin/env python
from collections import defaultdict
from math import log
def entropy(input, chunk_size=64 * 1024):
    """Return the normalized Shannon entropy of the data read from *input*.

    The stream is consumed in chunks until ``read`` returns an empty
    result. Each element yielded by iterating a chunk is counted as one
    symbol. The logarithm base is the number of distinct symbols seen, so
    the result is nominally between 0.0 (one symbol dominates completely)
    and 1.0 (all observed symbols are equally frequent).

    Args:
        input: Object with a ``read(size)`` method that returns a sized,
            iterable chunk and an empty (falsy) chunk at end of input.
        chunk_size: Number of items requested per ``read`` call. Must be
            at least 1. Defaults to 64 KiB.

    Returns:
        float: ``0.0`` if the input is empty or contains fewer than two
        distinct symbols; otherwise the normalized entropy.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    # Function-scope import so the block does not depend on a new
    # file-level import.
    from collections import Counter

    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    counts = Counter()
    total = 0  # Number of symbols read so far.
    while True:
        chunk = input.read(chunk_size)
        if not chunk:
            break
        total += len(chunk)
        counts.update(chunk)

    distinct = len(counts)
    # Fewer than two distinct symbols also covers the empty-input case, and
    # avoids taking a logarithm with base 0 or 1.
    if distinct < 2:
        return 0.0

    result = 0.0
    for count in counts.values():
        # Every stored count is >= 1, so the probability is never zero.
        # float() keeps true division on Python 2.
        probability = float(count) / total
        result += probability * log(probability, distinct)
    return -result
if __name__ == '__main__':
    import sys

    # On Python 3, sys.stdin decodes its input as text and can fail on
    # arbitrary binary files; its ``buffer`` attribute exposes the raw
    # bytes. Python 2's sys.stdin has no ``buffer``, so fall back to the
    # stream itself.
    stream = getattr(sys.stdin, 'buffer', sys.stdin)

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(entropy(stream))