Index

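Small Python script to calculate a file’s entropy. It reads its input from stdin. The result is normalized to the number of distinct symbols seen, so it ranges from 0 (a single repeated symbol) to 1 (all symbols equally frequent).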

#!/usr/bin/env python

from collections import Counter, defaultdict
from math import log

def entropy(input):
    """Return the normalized Shannon entropy of a readable stream.

    ``input`` is any object with a ``read(size)`` method (an open file,
    ``sys.stdin``, ``io.BytesIO`` ...). It is consumed in 64 KiB chunks until
    ``read`` returns an empty result, so the whole stream is never held in
    memory at once.

    Every item produced by iterating a chunk counts as one symbol. The
    logarithm base is the number of distinct symbols actually seen, so the
    result is nominally between 0.0 and 1.0, where 1.0 means all seen
    symbols are equally frequent. A stream with fewer than two distinct
    symbols -- including an empty one -- yields 0.0.
    """
    chunk_size = 1024 * 64
    counts = Counter()
    while True:
        buf = input.read(chunk_size)
        if not buf:
            break
        counts.update(buf)

    distinct = len(counts)
    if distinct < 2:
        # Also covers the empty stream, and keeps the log base away from
        # 0 and 1, for which the logarithm is undefined.
        return 0.0

    total = sum(counts.values())
    result = 0.0
    for count in counts.values():
        # Every stored count is >= 1, so p > 0 and log(p) is always defined.
        p = float(count) / total
        # The base is applied per term (not hoisted) so that uniform
        # distributions still evaluate to exactly 1.0.
        result += p * log(p, distinct)
    return -result

if __name__ == '__main__':
    import sys
    # Entropy is a property of the raw bytes: on Python 3 read the binary
    # buffer behind stdin rather than the decoding text wrapper, which can
    # fail on arbitrary binary data. Python 2's stdin has no ``buffer``
    # attribute and already returns byte strings, so it is used as-is.
    stream = getattr(sys.stdin, 'buffer', sys.stdin)
    # A parenthesized single-argument print is valid on both Python 2 (as a
    # statement) and Python 3 (as a function call).
    print(entropy(stream))