import math


def log2(x):
    """Base-2 logarithm, so that entropy is measured in bits."""
    return math.log(x, 2)


class TextEntropy:
    """Shannon entropy of a text, computed from per-character frequencies."""

    def __init__(self, text):
        self.text = text.lower()      # case-insensitive: 'A' and 'a' count as one symbol
        self.count = len(self.text)   # total number of characters

    def scan(self, text):
        """Count how many times each character occurs in the given text."""
        d = {}
        for c in text:
            d[c] = d.get(c, 0) + 1
        return d

    def calculateProbs(self, distribution):
        """Convert raw character counts into a list of probabilities."""
        return [happen / self.count for happen in distribution.values()]

    def calculateEntropy(self):
        """Shannon entropy in bits: H = -sum(p * log2(p)) over the probabilities."""
        probs = self.calculateProbs(self.scan(self.text))
        return -sum(p * log2(p) for p in probs)

    def getEntropy(self):
        return self.calculateEntropy()
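

# Sanity checks for the entropy math; both values follow directly from the
# formula: two equally likely symbols give exactly one bit, and a single
# repeated symbol gives zero (the -0.0 is just IEEE negative zero).
#
#   >>> TextEntropy("ab").getEntropy()
#   1.0
#   >>> TextEntropy("aaaa").getEntropy()
#   -0.0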


def tests():
    lol = "Da lIl GurL WIT baBIEs PiZZa Dat WUS sO Nice nD SwEet! =]"
    normal = "The little girl with babies and pizza, that was a nice video"
    L = TextEntropy(lol)
    N = TextEntropy(normal)
    print("-" * 75)
    print("YouTube-derived text:")
    print(lol)
    print("The entropy associated with this is:", L.getEntropy())
    print("-" * 75)
    print("Normal text:")
    print(normal)
    print("The entropy associated with this is:", N.getEntropy())
    print("-" * 75)


if __name__ == "__main__":
    tests()