#! /usr/bin/python
import re
import sys
import time
def f5(seq, idfun=None):
    """Return the items of ``seq`` with duplicates dropped, order preserved.

    The first occurrence of each item is kept; later items whose marker has
    already been seen are skipped.

    Args:
        seq: Any iterable of items.
        idfun: Optional one-argument callable that maps an item to the
            marker used to decide whether two items are duplicates.  When
            omitted, the item itself is the marker.  Markers must be
            hashable.

    Returns:
        A new list holding the first item seen for each distinct marker, in
        the order those items appear in ``seq``.
    """
    if idfun is None:
        def idfun(x):
            return x

    # A set is the natural container here: only membership matters.
    seen = set()
    result = []
    for item in seq:
        marker = idfun(item)
        if marker in seen:
            continue
        seen.add(marker)
        result.append(item)
    return result
# Count how often each (date, message) pair occurs in the input log and
# print a numbered summary to stdout.  Only a timestamp header is written to
# extract_output.txt; the summary itself is not written there.
file1 = r"/home/python/extract_input"
# "date" captures 3-5 word characters, one whitespace character and two
# digits; an HH:MM:SS-shaped field is matched but not captured; "rest"
# captures the remainder of the line.
rawstr = r"""(?P<date>\w{3,5}\s\d{2}?)\s\d{2}:\d{2}:\d{2}\s(?P<rest>.*?$)"""
rxinput = re.compile(rawstr)

# Number of occurrences keyed by (date, message).  Deliberately not named
# ``dict`` so the builtin stays usable.
counts = {}

# Open the input first so that a missing input file does not leave behind a
# freshly truncated output file.  The ``with`` blocks close both handles even
# if processing raises.
with open(file1, "r") as InputFile:
    with open("extract_output.txt", "w") as OutputFile:
        OutputFile.write(time.asctime() + "\n")

        for line in InputFile:
            # "rest" always runs to the end of the line, so a line can hold
            # at most one match; a single search() is sufficient.
            match = rxinput.search(line)
            if match is None:
                continue
            key = (match.group(1), match.group(2))
            # Direct dict lookup instead of scanning a list of keys.
            counts[key] = counts.get(key, 0) + 1
            if counts[key] > 1:
                # Echo the running count each time a repeated entry is seen.
                print(counts[key])

        # Output test: one numbered line per distinct (date, message) pair.
        for index, key in enumerate(counts, 1):
            print("%s   %s  -->  %s" % (index, key, counts[key]))
        print("")