[ create a new paste ] login | about

Link: http://codepad.org/SDJS01X9    [ raw code | fork ]

Python, pasted on Dec 27:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import time
from collections import defaultdict
from itertools import groupby

# Daily ad report: joins impression, click and activation logs by calendar day
# and prints the counts plus click-through and conversion rates.
#
# The code below deliberately sticks to constructs that behave the same on
# Python 2 and Python 3 (single-argument print(...), %-formatting).

LOG_TIME_FORMAT = '%d/%b/%Y:%H:%M:%S'
DAY_FORMAT = '%Y-%m-%d'


def datefmt(s):
    """Convert a log timestamp such as '27/Dec/2010:10:00:00' to '2010-12-27'.

    Raises ValueError when *s* does not match LOG_TIME_FORMAT.
    """
    return time.strftime(DAY_FORMAT, time.strptime(s, LOG_TIME_FORMAT))


def parse_rows(lines):
    """Parse comma-separated log lines into (day, field1, field2) tuples.

    *lines* is any iterable of text lines (an open file works). Blank lines
    are skipped so a trailing newline at the end of a file is harmless.
    """
    rows = []
    for line in lines:
        line = line.strip()
        if not line:
            continue
        fields = line.split(',')
        # The first column carries one leading and two trailing wrapper
        # characters around the timestamp; the slice is kept exactly as the
        # input format requires. NOTE(review): presumably brackets/quotes
        # from the raw access log -- confirm against a sample file.
        rows.append((datefmt(fields[0][1:-2]), fields[1], fields[2]))
    return rows


def read_rows(path):
    """Read *path* and return its parsed rows; the file is always closed."""
    with open(path, 'r') as handle:
        return parse_rows(handle)


def group_by_day(rows):
    """Return a dict mapping each day to the list of rows logged on it.

    Unlike itertools.groupby this does not require the rows to be sorted:
    rows for the same day are merged even when they are not adjacent.
    """
    by_day = defaultdict(list)
    for row in rows:
        by_day[row[0]].append(row)
    return dict(by_day)


def _percent(part, whole):
    """Return part/whole as a percentage, or 0.0 when *whole* is zero."""
    if not whole:
        return 0.0
    return float(part) * 100.0 / float(whole)


def build_report(imp_rows, click_rows, active_rows):
    """Compute per-day statistics from parsed rows.

    Returns a list of tuples, sorted by day:
        (day, impressions, clicks, activations, click_rate, conversion_rate)

    * Every day that has at least one impression or click gets a row.
    * An activation is counted only when the same (field1, field2) pair was
      clicked on the same day.
    * click_rate is clicks/impressions and conversion_rate is
      activations/clicks, both in percent; a zero denominator yields 0.0.
    """
    all_imp = group_by_day(imp_rows)
    all_click = group_by_day(click_rows)
    all_active = group_by_day(active_rows)

    report = []
    # Join the three sources on the day key. ISO-formatted days sort
    # chronologically, so the output order is deterministic.
    for day in sorted(set(all_imp) | set(all_click)):
        day_clicks = all_click.get(day, [])
        clicked_pairs = set((row[1], row[2]) for row in day_clicks)

        imp_ct = len(all_imp.get(day, []))
        click_ct = len(day_clicks)
        active_ct = sum(
            1 for row in all_active.get(day, [])
            if (row[1], row[2]) in clicked_pairs
        )

        report.append((
            day,
            imp_ct,
            click_ct,
            active_ct,
            _percent(click_ct, imp_ct),
            _percent(active_ct, click_ct),
        ))
    return report


def format_row(entry):
    """Render one build_report() tuple as a space-separated output line."""
    day, imp_ct, click_ct, active_ct, click_rate, conversion_rate = entry
    return ' '.join([
        day,
        str(imp_ct),
        str(click_ct),
        str(active_ct),
        '%.04f%%' % click_rate,
        '%.04f%%' % conversion_rate,
    ])


def main():
    """Read the three CSV logs from the working directory and print the report."""
    report = build_report(
        read_rows('imp.csv'),
        read_rows('click.csv'),
        read_rows('active.csv'),
    )
    # Column titles: date, impressions, clicks, activations,
    # click-through rate, conversion rate.
    print("日期\t展示量\t点击量\t激活量\t点击率\t转换率")
    for entry in report:
        print(format_row(entry))


if __name__ == '__main__':
    main()


Create a new paste based on this one


Comments: