[ create a new paste ] login | about

Link: http://codepad.org/tRhevQ11    [ raw code | fork ]

Python, pasted on Feb 27:
import csv
import sys
import string

# Set to True when the first row of the input is a header row; the script
# then copies that row to the top of every output part.
isHeader = False

# Command line: <number of parts> <input csv path>
# Fail with a readable message instead of a bare IndexError when arguments
# are missing.
if len(sys.argv) < 3:
    sys.exit('usage: ' + sys.argv[0] + ' <number of parts> <input.csv>')

# A non-numeric part count still raises ValueError from int().
splits = int(sys.argv[1])
# Zero or negative part counts would divide by zero or produce no sensible
# chunk sizes further down, so reject them up front.
if splits < 1:
    sys.exit('number of parts must be at least 1, got ' + str(splits))
infile = sys.argv[2]

print('splitting ' + infile + ' into ' + str(splits) + ' parts...')

def file_len(infile):
    """Return the zero-based index of the last CSV record in *infile*.

    Records are counted with ``csv.reader``, not by raw lines. Callers add
    one to the result to obtain the record count.

    An empty file returns -1 (so that adding one gives a count of 0)
    instead of failing on an unbound loop variable.
    """
    last_index = -1
    with open(infile, "rb") as f:
        # Only the final index matters; the rows themselves are discarded.
        for last_index, _ in enumerate(csv.reader(f)):
            pass
    return last_index

# Split the input into chunks for writing.
# file_len returns the zero-based index of the last record, so add one to
# get the record count.
linecount = file_len(infile) + 1
print(infile + ' has ' + str(linecount) + ' records, writing files...')

# The header row is repeated in every part, so it must not be counted among
# the data rows to distribute; otherwise the last part would try to read one
# row more than the file contains.
data_rows = linecount
if isHeader and data_rows:
    data_rows -= 1

# The first (splits - 1) parts each get the floor of an even share; the last
# part takes whatever remains. This also covers the evenly divisible case,
# where the remainder equals the even share.
share = data_rows // splits
counts = [share] * (splits - 1)
counts.append(data_rows - share * (splits - 1))

# Strip only a trailing ".csv" so that other occurrences of ".csv" elsewhere
# in the path (e.g. in a directory name) are left intact.
if infile.endswith('.csv'):
    stem = infile[:-len('.csv')]
else:
    stem = infile

with open(infile, "rb") as fin:
    reader = csv.reader(fin)
    # Read the header only when one is expected and the file is not empty.
    header = next(reader) if isHeader and linecount else None
    for part, count in enumerate(counts, 1):
        # Parts are numbered from 1: <stem>_1.csv, <stem>_2.csv, ...
        with open(stem + "_" + str(part) + ".csv", "wb+") as fout:
            writer = csv.writer(fout)
            if header is not None:
                writer.writerow(header)
            for _ in xrange(count):
                writer.writerow(next(reader))


Create a new paste based on this one


Comments: