Solution: Merge files with timestamps



examples/dictionary/merge/merge.py
import sys

def _read_record(fh):
    """Return the next non-blank line of *fh*, or ``''`` at end of file.

    The returned line always ends with a newline, so a final line that
    lacks one cannot be glued onto the next record in the merged output.
    Whitespace-only lines carry no timestamp and are skipped.
    """
    for line in fh:
        if not line.strip():
            continue
        return line if line.endswith('\n') else line + '\n'
    return ''


def _timestamp_of(line):
    """Return the integer timestamp in the first comma-separated field."""
    return int(line.split(',', 1)[0])


def merge_files(file_a, file_b, out=None):
    """Merge two timestamp-sorted files into one timestamp-sorted stream.

    Each line is expected to start with an integer timestamp followed by a
    comma. Both inputs must already be sorted by that timestamp.

    Args:
        file_a: Path of the first input file.
        file_b: Path of the second input file.
        out: Writable text stream for the merged lines; defaults to
            ``sys.stdout`` (looked up at call time).

    Raises:
        OSError: If either file cannot be opened.
        ValueError: If a line that has to be compared does not start with
            an integer timestamp.
    """
    if out is None:
        out = sys.stdout

    with open(file_a) as fha, open(file_b) as fhb:
        line_a = _read_record(fha)
        line_b = _read_record(fhb)
        while line_a or line_b:
            # Timestamps are parsed only when both files still have a
            # pending line; the tail of the longer file is copied as-is.
            # "<=" keeps the merge stable: on equal timestamps the line
            # from file_a is emitted first.
            take_a = not line_b or (
                line_a and _timestamp_of(line_a) <= _timestamp_of(line_b)
            )
            if take_a:
                out.write(line_a)
                line_a = _read_record(fha)
            else:
                out.write(line_b)
                line_b = _read_record(fhb)


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit(f"Usage: {sys.argv[0]} FILE_A FILE_B")
    merge_files(sys.argv[1], sys.argv[2])

examples/dictionary/merge/merge_all.py
import contextlib
import heapq
import sys

def _records(fh):
    """Yield the non-blank lines of *fh*, each terminated by a newline.

    Normalising the terminator keeps a final line without a newline from
    being glued onto the next record in the merged output.
    """
    for line in fh:
        if line.strip():
            yield line if line.endswith('\n') else line + '\n'


def _timestamp(line):
    """Return the integer timestamp in the first comma-separated field."""
    return int(line.split(',', 1)[0])


def merge_all(filenames, out=None):
    """Merge any number of timestamp-sorted files into one sorted stream.

    Each line is expected to start with an integer timestamp followed by a
    comma, and every input must already be sorted by that timestamp.
    Timestamps are compared numerically (so 9 sorts before 10). On equal
    timestamps the line from the file listed earlier is emitted first.

    Files that cannot be opened are reported on stderr and skipped, so the
    message never ends up inside the merged output.

    Args:
        filenames: Iterable of input file paths. A path listed more than
            once is merged only once.
        out: Writable text stream for the merged lines; defaults to
            ``sys.stdout`` (looked up at call time).

    Raises:
        ValueError: If a line that has to be compared does not start with
            an integer timestamp.
    """
    if out is None:
        out = sys.stdout

    # ExitStack closes every opened file even if merging raises part-way.
    with contextlib.ExitStack() as stack:
        streams = []
        # dict.fromkeys drops duplicate paths while keeping argument order.
        for filename in dict.fromkeys(filenames):
            try:
                fh = stack.enter_context(open(filename))
            except OSError:
                print(f"Could not open {filename}", file=sys.stderr)
                continue
            streams.append(_records(fh))

        # heapq.merge streams lazily and is stable across its inputs.
        out.writelines(heapq.merge(*streams, key=_timestamp))


if __name__ == "__main__":
    merge_all(sys.argv[1:])