Solution: Process N files in parallel
examples/multiprocess/create_text_files.py
import sys
import string
import random


def main():
    """Create numbered text files filled with random rows.

    Reads two command-line arguments, NUMBER_OF_FILES and NUMBER_OF_ROWS,
    and writes the files ``1.txt``, ``2.txt``, ... into the current working
    directory. Every row holds between 0 and 80 characters drawn from the
    ASCII letters, the digits and the space character.

    Raises:
        SystemExit: with a usage message when the number of arguments is
            wrong or when either argument is not an integer.
    """
    usage = f"Usage: {sys.argv[0]} NUMBER_OF_FILES NUMBER_OF_ROWS"
    if len(sys.argv) != 3:
        # sys.exit() rather than the site-provided exit(): the latter is
        # meant for the interactive shell and is not always available.
        sys.exit(usage)
    try:
        number_of_files = int(sys.argv[1])
        number_of_rows = int(sys.argv[2])
    except ValueError:
        # Non-numeric input is a usage error, not a reason for a traceback.
        sys.exit(usage)

    characters = string.ascii_letters + " " + string.digits
    for file_id in range(1, number_of_files + 1):
        filename = f"{file_id}.txt"
        with open(filename, "w") as fh:
            for _ in range(number_of_rows):
                # randrange excludes the upper bound: rows are 0..80 long.
                length = random.randrange(0, 81)
                row = "".join(random.choices(characters, k=length))
                fh.write(row + "\n")


if __name__ == "__main__":
    main()
examples/multiprocess/count_digits.py
import sys
import string


def count_digits(filename):
    """Count how often each decimal digit occurs in a text file.

    Args:
        filename: path of the file to read.

    Returns:
        A dict with two keys: ``"filename"`` (the path exactly as given) and
        ``"count"`` (a dict mapping each of the characters ``"0"`` to ``"9"``
        to its number of occurrences; digits that never occur map to 0).
    """
    count = dict.fromkeys(string.digits, 0)
    with open(filename) as fh:
        for row in fh:
            for cr in row:
                if cr in count:
                    count[cr] += 1
    return {
        "filename": filename,
        "count": count,
    }


def print_table(results):
    """Print per-file digit counts as an aligned table with a TOTAL row.

    Args:
        results: list of dicts shaped like the return value of
            count_digits(). The list is traversed more than once, so it
            must not be a one-shot iterator.
    """
    dw = 6  # width of every digit column
    total_label = "TOTAL"

    # The label column has to fit the TOTAL label as well as the filenames;
    # otherwise the last row is shifted right whenever every filename is
    # shorter than the label (or when there are no results at all).
    width = len(total_label)
    for res in results:
        width = max(width, len(res["filename"]))

    def format_row(label, values):
        """Return one table line: padded label, a space, then the columns."""
        return f"{label:{width}} " + "".join(f"{value:{dw}}" for value in values)

    print(format_row("", range(10)))

    for res in results:
        print(format_row(res["filename"], (res["count"][d] for d in string.digits)))

    total = {
        digit: sum(res["count"][digit] for res in results)
        for digit in string.digits
    }
    print(format_row(total_label, (total[d] for d in string.digits)))


def main():
    """Count the digits in every file named on the command line.

    Raises:
        SystemExit: with a usage message when no filename is given.
    """
    if len(sys.argv) < 2:
        # sys.exit() rather than the site-provided exit(), which is meant
        # for the interactive shell and is not always available.
        sys.exit(f"Usage: {sys.argv[0]} FILENAMEs")
    results = [count_digits(filename) for filename in sys.argv[1:]]
    print_table(results)


if __name__ == "__main__":
    main()
examples/multiprocess/count_digits_map.py
import sys
import count_digits


def main():
    """Count the digits of every file on the command line using map().

    Applies count_digits.count_digits to each filename in order and prints
    the collected results with count_digits.print_table.

    Raises:
        SystemExit: with a usage message when no filename is given.
    """
    if len(sys.argv) < 2:
        # sys.exit() rather than the site-provided exit(), which is meant
        # for the interactive shell and is not always available.
        sys.exit(f"Usage: {sys.argv[0]} FILENAMEs")
    files = sys.argv[1:]

    # map() is lazy; materialize it into a list before handing it over.
    results = map(count_digits.count_digits, files)
    count_digits.print_table(list(results))


if __name__ == "__main__":
    main()
examples/multiprocess/count_digits_multiprocessing_map.py
import sys
import count_digits
import multiprocessing as mp


def main():
    """Count the digits of the given files in parallel worker processes.

    The first command-line argument is the number of worker processes in
    the pool; the remaining arguments are the files to process. The results
    are printed with count_digits.print_table in the order the files were
    given.

    Raises:
        SystemExit: with a usage message when arguments are missing or when
            POOL is not a positive integer.
    """
    usage = f"Usage: {sys.argv[0]} POOL FILENAMEs"
    if len(sys.argv) < 3:
        # sys.exit() rather than the site-provided exit(), which is meant
        # for the interactive shell and is not always available.
        sys.exit(usage)
    try:
        size = int(sys.argv[1])
    except ValueError:
        sys.exit(f"{usage}\nPOOL must be a positive integer")
    if size < 1:
        # mp.Pool() rejects a pool with fewer than one process; report it
        # as a usage error instead of a traceback.
        sys.exit(f"{usage}\nPOOL must be a positive integer")
    files = sys.argv[2:]

    with mp.Pool(size) as pool:
        # Pool.map blocks until every file is processed and returns a list.
        results = pool.map(count_digits.count_digits, files)
    count_digits.print_table(results)


if __name__ == "__main__":
    main()