Exercise: compare split words:
We have three ways of splitting a string into words. Using split, using re.split and by going over it character-by-charcter. Which one is the fastest?
examples/perf/split_to_words.py
import sys import re def split_to_words_by_regex(text): return re.split(' ', text) def split_to_words_by_split(text): return text.split() def split_to_words_by_chars(text): words = [] word = '' for ch in text: if ch == ' ': words.append(word) word = '' else: word += ch if word: words.append(word) return words if __name__ == '__main__': if len(sys.argv) < 2: exit(f"Usage: {sys.argv[0]} FILENAME") filename = sys.argv[1] with open(filename) as fh: text = fh.read() res1 = split_to_words_by_split(text) res2 = split_to_words_by_chars(text) res3 = split_to_words_by_regex(text) #print(res1) #print(res2) assert res1 == res2 assert res1 == res3