- collections
- defaultdict
- Counter
- timeit
- try
- except
Counting words - which implementation is faster?
- In this example we have 4 functions that count the number of appearances of each word in a list that is already in memory.
- We use timeit to benchmark them.
- repeat is the number of repetitions of each string.
- different is the number of different strings.
examples/dictionary/count_words_speed.py
from collections import defaultdict
from collections import Counter
import timeit


def generate_list_of_words(number, repeat):
    """Return a list holding ``number`` distinct strings, each ``repeat`` times.

    The strings are the decimal forms of ``0 .. number - 1`` and equal
    strings are adjacent, e.g. ``generate_list_of_words(2, 3)`` returns
    ``['0', '0', '0', '1', '1', '1']``.
    """
    return [str(ix) for ix in range(number) for _ in range(repeat)]


def plain_counter(words):
    """Count the words in a plain dict, testing for the key before each increment."""
    counter = {}
    for word in words:
        if word not in counter:
            counter[word] = 0
        counter[word] += 1
    return counter


def counter_with_exceptions(words):
    """Count the words in a plain dict, creating missing keys on ``KeyError``."""
    counter = {}
    for word in words:
        try:
            counter[word] += 1
        except KeyError:
            # First appearance of this word.
            counter[word] = 1
    return counter


def counter_with_counter(words):
    """Count the words by incrementing the entries of a ``collections.Counter``."""
    counter = Counter()
    for word in words:
        counter[word] += 1
    return counter


def counter_with_default_dict(words):
    """Count the words by incrementing the entries of a ``defaultdict(int)``."""
    counter = defaultdict(int)
    for word in words:
        counter[word] += 1
    return counter


def main():
    """Benchmark the four counting functions and print the timings.

    For every ``repeat`` value a list of 1000 words is generated in which
    each distinct word appears ``repeat`` times; every counting function is
    then timed over 10000 calls on that list.
    """
    for repeat in [1, 10, 20, 50]:
        # Keep the total number of words at 1000 for every run.
        different = 1000 // repeat
        print(f'repeat {repeat} different {different}')
        for name in ['plain_counter', 'counter_with_counter', 'counter_with_default_dict', 'counter_with_exceptions']:
            # The setup code imports from __main__, so the benchmark has to
            # be started by running this file as a script.
            setup = (
                f'from __main__ import {name}, generate_list_of_words; '
                f'words = generate_list_of_words({different}, {repeat})'
            )
            elapsed = timeit.timeit(f'{name}(words)', number=10000, setup=setup)
            print("{:26} {}".format(name, elapsed))
        print()


if __name__ == "__main__":
    main()
repeat 1 different 1000
plain_counter              0.6091844770126045
counter_with_counter       1.232734862016514
counter_with_default_dict  0.7378899219911546
counter_with_exceptions    1.4480015779845417

repeat 10 different 100
plain_counter              0.4949962190585211
counter_with_counter       0.7886336819501594
counter_with_default_dict  0.4284116430208087
counter_with_exceptions    0.4748374510090798

repeat 20 different 50
plain_counter              0.4847069630632177
counter_with_counter       0.7627606929745525
counter_with_default_dict  0.4116779019823298
counter_with_exceptions    0.407719356007874

repeat 50 different 20
plain_counter              0.4709314970532432
counter_with_counter       0.7357207209570333
counter_with_default_dict  0.3903243549866602
counter_with_exceptions    0.36094399297144264