Solution: thread URL requests.
examples/parallel/fetch_urls_threads.py
import sys
import threading
import time

from fetch_urls import get_urls, get_title

titles = []                     # shared result list, guarded by `locker`
locker = threading.Lock()


class GetURLs(threading.Thread):
    """Fetch the titles for a batch of URLs in a worker thread."""

    def __init__(self, urls):
        threading.Thread.__init__(self)
        self.urls = urls

    def run(self):
        # Collect results locally first so the lock is held only briefly.
        my_titles = []
        for url in self.urls:
            title, err = get_title(url)
            my_titles.append({
                'url': url,
                'title': title,
                'err': err,
            })
        with locker:
            titles.extend(my_titles)


def main():
    if len(sys.argv) < 3:
        exit(f"Usage: {sys.argv[0]} LIMIT THREADS")
    limit = int(sys.argv[1])
    threads_count = int(sys.argv[2])
    urls = get_urls(limit)
    print(urls)

    start_time = time.time()

    # Split the URLs into one batch per thread, handing the
    # leftover URLs out one apiece to the first threads.
    batch_size = limit // threads_count
    left_over = limit % threads_count
    batches = []
    end = 0
    for ix in range(threads_count):
        start = end
        end = start + batch_size
        if ix < left_over:
            end += 1
        batches.append(urls[start:end])

    threads = [GetURLs(batch) for batch in batches]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    end_time = time.time()
    print("Elapsed time: {} for {} pages.".format(
        end_time - start_time, len(urls)))
    print(titles)


if __name__ == '__main__':
    main()
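The listing depends on two helpers imported from fetch_urls, the serial version of this exercise. For reference, here is a minimal sketch of what they might look like, assuming only the signatures the threaded script relies on: get_urls(limit) returns a list of URLs, and get_title(url) returns a (title, err) pair. The placeholder URL list and the requests/BeautifulSoup details are assumptions, not the book's actual module.

import requests
from bs4 import BeautifulSoup


def get_urls(limit):
    # Placeholder list; the real module builds this from an actual source.
    urls = [
        'https://example.com/',
        'https://www.python.org/',
        'https://www.wikipedia.org/',
    ]
    return urls[:limit]


def get_title(url):
    # Return (title, err): the page's <title> text on success,
    # or None plus the error on failure.
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
    except requests.RequestException as err:
        return None, err
    soup = BeautifulSoup(resp.text, 'html.parser')
    title = soup.title.string if soup.title else None
    return title, None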
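Run the script with a page limit and a thread count, for example python fetch_urls_threads.py 10 3 to fetch 10 pages across 3 threads. The splitting loop gives each thread limit // threads_count URLs and distributes the left_over remainder one per thread, so 10 URLs across 3 threads become batches of 4, 3, and 3. Note the design of run(): each worker accumulates results in its private my_titles list and takes the lock only once, at the end, to merge into the shared titles list, so lock contention stays negligible even with many threads.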