Web client
examples/twisted/web_client.py
# Download a fixed list of URLs with Twisted while keeping at most
# ``max_parallel`` requests in flight at any time.
#
# Usage: web_client.py [MAX_PARALLEL]
import re
import sys
import time

from twisted.internet import reactor
from twisted.web.client import getPage

# URLs still waiting to be fetched; process_queue() consumes it from the front.
queue = [
    'http://docs.python.org/3/',
    'http://docs.python.org/3/whatsnew/3.3.html',
    'http://docs.python.org/3/tutorial/index.html',
    'http://docs.python.org/3/library/index.html',
    # The comma after the next entry matters: without it Python joins the
    # two adjacent string literals into a single, invalid URL.
    'http://docs.python.org/3/reference/index.html',
    'http://docs.python.org/3/howto/index.html',
    'http://docs.python.org/3/howto/pyporting.html',
    'http://docs.python.org/3/howto/cporting.html',
    'http://docs.python.org/3/howto/curses.html',
    'http://docs.python.org/3/howto/descriptor.html',
    'http://docs.python.org/3/howto/functional.html',
    'http://docs.python.org/3/howto/logging.html',
    'http://docs.python.org/3/howto/logging-cookbook.html',
    'http://docs.python.org/3/howto/regex.html',
    'http://docs.python.org/3/howto/sockets.html',
    'http://docs.python.org/3/howto/sorting.html',
    'http://docs.python.org/3/howto/unicode.html',
    'http://docs.python.org/3/howto/urllib2.html',
    'http://docs.python.org/3/howto/webservers.html',
    'http://docs.python.org/3/howto/argparse.html',
    'http://docs.python.org/3/howto/ipaddress.html',
]

max_parallel = 3      # upper bound on simultaneous downloads
current_parallel = 0  # downloads currently in flight

# An optional single command-line argument overrides the default limit.
if len(sys.argv) == 2:
    max_parallel = int(sys.argv[1])
if max_parallel < 1:
    # With no free slot process_queue() never starts a download and never
    # stops the reactor, so reactor.run() would block forever.
    sys.exit("max_parallel must be a positive integer")


def printPage(result):
    """Success callback: report the page size, free a slot, refill slots.

    ``result`` is the value the getPage() Deferred fires with; only its
    length is used here.
    """
    global current_parallel
    print("page size: ", len(result))
    current_parallel -= 1
    print("current_parallel: ", current_parallel)
    # Disabled crawling step: queue every link found in the fetched page.
    #urls = re.findall(r'href="([^"]+)"', result)
    #for u in urls:
    #    queue.append(u)
    #queue.extend(urls)
    process_queue()


def printError(error):
    """Failure callback: report the error, free a slot, refill slots."""
    global current_parallel
    print("Error: ", error)
    current_parallel -= 1
    process_queue()


def stop(result):
    """Stop the reactor; usable as a Deferred callback (``result`` ignored)."""
    reactor.stop()


def process_queue():
    """Start downloads until every slot is busy or the queue is empty.

    Stops the reactor once the queue is empty and no download is still in
    flight. Called once at start-up and again from each download callback.
    """
    global current_parallel
    print("process_queue cs: {} max: {}".format(current_parallel,
                                                max_parallel))
    while True:
        if current_parallel >= max_parallel:
            print("No empty slot")
            return
        if not queue:
            print("queue is empty")
            if current_parallel == 0:
                reactor.stop()
            return
        # Append the current timestamp as a query string so every request
        # URL is unique (presumably to avoid cached responses).
        url = queue.pop(0) + '?' + str(time.time())
        current_parallel += 1
        # NOTE(review): getPage is deprecated in newer Twisted releases;
        # confirm against the installed version before reusing this code.
        d = getPage(url)
        d.addCallbacks(printPage, printError)


# Fill the initial slots, then hand control to the event loop; run() returns
# after reactor.stop() has been called.
process_queue()
reactor.run()
print("----done ---")