Month: October 2016
For a better view, check out the GitHub link.
#
# 20160929 - by sphinxid - firstname.lastname@example.org
#
# Example of multithreaded selenium webdriver with phantomjs in Python.
# In this example, it will use 10 threads + 10 PhantomJS instances to make
# 25000 requests to "url".
#
import concurrent.futures
import signal
import time
from concurrent.futures import ThreadPoolExecutor
from random import randint


def fetch(url, driver):
    """Load ``url`` in ``driver`` and report the outcome on stdout.

    Prints ``1`` when the navigation succeeds and ``2`` when any step raises.
    Failures are deliberately swallowed so one bad page load does not abort
    the whole run.

    Args:
        url: Address to load.
        driver: A WebDriver-like object providing ``current_url``, ``get``,
            ``refresh``, ``implicitly_wait`` and ``set_page_load_timeout``.

    Returns:
        Always ``0``.
    """
    try:
        # Configure the timeouts before navigating so they also bound the
        # very first page load performed with this driver.
        driver.implicitly_wait(2)
        driver.set_page_load_timeout(2)
        if not driver.current_url:
            driver.refresh()
        else:
            driver.get(url)
        print(1)
    except Exception:
        # Best effort: report the failure and carry on. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        print(2)
    return 0


def clean_up(driver):
    """Terminate the browser process behind ``driver`` and close the session.

    Both steps are best effort and independent of each other: a failure to
    signal the process must not prevent ``quit()`` from being attempted.

    Args:
        driver: A WebDriver-like object providing ``service.process`` and
            ``quit``.
    """
    try:
        driver.service.process.send_signal(signal.SIGTERM)
    except Exception:
        # The process may already be gone or was never started.
        pass
    try:
        driver.quit()
    except Exception:
        # The session may already be closed after the SIGTERM above.
        pass
    return


def _run_requests(url, driver, count):
    """Issue ``count`` sequential requests to ``url`` on one dedicated driver."""
    for _ in range(count):
        fetch(url, driver)


def main():
    """Start one PhantomJS per worker thread and spread the requests over them."""
    # Imported here so fetch() and clean_up() remain importable on machines
    # that do not have selenium installed.
    from selenium import webdriver

    num_thread = 10
    num_request = 25000
    url = "http://www.yahoo.com/"

    parr = []
    try:
        # instantiate PhantomJS per thread
        for x in range(num_thread):
            print("Initialized thread %s " % x)
            parr.append(webdriver.PhantomJS())
            print(" OK.")

        start_time = time.time()

        # A WebDriver session is not safe to share between threads, so every
        # worker owns exactly one driver and handles its share of the load.
        # The remainder is spread over the first workers so that exactly
        # num_request requests are issued in total.
        base, extra = divmod(num_request, num_thread)
        with ThreadPoolExecutor(num_thread) as pool:
            futures = [
                pool.submit(_run_requests, url, driver,
                            base + (1 if index < extra else 0))
                for index, driver in enumerate(parr)
            ]
            # Block until every worker has finished; result() re-raises any
            # unexpected error instead of silently dropping it.
            for future in concurrent.futures.as_completed(futures):
                future.result()

        elapsed = time.time() - start_time
        print("Finished %s requests in %.2f seconds" % (num_request, elapsed))
    finally:
        # clean_up: make sure every phantomjs process is closed, even when
        # start-up or the run itself failed part-way through.
        for driver in parr:
            clean_up(driver)


if __name__ == "__main__":
    main()