selenium

How to Render an HTML Page with Selenium WebDriver + PhantomJS in Python

Posted on Updated on

For a better view, check out the Github link.

#
# 20160929 - by sphinxid - firman.gautama@gmail.com
#
# Example of multithreaded selenium webdriver with phantomjs in Python.
# This example uses 10 threads and 10 PhantomJS instances to make 25000 requests to "url".
# 

from selenium import webdriver
import time
import concurrent.futures
import signal
from concurrent.futures import ThreadPoolExecutor
from random import randint

def fetch(url, driver):
    """Load ``url`` in ``driver`` and report the outcome on stdout.

    Prints ``1`` when the navigation completes without raising and ``2``
    when any step raises.

    Args:
        url: Address to load.
        driver: WebDriver-like object exposing ``current_url``, ``get``,
            ``refresh``, ``implicitly_wait`` and ``set_page_load_timeout``.

    Returns:
        Always ``0``; a failure is only visible through the printed ``2``.
    """
    try:
        # Configure the waits *before* navigating so that they also bound
        # this load, not only the next one issued on the same driver.
        driver.implicitly_wait(2)
        driver.set_page_load_timeout(2)

        # NOTE(review): refreshing when the driver reports no current URL
        # looks inverted (there is nothing to refresh) -- confirm the intent.
        # The original branching is kept unchanged.
        if not driver.current_url:
            driver.refresh()
        else:
            driver.get(url)

        print(1)
    except Exception:
        # Best-effort request: a failed or timed-out load must not abort the
        # run.  Catching Exception (rather than a bare except) still lets
        # KeyboardInterrupt and SystemExit propagate.
        print(2)

    return 0

def clean_up(driver):
    """Shut down ``driver`` and its browser process, best-effort.

    Sends SIGTERM to the process held at ``driver.service.process`` and then
    calls ``driver.quit()``.  The two steps are attempted independently so
    that a failure of the first (for example when the process has already
    exited) does not prevent the second from running.

    Args:
        driver: WebDriver-like object exposing ``service.process`` and
            ``quit``.

    Returns:
        None.  Errors raised by either step are suppressed.
    """
    try:
        driver.service.process.send_signal(signal.SIGTERM)
    except Exception:
        # The process may already be gone; still try to close the session.
        pass

    try:
        driver.quit()
    except Exception:
        # After SIGTERM the driver may no longer be reachable; nothing is
        # left to release in that case.
        pass

    return

if __name__ == "__main__":
    # Script entry point: start one PhantomJS instance per worker thread,
    # spread `num_request` loads of `url` across them, report the elapsed
    # time, and always shut the browser processes down afterwards.
    num_thread = 10
    num_request = 25000
    url = "http://www.yahoo.com/"

    def _fetch_batch(driver, count):
        """Issue `count` sequential requests for `url` on a single driver."""
        for _ in range(count):
            fetch(url, driver)

    # instantiate threadpool
    pool = ThreadPoolExecutor(num_thread)
    parr = []

    try:
        # instantiate PhantomJS per thread
        for x in range(num_thread):
            print("Initialized thread %s " % x)
            parr.append(webdriver.PhantomJS())
            print(" OK.")

        start_time = time.time()

        # Give every driver exactly one worker task, so no driver is ever
        # used by two threads at the same time, and split the requests
        # evenly.  The first `extra` drivers take one additional request, so
        # the total is exactly `num_request`.
        base, extra = divmod(num_request, num_thread)
        futures = [
            pool.submit(_fetch_batch, driver, base + (1 if i < extra else 0))
            for i, driver in enumerate(parr)
        ]

        # Future.done() only polls; block here until every batch has finished.
        concurrent.futures.wait(futures)

        print("Completed %s requests in %.2f seconds"
              % (num_request, time.time() - start_time))
    finally:
        # Let the workers finish before their drivers are torn down.
        pool.shutdown(wait=True)

        # clean_up: make sure phantomjs process is closed
        for driver in parr:
            clean_up(driver)