Today we will look at an implementation that uses the Selenium library to scrape LinkedIn job postings by title and location, returning each job's title, location, and description, as well as the company that posted it. To use it, enter your credentials below, along with the job title you are looking for, the location you want to search, and how many pages of LinkedIn results you would like to scrape.
Once these jobs are scraped, we use natural language processing, via sklearn's CountVectorizer and cosine_similarity, to estimate how closely our resume matches each job description.
This project can be further adapted to analyze the contents of the resume, or to create visualizations, such as word clouds, that show the most common skills required for a given job; e.g., a Data Scientist posting will likely mention analysis, Python, and data quite often.
import selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
from pathlib import Path
import requests
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import docx2txt
import PyPDF2
import textract
# Result lists that LI_Scraper appends to while it scrapes.
job_titles = []
companies = []
locations = []
job_descs = []

# Search terms and LinkedIn credentials -- replace the placeholders below.
keywords = ["INPUT KEY WORDS HERE"]
location = ["INPUTE A LOCATION HERE"]
email = "INPUT YOUR LINKEDIN EMAIL HERE"
password = "INPUT YOUR PASSWORD HERE"

# CHANGE THIS TO YOUR RESUME'S PATH - NOTE, this takes both PDF & Word files.
resume = r"C:\Users\musta\OneDrive\Resumes NEW\Michael_Adams_Resume_2022.pdf"

# Number of result pages to scrape; must be an integer. Start small (2):
# page loads are slow, so a run takes a couple of minutes.
pages = 2
def LI_Scraper(keywords, location, email, password, pages):
    '''Log in to LinkedIn, run a job search and collect the listed jobs.

    Results are appended to the module-level lists job_titles, companies,
    locations and job_descs; nothing is returned.

    keywords -- text typed into the first search box (job title / keywords)
    location -- text typed into the location search box
    email    -- LinkedIn account e-mail used on the login page
    password -- LinkedIn account password
    pages    -- number of result pages to scrape (converted with int())
    '''
    # webdriver_manager supplies the chromedriver binary for the Service.
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        driver.maximize_window()

        # Go to LinkedIn and log in.
        driver.get('https://www.linkedin.com/login')
        time.sleep(3)
        driver.find_element(By.ID, 'username').send_keys(email)
        password_box = driver.find_element(By.ID, 'password')
        password_box.send_keys(password)
        password_box.send_keys(Keys.RETURN)

        driver.get("https://www.linkedin.com/jobs/")
        time.sleep(3)

        # Find the keywords/location search bars and submit the search.
        search_bars = driver.find_elements(By.CLASS_NAME, 'jobs-search-box__text-input')
        search_bars[0].click()
        time.sleep(3)
        search_bars[0].send_keys(keywords)
        time.sleep(3)
        search_bars[0].send_keys(Keys.TAB)
        time.sleep(3)
        # NOTE(review): index 3 is taken from the original code; presumably
        # the location input on the current page layout -- confirm.
        search_bars[3].send_keys(location)
        time.sleep(3)
        search_bars[3].send_keys(Keys.RETURN)
        time.sleep(5)

        # Inclusive upper bound so that exactly `pages` pages are scraped.
        for page in range(1, int(pages) + 1):
            time.sleep(3)
            if page > 1:
                # Click the pagination button for this page number.
                driver.find_element(By.XPATH, f'/html/body/div[5]/div[3]/div[3]/div[2]/div/section[1]/div/div/section/div/ul/li[{page}]/button').click()
                time.sleep(3)

            title_elements = driver.find_elements(By.CLASS_NAME, "job-card-list__title")
            for title_element in title_elements:
                print(title_element.text)
                job_titles.append(title_element.text)

            company_elements = driver.find_elements(By.CLASS_NAME, "job-card-container__company-name")
            for company_element in company_elements:
                print(company_element.text)
                companies.append(company_element.text)

            # Named differently from the `location` parameter to avoid shadowing it.
            location_elements = driver.find_elements(By.CLASS_NAME, "job-card-container__metadata-item")
            for location_element in location_elements:
                print(location_element.text)
                locations.append(location_element.text)

            # Open each job card in turn (XPath list items are 1-based) and
            # read the description shown for it.
            for job_index in range(1, len(title_elements) + 1):
                time.sleep(1)
                element = driver.find_element(By.XPATH, f'/html/body/div[5]/div[3]/div[3]/div[2]/div/section[1]/div/div/ul/li[{job_index}]/div/div/div[1]/div[2]/div[1]/a')
                time.sleep(1)
                element.click()
                # Wait for the description to load.
                time.sleep(2)
                job_description = driver.find_element(By.CLASS_NAME, 'jobs-description__container').text
                job_descs.append(job_description)
                # NOTE(review): the nesting level of this pause is inferred;
                # the source's indentation was lost -- confirm placement.
                time.sleep(5)
    finally:
        # Always shut the browser down, even if a lookup above fails.
        driver.quit()
def read_pdf_resume(file):
    '''Opens & reads in a PDF file from path.

    Returns the extracted text of all pages as a single string: pages are
    separated by a space and newline characters are removed.
    '''
    # Extract inside the with-block so the file is still open while PyPDF2
    # reads from it, and is closed afterwards.
    with open(file, 'rb') as pdf_file:
        fileReader = PyPDF2.PdfFileReader(pdf_file)
        page_count = fileReader.getNumPages()
        page_texts = [fileReader.getPage(i).extractText() for i in range(page_count)]
    # Join the page strings directly; str() on the list would leak list
    # syntax (brackets, quotes, escaped control characters) into the text.
    return ' '.join(page_texts).replace('\n', '')
def read_word_resume(filepath):
    '''Reads a .doc or .docx file from path and returns its text
    with every newline and tab replaced by a single space'''
    raw_bytes = textract.process(filepath)
    flattened = raw_bytes.decode('utf-8')
    # Same order as before: newlines first, then tabs.
    for whitespace_char in ('\n', '\t'):
        flattened = flattened.replace(whitespace_char, ' ')
    return flattened
def read_resume(infile):
    '''Takes an infile and attempts to read it as a Word file first;
    if that fails, falls back to reading it as a PDF.

    Returns the resume text as a string. Errors raised by the PDF
    fallback are not caught here.
    '''
    try:
        resume_text = read_word_resume(infile)
    except Exception:
        # Narrower than a bare except, so Ctrl-C / SystemExit still propagate.
        resume_text = read_pdf_resume(infile)
    return str(resume_text)
def get_resume_score(text):
    '''Scores how similar the first two documents in text are.

    text is handed to CountVectorizer (English stop words removed); the
    cosine similarity between document 0 and document 1 is turned into a
    percentage, rounded to two decimals and appended to score_list.
    Nothing is returned.
    '''
    vectorizer = CountVectorizer(stop_words='english')
    term_counts = vectorizer.fit_transform(text)
    similarity = cosine_similarity(term_counts)
    # Entry [0][1] is the similarity between the first and second document.
    percent_match = round(similarity[0][1] * 100, 2)
    score_list.append(percent_match)
# Run the scraper with the settings defined above; it appends its results to
# job_titles, companies, locations and job_descs.
LI_Scraper(keywords, location, email, password, pages)
====== WebDriver manager ====== Current google-chrome version is 99.0.4844 Get LATEST chromedriver version for 99.0.4844 google-chrome Driver [C:\Users\musta\.wdm\drivers\chromedriver\win32\99.0.4844.51\chromedriver.exe] found in cache
Data Scientist Data Scientist Data Scientist, Product and Engineering Data Scientist, Basketball Integrity Data Scientist Data Scientist Data Scientist WoodSpoon Newsday Media Group Notion National Basketball Association (NBA) Appian Infotech Inc. CVS Health Jobot New York, United States Hybrid Medical, Vision, Dental Melville, NY New York, NY On-site New York County, NY On-site United States Remote New York, NY On-site New York, NY Remote
# Match percentages appended by get_resume_score.
score_list = []
def see_results():
    '''Scores the resume against every scraped job description and prints
    one line per job with its match percentage, title and company.

    Reads the module-level resume path, job_descs, companies and job_titles;
    scores are produced by get_resume_score, which appends to score_list.
    '''
    if not job_descs:
        # Nothing was scraped, so there is nothing to score or print.
        return

    # Read the resume once instead of once per job description.
    resume_text = read_resume(resume)

    # Remember where this run's scores start so that scores left over from an
    # earlier call are not paired with the wrong jobs.
    first_new_score = len(score_list)
    for job_desc in job_descs:
        get_resume_score([resume_text, job_desc])

    # Print each result exactly once; zip stops at the shortest list.
    new_scores = score_list[first_new_score:]
    for score, company, job_title in zip(new_scores, companies, job_titles):
        print(f"Your resume matches approximately {score}% of the {job_title} position at {company}.")
# Score the resume against the scraped job descriptions and print the matches.
see_results()
Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. 
Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. 
Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. 
Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). 
Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. 
Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. 
Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. 
Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). 
Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. 
Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA). Your resume matches approximately 20.87% of the Data Scientist position at Transit Wireless. Your resume matches approximately 36.64% of the Data Scientist position at Underdog.io. Your resume matches approximately 15.98% of the Data Scientist position at Consumer Reports. Your resume matches approximately 27.01% of the Data Scientist, Outsights position at Tempus Labs, Inc.. 
Your resume matches approximately 27.51% of the Data Scientist position at CVS Health. Your resume matches approximately 12.54% of the Data Scientist - B2B Payments position at Bank of America. Your resume matches approximately 28.94% of the Data Scientist, Basketball Integrity position at National Basketball Association (NBA).