# Source: Amazing-Python-Scripts (original listing: 88 lines, 3.1 KB)
import csv

from email_validator import validate_email, EmailNotValidError
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
4
5
def LinkedInEmailScraper(userEmail, userPassword,
                         postUrl='[INSERT URL TO LINKEDIN POST]',
                         maxExpansions=3):
    '''
    Logs into LinkedIn and collects e-mail addresses posted in a post's comments.

    Opens the post in Chrome, follows the link behind the comment button
    (used here as the route to the login form), signs in, expands the comment
    list up to ``maxExpansions`` times, and keeps every comment whose first
    link inside its first paragraph validates as an e-mail address.

    Args:
        userEmail: LinkedIn account e-mail typed into the login form.
        userPassword: LinkedIn account password typed into the login form.
        postUrl: URL of the LinkedIn post to scrape, for example
            'https://www.linkedin.com/posts/faangpath_hiring-womxn-ghc2020-activity-6721287139721650176-QFCV/'.
            Defaults to the placeholder the script shipped with.
        maxExpansions: maximum number of clicks on the "show previous
            comments" button. Raise it to walk further back through the
            comment section.

    Returns:
        dict mapping each commenter's name (innerHTML of the name element)
        to the validated e-mail address. A later comment from the same name
        overwrites the earlier entry.

    Raises:
        selenium.common.exceptions.WebDriverException: if the post page, the
            login form, or the comment section cannot be located.
    '''
    emailList = {}

    browser = webdriver.Chrome()
    try:
        browser.get(postUrl)  # visits page of the desired post

        # NOTE: implicitly_wait() does not pause the script; it sets how long
        # subsequent element lookups keep retrying before they give up.
        browser.implicitly_wait(5)

        # find_element(By..., ...) is used throughout because the
        # find_element_by_* helpers were removed in Selenium 4.3.
        commentButton = browser.find_element(
            By.XPATH,
            '/html/body/main/section[1]/section[1]/div/div[3]/a[2]'
        )  # finds comment button
        loginLink = commentButton.get_attribute('href')
        browser.get(loginLink)

        email = browser.find_element(By.XPATH, '//*[@id="username"]')
        password = browser.find_element(By.XPATH, '//*[@id="password"]')
        email.send_keys(userEmail)  # inputs email in email field
        password.send_keys(userPassword)  # inputs password in password field
        submit = browser.find_element(
            By.XPATH,
            '//*[@id="app__container"]/main/div[3]/form/div[3]/button')
        submit.submit()  # submits form

        browser.implicitly_wait(5)

        commentSection = browser.find_element(
            By.CSS_SELECTOR, '.comments-comments-list'
        )  # finds the comments section

        for _ in range(maxExpansions):
            try:
                moreCommentsButton = commentSection.find_element(
                    By.CLASS_NAME,
                    'comments-comments-list__show-previous-container'
                ).find_element(By.TAG_NAME, 'button')
                moreCommentsButton.click()
                browser.implicitly_wait(5)
            except WebDriverException:
                # No (clickable) "show previous" button left: every comment
                # that can be loaded has been loaded.
                print('End of checking comments')
                break

        browser.implicitly_wait(20)

        # finds all individual comments
        comments = commentSection.find_elements(By.TAG_NAME, 'article')

        for comment in comments:
            try:
                # finds name of commenter
                commenterName = comment.find_element(
                    By.CLASS_NAME, 'hoverable-link-text')
                commentText = comment.find_element(By.TAG_NAME, 'p')
                # finds email of commenter
                commenterEmail = commentText.find_element(
                    By.TAG_NAME, 'a').get_attribute('innerHTML')
                # validates email address
                validEmail = validate_email(commenterEmail)
                commenterEmail = validEmail.email
            except (WebDriverException, EmailNotValidError):
                # Comment has no name/paragraph/link, or the link text is not
                # a valid e-mail address: skip it.
                continue

            emailList[commenterName.get_attribute('innerHTML')] = commenterEmail
    finally:
        # Always close the browser, even when a lookup above fails.
        browser.quit()

    return emailList
70
71
def DictToCSV(input_dict, output_path='./LinkedIn Email Scraper/emails.csv'):
    '''
    Converts a name -> email dictionary into a two-column CSV file.

    The file starts with a ``name,email`` header row followed by one row per
    dictionary entry, in the dictionary's iteration order. Values containing
    commas, quotes or line breaks are quoted by the csv module so that each
    entry stays a single two-field row.

    Args:
        input_dict: mapping of commenter name to e-mail address.
        output_path: destination file; it is overwritten if it exists.
            Defaults to the location the script has always written to.

    Raises:
        OSError: if the file cannot be opened for writing (for example when
            the parent directory does not exist).
    '''
    # newline='' lets the csv module control line endings itself;
    # lineterminator='\n' keeps the rows '\n'-terminated as before.
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(('name', 'email'))
        writer.writerows(input_dict.items())
81
82
if __name__ == '__main__':
    # Replace both placeholders with real LinkedIn credentials before running.
    userEmail = '[INSERT YOUR EMAIL ADDRESS FOR LINKEDIN ACCOUNT]'
    userPassword = '[INSERT YOUR PASSWORD FOR LINKEDIN ACCOUNT]'

    # Scrape the commenters' e-mail addresses, then save them as a CSV file.
    emailList = LinkedInEmailScraper(userEmail, userPassword)
    DictToCSV(emailList)
89