Amazing-Python-Scripts

Форк
0
88 строк · 3.1 Кб
1
from selenium import webdriver
2
from email_validator import validate_email, EmailNotValidError
3
import csv
4

5

6
def LinkedInEmailScraper(userEmail, userPassword,
                         postUrl='[INSERT URL TO LINKEDIN POST]',
                         maxExpansions=3):
    '''
    Logs in to LinkedIn and collects e-mail addresses left in the comments
    of a post.

    The function opens a Chrome session, visits ``postUrl``, follows the
    link behind the post's comment button to the login form, signs in with
    the given credentials, expands the comment list a limited number of
    times and finally reads every comment ``article``. For each comment the
    text of the first link inside the first paragraph is treated as an
    e-mail candidate and kept only if ``validate_email`` accepts it.

    Parameters:
        userEmail: e-mail address typed into the login form.
        userPassword: password typed into the login form.
        postUrl: address of the LinkedIn post to scan, e.g.
            'https://www.linkedin.com/posts/faangpath_hiring-womxn-ghc2020-activity-6721287139721650176-QFCV/'.
            Defaults to the original placeholder string.
        maxExpansions: how many times the "show previous comments" button
            is clicked at most. ``None`` keeps clicking until the button can
            no longer be found or clicked.

    Returns:
        dict mapping the commenter's name (inner HTML of the name element)
        to the validated e-mail address found in that comment.

    Raises:
        selenium.common.exceptions.WebDriverException: when the page
            structure differs from the hard-coded locators (post page,
            login form or comment section not found). The browser is closed
            before the exception propagates.
    '''
    # Local imports keep this block self-contained; the file itself only
    # imports ``webdriver`` from selenium at module level.
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    emailList = {}

    browser = webdriver.Chrome()
    try:
        browser.get(postUrl)  # visits page of the desired post

        # NOTE: implicitly_wait() sets the timeout used by element lookups;
        # it does not pause execution by itself.
        browser.implicitly_wait(5)

        # The comment button of a logged-out post view links to the login
        # page, so its href is used to reach the login form.
        commentButton = browser.find_element(
            By.XPATH, '/html/body/main/section[1]/section[1]/div/div[3]/a[2]')
        loginLink = commentButton.get_attribute('href')
        browser.get(loginLink)

        emailField = browser.find_element(By.XPATH, '//*[@id="username"]')
        passwordField = browser.find_element(By.XPATH, '//*[@id="password"]')
        emailField.send_keys(userEmail)
        passwordField.send_keys(userPassword)
        submitButton = browser.find_element(
            By.XPATH,
            '//*[@id="app__container"]/main/div[3]/form/div[3]/button')
        submitButton.submit()  # submits the login form

        browser.implicitly_wait(5)

        commentSection = browser.find_element(
            By.CSS_SELECTOR, '.comments-comments-list')

        # Expand older comments; stop as soon as the button is gone or
        # cannot be clicked any more.
        expansions = 0
        while maxExpansions is None or expansions < maxExpansions:
            try:
                moreCommentsButton = commentSection.find_element(
                    By.CLASS_NAME,
                    'comments-comments-list__show-previous-container'
                ).find_element(By.TAG_NAME, 'button')
                moreCommentsButton.click()
                browser.implicitly_wait(5)
            except WebDriverException:
                print('End of checking comments')
                break
            expansions += 1

        browser.implicitly_wait(20)

        # Every individual comment is rendered as an <article>.
        comments = commentSection.find_elements(By.TAG_NAME, 'article')

        for comment in comments:
            try:
                nameElement = comment.find_element(
                    By.CLASS_NAME, 'hoverable-link-text')
                commentText = comment.find_element(By.TAG_NAME, 'p')
                rawEmail = commentText.find_element(
                    By.TAG_NAME, 'a').get_attribute('innerHTML')
                # A missing attribute is treated like an invalid address.
                validEmail = validate_email(rawEmail or '')
                commenterName = nameElement.get_attribute('innerHTML')
            except (WebDriverException, EmailNotValidError):
                # Comment without a name, paragraph, link or valid address:
                # skip it and keep scanning the remaining comments.
                continue

            emailList[commenterName] = validEmail.email
    finally:
        # Always release the browser, even when a lookup above fails.
        browser.quit()

    return emailList
70

71

72
def DictToCSV(input_dict, output_path='./LinkedIn Email Scraper/emails.csv'):
    '''
    Converts dictionary into csv

    Writes a ``name,email`` header followed by one row per dictionary
    entry. Rows are produced by ``csv.writer`` so names or addresses that
    contain commas, quotes or line breaks are quoted instead of corrupting
    the column layout.

    Parameters:
        input_dict: mapping of name -> e-mail address.
        output_path: destination file; defaults to the original hard-coded
            location. Missing parent directories are created.

    Returns:
        None. The file at ``output_path`` is overwritten.
    '''
    import os

    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        # The default path is relative to the working directory, which may
        # not contain the target folder yet.
        os.makedirs(parent_dir, exist_ok=True)

    # newline='' lets the csv module control line endings; UTF-8 keeps
    # non-ASCII names writable regardless of the platform's default codec.
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(['name', 'email'])
        writer.writerows(input_dict.items())
81

82

83
if __name__ == '__main__':
    # Credentials are read from the environment so that a real password
    # never has to be typed into (and possibly committed with) this file.
    # When the variables are unset the original placeholders are used.
    import os

    userEmail = os.environ.get(
        'LINKEDIN_EMAIL',
        '[INSERT YOUR EMAIL ADDRESS FOR LINKEDIN ACCOUNT]')
    userPassword = os.environ.get(
        'LINKEDIN_PASSWORD',
        '[INSERT YOUR PASSWORD FOR LINKEDIN ACCOUNT')

    # Scrape the comments, then persist the name -> e-mail mapping as CSV.
    emailList = LinkedInEmailScraper(userEmail, userPassword)
    DictToCSV(emailList)
89

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.