Amazing-Python-Scripts

Форк
0
/
google-search-newsletter.py 
135 строк · 3.8 Кб
1
from selenium import webdriver
2
from selenium.webdriver.common.keys import Keys
3
from selenium.webdriver.firefox.options import Options as Options_firefox
4
from selenium.webdriver.chrome.options import Options as Options_chrome
5
from email.mime.text import MIMEText
6
from configparser import ConfigParser
7
import smtplib
8

9
# User settings are read from an INI file; the relative name is resolved
# against the current working directory. ConfigParser.read() silently skips
# files it cannot open, so a missing file only shows up later, when a
# config.get() call fails to find the section.
config_file = 'config.ini'
config = ConfigParser()
config.read(config_file)

# Text file that scrape_news() writes and send_email() reads back.
newsletter_file = 'newsletter.txt'
13

14

15
def _create_browser(driver):
    """Return a headless Selenium browser for the given driver name.

    Args:
        driver: name of the driver executable, expected in the current
            directory; either 'geckodriver' (Firefox) or 'chromedriver'
            (Chrome).

    Raises:
        ValueError: if ``driver`` is not one of the supported names.
    """
    path_to_driver = "./%s" % driver

    if driver == 'geckodriver':
        firefox_options = Options_firefox()

        # run in headless mode
        firefox_options.headless = True

        # disable cookies to prevent popups
        firefox_pref = webdriver.FirefoxProfile()
        firefox_pref.set_preference("network.cookie.cookieBehavior", 2)

        return webdriver.Firefox(executable_path=path_to_driver,
                                 options=firefox_options,
                                 firefox_profile=firefox_pref)

    if driver == 'chromedriver':
        chrome_options = Options_chrome()

        # run in headless mode
        chrome_options.add_argument('--headless')

        # disable cookies to prevent popups
        chrome_options.add_experimental_option(
            'prefs', {'profile.default_content_setting_values.cookies': 2})

        return webdriver.Chrome(executable_path=path_to_driver,
                                options=chrome_options)

    # Fail here with a clear message instead of printing and then crashing
    # later on an unbound ``browser`` variable.
    raise ValueError('ERROR: driver not supported: %r' % driver)


def scrape_news():
    """Search Google News for the configured topic and save the results.

    Reads ``driver`` and ``search_topic`` from the ``your_settings`` section
    of the module-level ``config``, runs the search in a headless browser and
    writes each result heading followed by its link to ``newsletter_file``.

    Raises:
        ValueError: if the configured driver is not supported.
    """
    # get user settings
    driver = config.get('your_settings', 'driver')
    search_topic = config.get('your_settings', 'search_topic')

    browser = _create_browser(driver)
    try:
        print('Getting search results...')

        # open URL
        browser.get('https://google.com')

        # select google search bar and submit the news topic
        google_search = browser.find_element_by_name('q')
        google_search.send_keys(search_topic)
        google_search.send_keys(Keys.ENTER)

        # The implicit wait is a session-wide setting, so one call covers
        # every element lookup below.
        browser.implicitly_wait(5)

        # switch to the "News" results tab
        browser.find_element_by_css_selector('a[data-sc="N"]').click()

        # get all elements containing news title
        all_headings = browser.find_elements_by_xpath(
            '//div[contains(@role, "heading") and contains(@aria-level, "2")]')

        # get all elements containing links for each news title
        all_links = browser.find_elements_by_xpath('//g-card/div/div/div[2]/a')

        # Read text and URLs while the browser session is still alive;
        # a link without an href yields None, which cannot be written.
        entries = [(heading.text, link.get_attribute('href') or '')
                   for heading, link in zip(all_headings, all_links)]
    finally:
        # quit() ends the whole session (and the driver process) even when
        # one of the steps above raised.
        browser.quit()

    # write each title and link to the file; ``with`` guarantees the data is
    # flushed and the handle closed before send_email() reads it back
    with open(newsletter_file, 'w') as file:
        for title, url in entries:
            file.write('\n\n')
            file.write(title)
            file.write('\n')
            file.write(url)

    print('Done. Search results exported to "%s"' % newsletter_file)
93

94

95
def send_email():
    """Send the contents of ``newsletter_file`` as a plain-text email.

    Reads ``email_subject``, ``email_smtp``, ``sender_email_address``,
    ``email_password`` and ``receiver_email_address`` from the
    ``your_settings`` section of the module-level ``config``, then submits
    the message through the configured SMTP host on port 587 using STARTTLS.

    Raises:
        OSError: if ``newsletter_file`` cannot be read or the connection
            to the SMTP host fails.
        smtplib.SMTPException: if the TLS upgrade, login or delivery fails.
    """
    # Imported locally so this function brings its own dependency into scope.
    import ssl

    print('Sending email...')

    # get user settings
    email_subject = config.get('your_settings', 'email_subject')
    email_smtp = config.get('your_settings', 'email_smtp')
    sender_email_address = config.get('your_settings', 'sender_email_address')
    email_password = config.get('your_settings', 'email_password')
    receiver_email_address = config.get('your_settings',
                                        'receiver_email_address')

    # newsletter file will be sent by email
    with open(newsletter_file, 'r') as file:
        file_content = file.read()

    # configure mail
    message = MIMEText(file_content)
    message['Subject'] = email_subject
    message['From'] = sender_email_address
    message['To'] = receiver_email_address

    # Verify the server certificate and host name before the password is
    # sent; starttls() without a context skips that verification.
    tls_context = ssl.create_default_context()

    # The context manager sends QUIT and closes the socket even when
    # starttls/login/send_message raises.
    with smtplib.SMTP(email_smtp, 587) as server:
        server.ehlo()
        server.starttls(context=tls_context)
        # re-identify over the now-encrypted channel
        server.ehlo()

        # send email
        server.login(sender_email_address, email_password)
        server.send_message(message)

    print("Email sent!")
130

131

132
if __name__ == "__main__":
    # Script entry point: build the newsletter file first, then mail it.
    for step in (scrape_news, send_email):
        step()
136

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.