# Amazing-Python-Scripts
# 135 lines · 3.8 KB (size reported by the original listing)
import smtplib
from configparser import ConfigParser
from email.mime.text import MIMEText

from selenium import webdriver
from selenium.webdriver.chrome.options import Options as Options_chrome
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options as Options_firefox

# File the scraped headlines are written to and later emailed from.
newsletter_file = 'newsletter.txt'

# INI file holding the user's settings (read from the 'your_settings' section).
config_file = 'config.ini'

# Shared parser used by scrape_news() and send_email().
# NOTE: ConfigParser.read() silently skips files it cannot open, so a missing
# config.ini only surfaces later, when the first config.get() call fails.
config = ConfigParser()
config.read(config_file)


def _create_browser(driver_name):
    """Return a headless WebDriver with cookies disabled for *driver_name*.

    Args:
        driver_name: Either 'geckodriver' (Firefox) or 'chromedriver'
            (Chrome). The driver executable is looked up as
            ``./<driver_name>``, i.e. relative to the current working
            directory.

    Raises:
        ValueError: If *driver_name* is not one of the supported drivers.
    """
    path_to_driver = f"./{driver_name}"

    if driver_name == 'geckodriver':
        firefox_options = Options_firefox()

        # run in headless mode
        firefox_options.headless = True

        # disable cookies to prevent popups
        firefox_pref = webdriver.FirefoxProfile()
        firefox_pref.set_preference("network.cookie.cookieBehavior", 2)

        return webdriver.Firefox(executable_path=path_to_driver,
                                 options=firefox_options,
                                 firefox_profile=firefox_pref)

    if driver_name == 'chromedriver':
        chrome_options = Options_chrome()

        # run in headless mode
        chrome_options.add_argument('--headless')

        # disable cookies to prevent popups
        chrome_options.add_experimental_option(
            'prefs', {'profile.default_content_setting_values.cookies': 2})

        return webdriver.Chrome(executable_path=path_to_driver,
                                options=chrome_options)

    # Fail here with a clear message instead of continuing and crashing later
    # on an unbound browser variable.
    raise ValueError('ERROR: driver not supported: %r' % (driver_name,))


def scrape_news():
    """Search Google for the configured topic and save the results to a file.

    Reads ``driver`` and ``search_topic`` from the 'your_settings' section of
    the module-level ``config``, runs the search in a headless browser, follows
    the link matched by ``a[data-sc="N"]`` (presumably the News tab) and writes
    each heading with its link to ``newsletter_file``, overwriting any previous
    content.

    Raises:
        ValueError: If the configured driver is not supported.
    """
    # get user settings
    driver = config.get('your_settings', 'driver')
    search_topic = config.get('your_settings', 'search_topic')

    browser = _create_browser(driver)
    try:
        print('Getting search results...')

        # open URL
        browser.get('https://google.com')

        # select google search bar
        google_search = browser.find_element_by_name('q')

        # type news topic to search
        google_search.send_keys(search_topic)
        google_search.send_keys(Keys.ENTER)

        # The implicit wait is a session-wide setting, so setting it once
        # covers every element lookup below.
        browser.implicitly_wait(5)

        browser.find_element_by_css_selector('a[data-sc="N"]').click()

        # get all elements containing news title
        all_headings = browser.find_elements_by_xpath(
            '//div[contains(@role, "heading") and contains(@aria-level, "2")]')

        # get all elements containing links for each news title
        all_links = browser.find_elements_by_xpath(
            '//g-card/div/div/div[2]/a')

        # Write while the browser is still open: element text and attributes
        # are fetched from the live page. The with-block guarantees the file
        # is flushed and closed before send_email() reads it.
        with open(newsletter_file, 'w') as newsletter:
            for heading, link in zip(all_headings, all_links):
                newsletter.write('\n\n')
                newsletter.write(heading.text)
                newsletter.write('\n')
                # get_attribute() returns None when the attribute is absent
                newsletter.write(link.get_attribute('href') or '')
    finally:
        # quit() ends the whole driver session (not just the current window),
        # and runs even when a step above raises.
        browser.quit()

    print('Done. Search results exported to "%s"' % newsletter_file)


def send_email():
    """Email the contents of ``newsletter_file`` to the configured recipient.

    Reads ``email_subject``, ``email_smtp``, ``sender_email_address``,
    ``email_password`` and ``receiver_email_address`` from the
    'your_settings' section of the module-level ``config``. Connects to the
    SMTP host on port 587, upgrades the connection with STARTTLS, logs in as
    the sender and sends the file content as the message body.
    """
    print('Sending email...')

    # get user settings
    email_subject = config.get('your_settings', 'email_subject')
    email_smtp = config.get('your_settings', 'email_smtp')
    sender_email_address = config.get('your_settings', 'sender_email_address')
    email_password = config.get('your_settings', 'email_password')
    receiver_email_address = config.get('your_settings',
                                        'receiver_email_address')

    # newsletter file will be sent by email
    with open(newsletter_file, 'r') as file:
        file_content = file.read()

    # configure mail
    message = MIMEText(file_content)
    message['Subject'] = email_subject
    message['From'] = sender_email_address
    message['To'] = receiver_email_address

    # The context manager issues QUIT and closes the connection on exit, so
    # the socket is released even if STARTTLS, login or sending fails.
    with smtplib.SMTP(email_smtp, 587) as server:
        server.ehlo()
        server.starttls()

        # send email
        server.login(sender_email_address, email_password)
        server.send_message(message)

    print("Email sent!")


if __name__ == "__main__":
    # Scrape first so the newsletter file exists and is fresh when it is sent.
    scrape_news()
    send_email()