# Amazing-Python-Scripts
# 31 lines · 1002.0 bytes
1import pandas as pd2import requests3from bs4 import BeautifulSoup4
5
def scrapper(timeout=10):
    """Scrape headlines and their links from the Times of India home page.

    The page is requested with a browser-like ``User-Agent`` header and
    parsed with the ``lxml`` parser. Every ``div`` with class ``col_l_6``
    that contains both a ``figcaption`` and an ``a`` element contributes one
    row: the stripped caption text and the anchor's ``href`` attribute
    (``None`` when the anchor has no ``href``).

    Args:
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``News_Headline`` and
        ``News_Link``. It is empty when no matching tiles are found.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36'}
    # accessing TOI webpage disguised as a browser
    # Without a timeout, requests waits indefinitely on a stalled connection.
    webpage = requests.get(
        'https://timesofindia.indiatimes.com/',
        headers=headers,
        timeout=timeout,
    ).text

    soup = BeautifulSoup(webpage, 'lxml')
    news = []
    link_list = []

    for tile in soup.find_all('div', class_='col_l_6'):
        figcaption = tile.find('figcaption')
        anchor = tile.find('a')
        # find() returns None when nothing matches; skip incomplete tiles
        # instead of failing on ``None.get``.
        if figcaption is None or anchor is None:
            continue
        # finding news headline as well as its corresponding link
        news.append(figcaption.text.strip())
        link_list.append(anchor.get("href"))

    return pd.DataFrame({'News_Headline': news, 'News_Link': link_list})
29
# Run the scraper once when the module is loaded and display the result;
# the DataFrame stays available as the module-level name ``TOI_headline``.
TOI_headline = scrapper()
print(TOI_headline)