Amazing-Python-Scripts

TOI_Scrapper.py

31 строка · 1002.0 Байт

Перенос по словам

1
import pandas as pd
2
import requests
3
from bs4 import BeautifulSoup
4

5

6
def scrapper(timeout=10):
    """Scrape headlines and their links from the Times of India home page.

    Args:
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A ``pandas.DataFrame`` with the columns ``News_Headline`` and
        ``News_Link``, one row per ``div`` of class ``col_l_6`` that
        contains a ``figcaption``. ``News_Link`` is ``None`` when the
        ``div`` has no ``<a>`` element or the element has no ``href``.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36'}
    # accessing TOI webpage disguised as a browser
    response = requests.get(
        'https://timesofindia.indiatimes.com/', headers=headers,
        timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    news = []
    link_list = []

    for block in soup.find_all('div', class_='col_l_6'):
        figcaption = block.find('figcaption')
        if figcaption is None:
            continue

        # finding news headline as well as its corresponding link;
        # find() returns None when the div holds no anchor, so guard it
        # rather than crash on .get()
        anchor = block.find('a')
        link_news = anchor.get("href") if anchor is not None else None
        text_news = figcaption.text.strip()

        news.append(text_news)
        link_list.append(link_news)

    df = pd.DataFrame({'News_Headline': news, 'News_Link': link_list})
    return df
28

29

30
# Run the scraper once at module level and print the resulting table of
# headlines and links.
TOI_headline = scrapper()
print(TOI_headline)
32

Amazing-Python-Scripts

Использование cookies