Amazing-Python-Scripts

Форк
0
94 строки · 2.3 Кб
1
from bs4 import BeautifulSoup
2
import requests
3
import csv
4

5
# Base address of the news site; a category name is appended to it to form
# the URL of the page that gets scraped.
URL = "https://www.indiatoday.in/"
6

7

8
def writeToCSV(topTenNews, category):
    """Write scraped news items to ``topTen<category>News.csv``.

    The file is created in the current working directory (overwriting any
    existing file) with a ``Date, Link, Headline`` header row followed by one
    row per news item.

    Args:
        topTenNews: Iterable of ``[headline, link, date]`` sequences. ``link``
            may be site-relative (with or without a leading ``/``) or already
            absolute.
        category: Category name embedded in the output file name.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    base_url = "https://www.indiatoday.in/"

    # newline="" lets the csv module control line endings (otherwise Windows
    # gets a blank line after every row); an explicit encoding keeps
    # non-ASCII headlines from depending on the platform locale.
    with open("topTen" + category + "News.csv", "w",
              newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Date", "Link", "Headline"])
        for news in topTenNews:
            # urljoin avoids the "//" produced by plain concatenation when
            # the scraped href already starts with a slash.
            writer.writerow([news[2], urljoin(base_url, news[1]), news[0]])
15

16

17
def getTopTenFromDivTag(category):
    """Collect up to ten news items from a category page made of ``detail`` blocks.

    Fetches ``URL + category`` and inspects every element whose class is
    ``detail``. For each one the headline is the text of its first ``<h2>``
    and the link is the ``href`` of its first ``<a>``.

    Args:
        category: Path segment appended to ``URL`` to form the page address.

    Returns:
        A list of at most ten ``[headline, link, date]`` lists, where ``date``
        is the last ten characters of ``link``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    # A timeout prevents the script from hanging forever on a stalled
    # connection; raise_for_status stops an error page from being parsed
    # as if it were the news listing.
    page = requests.get(URL + category, timeout=10)
    page.raise_for_status()
    soup = BeautifulSoup(page.text, "html.parser")

    topTenNews = []
    for div in soup.find_all(class_="detail"):
        if len(topTenNews) == 10:
            break
        heading = div.find("h2")
        anchor = div.find("a")
        # Skip blocks without a heading or a usable link instead of crashing
        # with AttributeError/KeyError on them.
        if heading is None or anchor is None or not anchor.get("href"):
            continue
        link = anchor["href"]
        # NOTE(review): presumably story URLs end in a YYYY-MM-DD date, which
        # is why the last ten characters are used as the date — confirm.
        topTenNews.append([heading.text, link, link[-10:]])

    return topTenNews
37

38

39
def getTopTenFromLiTag(category):
    """Collect up to ten news items from a category page made of ``itg-listing`` lists.

    Fetches ``URL + category`` and walks the ``<li>`` entries found inside
    elements whose class is ``itg-listing``. For each entry the headline is
    the text of its first ``<a>`` and the link is that anchor's ``href`` with
    one leading ``/`` removed.

    Args:
        category: Path segment appended to ``URL`` to form the page address.

    Returns:
        A list of at most ten ``[headline, link, date]`` lists, where ``date``
        is the last ten characters of ``link``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    page = requests.get(URL + category, timeout=10)
    page.raise_for_status()
    soup = BeautifulSoup(page.text, "html.parser")

    topTenNews = []
    # Walk the parsed tree instead of slicing the stringified markup at fixed
    # offsets: that approach broke on any extra attribute, on "=" inside a
    # URL, and raised IndexError on the empty fragment after the last </li>
    # whenever fewer than ten items were present.
    for listing in soup.find_all(class_="itg-listing"):
        for item in listing.find_all("li"):
            if len(topTenNews) == 10:
                return topTenNews
            anchor = item.find("a")
            if anchor is None or not anchor.get("href"):
                continue
            href = anchor["href"]
            # Links are returned without their leading slash, as before, so
            # they can be appended to a base URL that ends in "/".
            link = href[1:] if href.startswith("/") else href
            # NOTE(review): presumably story URLs end in a YYYY-MM-DD date,
            # which is why the last ten characters are used — confirm.
            topTenNews.append([anchor.text, link, link[-10:]])

    return topTenNews
62

63

64
def main():
    """Prompt for a news category, scrape its top stories and save them as CSV.

    Prints the list of supported categories, reads one category name from
    standard input (case-insensitive, surrounding whitespace ignored), scrapes
    the matching page and writes the result with ``writeToCSV``.

    Raises:
        SystemExit: With status 1 if the entered category is not supported.
    """
    # Function-scope import: sys.exit is used instead of the ``exit`` helper,
    # which is injected by the ``site`` module for interactive sessions and is
    # not guaranteed to exist.
    import sys

    categories = ["india", "world", "cities", "business", "health", "technology", "sports",
                  "education", "lifestyle"]

    print("Please Choose a Category from the following list")

    for index, category in enumerate(categories, start=1):
        print(str(index) + ". " + category.capitalize())

    print("Example: Enter 'world' for top 10 world news")
    print()

    # Tolerate stray spaces and any letter case in what the user typed.
    category = input().strip().lower()

    if category not in categories:
        print("\nPlease choose a valid category!")
        sys.exit(1)

    # The first five categories are scraped with the "detail" block parser,
    # the remaining ones with the "itg-listing" list parser.
    if category in categories[:5]:
        topTenNews = getTopTenFromDivTag(category)
    else:
        topTenNews = getTopTenFromLiTag(category)

    writeToCSV(topTenNews, category)

    print("Created CSV File Successfully!")
92

93

94
# Run the interactive scraper only when this file is executed as a script,
# so that importing the module has no side effects.
if __name__ == "__main__":
    main()
95

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.