Amazing-Python-Scripts

Форк
0
119 строк · 4.2 Кб
1
# for scraping books
2
from bs4 import BeautifulSoup as bs
3
import requests
4
# to identify emoji unicode characters
5
import demoji
6

7

8
def link_to_get(link, timeout=30):
    """Fetch a book's download page and extract its download and cover links.

    Args:
        link: URL of the download page. The page is expected to contain a
            ``<td id="info">`` cell holding the links and the cover image.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled server would block the caller forever.

    Returns:
        A two-item list ``[download_href, cover_image_url]``.

    Raises:
        IndexError: If the page has no ``<td id="info">`` cell, or that cell
            holds fewer than two ``<a>`` tags.
        AttributeError: If the info cell has no ``<img alt="cover">`` tag.
    """
    from urllib.parse import urljoin

    response = requests.get(link, timeout=timeout)
    page = bs(response.text, "html.parser")

    info_cell = page.find("td", id="info")
    if info_cell is None:
        raise IndexError(f"no <td id=\"info\"> cell found at {link}")

    anchors = info_cell.find_all("a")
    if len(anchors) < 2:
        raise IndexError(f"expected at least two links in the info cell at {link}")
    # The second anchor in the info cell is used as the download link.
    # NOTE(review): presumably the first anchor is a different mirror - confirm.
    link_href = anchors[1].get("href")

    cover = info_cell.find("img", alt="cover")
    img_link_src = cover.get("src")
    # urljoin gives the same result as plain concatenation for root-relative
    # paths ("/covers/x.jpg") but also copes with absolute URLs and paths
    # that lack the leading slash.
    img_link = urljoin("http://library.lol", img_link_src)
    return [link_href, img_link]
20

21

22
def book_get(name, mainres=25, results=5):
    """Search libgen.is and return details of the matching books.

    Only rows whose format is ``pdf`` or ``epub`` and whose language is
    ``English`` are kept.

    Args:
        name: Search text; emoji are removed before searching. It may be any
            of the following:
                1. title of book
                2. isbn of book
                3. author of book
                4. publisher of book
        mainres: Value sent to the site as the ``res`` query parameter.
        results: Maximum number of books to return.

    Returns:
        On success, a list of books, each book being a list of:
            [   0.Book Name,
                1.Author,
                2.Publisher ("unknown" when the cell is empty),
                3.Size,
                4.Book Type,
                5.Book Link,
                6.Book Image Link,
                7.Language]
        On failure, one of the strings ``"Error: enter name"``,
        ``"Error: Title Too Short"`` or ``"Error: no results found"``.
    """
    name = demoji.replace(name, '').strip()
    if name == "":
        return "Error: enter name"

    # Let requests build the query string so that characters such as "&",
    # "#" or non-ASCII letters are percent-encoded (spaces still become "+").
    params = {
        "req": name,
        "lg_topic": "libgen",
        "open": 0,
        "view": "simple",
        "res": mainres,
        "phrase": 1,
        "column": "def",
    }
    # A timeout keeps a stalled server from blocking the caller forever.
    response = requests.get(
        "http://libgen.is/search.php", params=params, timeout=30)
    bs_html = bs(response.text, "html.parser")

    # Look for the message anywhere in the page text; this also works when
    # the document has no <body> element.
    if "Search string must contain minimum 3 characters.." in bs_html.get_text():
        return "Error: Title Too Short"

    # The third table on the page is used as the result table; its first row
    # is skipped as the header.
    tables = bs_html.find_all("table")
    if len(tables) < 3:
        return "Error: no results found"
    table_rows = tables[2].find_all("tr")[1:]

    books = []
    for row in table_rows:
        # Stop as soon as the requested number of books has been collected.
        if len(books) >= results:
            break

        table_datas = row.find_all("td")
        # Cells up to index 9 are read below; skip rows that are too short.
        if len(table_datas) < 10:
            continue

        language = table_datas[6].get_text()
        type_ofit = table_datas[8].get_text()
        # Filter first: fetching the download page costs one HTTP request
        # per row, so do not spend it on rows that will be discarded.
        if type_ofit not in ("pdf", "epub") or language != "English":
            continue

        anchor = table_datas[9].find("a", href=True)
        if anchor is None:
            continue
        # Resolve the direct download link and the cover image URL.
        link_all = link_to_get(anchor.get("href"))

        books.append([
            table_datas[2].get_text(),               # book name
            table_datas[1].get_text(),               # author
            table_datas[3].get_text() or "unknown",  # publisher
            table_datas[7].get_text(),               # size
            type_ofit,
            link_all[0],
            link_all[1],
            language,
        ])

    if books:
        return books
    return "Error: no results found"
110

111

112
if __name__ == "__main__":
    # Demo run: search for "Python" and print each returned book, or the
    # error message when the search did not yield a list of books.
    found = book_get("Python", 25, 5)
    if "Error" in found:
        print(found)
    else:
        for book in found:
            print(
                f"\n\nName : {book[0]}\nAuthor : {book[1]}\nPublisher : {book[2]}\nSize : {book[3]}\nFormat : {book[4]}\nLink : {book[5]}\nImage : {book[6]}\n\n")
120

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.