# Amazing-Python-Scripts
# 119 lines · 4.2 KB
from urllib.parse import urljoin

# for scraping books
from bs4 import BeautifulSoup as bs
import requests
# to identify emoji unicode characters
import demoji
7
def link_to_get(link):
    """Fetch a book's download page and extract its download and cover URLs.

    Args:
        link: URL of the book's download page.

    Returns:
        A two-item list ``[link_href, img_link]`` where ``link_href`` is the
        ``href`` of the second ``<a>`` inside the first ``<td id="info">``
        and ``img_link`` is the absolute URL of the ``<img alt="cover">``
        found in that same cell.

    Raises:
        IndexError: If the page has no ``<td id="info">`` or that cell holds
            fewer than two anchors.
        AttributeError: If the cell has no ``<img alt="cover">``.
        requests.exceptions.RequestException: If the HTTP request fails or
            does not complete within the timeout.
    """
    # A timeout keeps one unresponsive mirror from hanging the caller forever.
    response = requests.get(link, timeout=30)
    th_html = bs(response.text, "html.parser")

    info_cell = th_html.find_all("td", id="info")[0]
    link_href = info_cell.find_all("a")[1].get("href")
    img_link_src = info_cell.find("img", alt="cover").get("src")

    # urljoin gives the same result as plain concatenation for the usual
    # "/covers/..." form, and also copes with an absolute ``src`` or one
    # without a leading slash, where concatenation yields a broken URL.
    img_link = urljoin("http://library.lol", img_link_src)
    return [link_href, img_link]

21
def book_get(name, mainres=25, results=5):
    """Return a list of books found on libgen.is for the given name.

    You can give in name :
    1. title of book
    2. isbn of book
    3. author of book
    4. publisher of book

    Emoji are removed from ``name`` before searching. Only rows whose type is
    ``pdf`` or ``epub`` and whose language is ``English`` are returned.

    Args:
        name: Search text.
        mainres: Value sent as the ``res`` query parameter (presumably the
            number of rows the site returns per page; confirm against the
            site).
        results: Maximum number of books to return.

    Returns:
        On success, a list of at most ``results`` books, each a list of:
        [ 0.Book Name,
        1.Author,
        2.Publisher,
        3.Size,
        4.Book Type,
        5.Book Link,
        6.Book Image Link
        7.Language]
        On failure, one of the strings ``"Error: enter name"``,
        ``"Error: Title Too Short"`` or ``"Error: no results found"``.

    Raises:
        requests.exceptions.RequestException: If the search request fails or
            times out. Exceptions raised by ``link_to_get`` also propagate.
    """
    name = demoji.replace(name, "").strip()
    if not name:
        return "Error: enter name"

    # Passing the query through ``params`` lets requests URL-encode it, so
    # characters such as "&" or "#" in the search text cannot corrupt the
    # query string. Spaces are still sent as "+".
    response = requests.get(
        "http://libgen.is/search.php",
        params={
            "req": name,
            "lg_topic": "libgen",
            "open": 0,
            "view": "simple",
            "res": mainres,
            "phrase": 1,
            "column": "def",
        },
        timeout=30,
    )
    bs_html = bs(response.text, "html.parser")

    # Search the page text: ``in`` on a Tag only compares against its direct
    # children, and ``bs_html.body`` may be None for an unexpected response.
    if "Search string must contain minimum 3 characters.." in bs_html.get_text():
        return "Error: Title Too Short"

    # The code reads results from the third table on the page; its first row
    # is skipped as a header.
    tables = bs_html.find_all("table")
    if len(tables) < 3:
        return "Error: no results found"
    table_rows = tables[2].find_all("tr")[1:]

    books = []
    for row in table_rows:
        # Stop as soon as enough books are collected.
        if len(books) >= results:
            break

        table_datas = row.find_all("td")
        if len(table_datas) < 10:
            # Not a regular result row; skip instead of raising IndexError.
            continue

        language = table_datas[6].get_text()
        type_ofit = table_datas[8].get_text()
        # Keep only pdf/epub books in English. This is checked before
        # link_to_get() so rejected rows do not cost an extra HTTP request.
        if type_ofit not in ("pdf", "epub") or language != "English":
            continue

        anchor = table_datas[9].find("a", href=True)
        if anchor is None:
            continue

        book_name = table_datas[2].get_text()
        author = table_datas[1].get_text()
        publisher = table_datas[3].get_text() or "unknown"
        size = table_datas[7].get_text()

        # getting image url & direct book download link
        link_all = link_to_get(anchor.get("href"))

        books.append([
            book_name,
            author,
            publisher,
            size,
            type_ofit,
            link_all[0],
            link_all[1],
            language,
        ])

    if books:
        return books
    return "Error: no results found"

111
if __name__ == "__main__":
    # Demo run: search for "Python" and print each returned book, or the
    # error string when the search did not produce a list of books.
    found = book_get("Python", 25, 5)
    if "Error" in found:
        print(found)
    else:
        for book in found:
            print(
                f"\n\nName : {book[0]}\nAuthor : {book[1]}\nPublisher : {book[2]}\nSize : {book[3]}\nFormat : {book[4]}\nLink : {book[5]}\nImage : {book[6]}\n\n")