Amazing-Python-Scripts

Форк
0
98 строк · 3.4 Кб
1
import sqlite3
import time
from sqlite3 import Error
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
8

9
# Function to connect to the SQL Database
10

11

12
def sql_connection(db_path='./Udemy Scraper/udemyDatabase.db'):
    """Open a connection to the SQLite database used to store courses.

    Args:
        db_path: Location of the database file. Defaults to the path the
            script has always used, so existing zero-argument calls behave
            as before.

    Returns:
        An open ``sqlite3.Connection``, or ``None`` when the database could
        not be opened (the error is printed to stdout).
    """
    try:
        return sqlite3.connect(db_path)
    except Error as err:
        # Print the exception instance: printing the ``Error`` class itself
        # only shows "<class 'sqlite3.Error'>" and hides the real cause.
        print(err)
        return None
18

19
# Function to create table
20

21

22
def sql_table(con):
    """Make sure the ``courses`` table exists in the given database.

    The statement uses ``IF NOT EXISTS``, so calling this on a database
    that already has the table leaves it untouched. The change is committed
    before returning.

    Args:
        con: An open sqlite3 connection.
    """
    create_stmt = (
        "CREATE TABLE IF NOT EXISTS courses(title text, description text, instructor text,current_price INTEGER, original_price INTEGER, rating REAL, hours REAL, lectures INTEGER)"
    )
    cur = con.cursor()
    cur.execute(create_stmt)
    con.commit()
27

28

29
# Connect to the database and make sure the courses table exists before any
# scraping starts.
con = sql_connection()
if con is None:
    # sql_connection() prints the error and yields None when the database
    # cannot be opened; stop here with a clear message instead of failing
    # inside sql_table() with an unrelated AttributeError.
    raise SystemExit("Could not open the database; aborting.")
sql_table(con)
32

33
# Function to insert into table
34

35

36
def sql_insert(con, entities):
    """Insert a single course row into ``courses`` and commit it.

    Args:
        con: An open sqlite3 connection whose database has the ``courses``
            table.
        entities: Sequence of eight values, in column order: title,
            description, instructor, current_price, original_price,
            rating, hours, lectures. They are bound as query parameters.
    """
    insert_stmt = 'INSERT INTO courses(title, description, instructor, current_price, original_price, rating, hours, lectures) VALUES(?, ?, ?, ?, ?, ?, ?, ?)'
    cur = con.cursor()
    cur.execute(insert_stmt, entities)
    con.commit()
41

42

43
# Get chrome driver path
driver_path = input("Enter chrome driver path: ")

print("\nSome Categories Available on Udemy include:\nDevelopment -  Python, Web Development, Javascript, Java \nDesign - Photoshop, Blender, Graphic design\n")

# Get input for course category to scrape
category = input("Enter course category: ")

# URL-encode the category so multi-word or special-character input
# (e.g. "Web Development", "C++") still yields a valid query string.
url = 'https://www.udemy.com/courses/search/?src=ukw&q={}'.format(
    quote_plus(category))


def _text_or_none(parent, tag, css_class):
    """Return the stripped text of the first matching child, or None."""
    node = parent.find(tag, {"class": css_class})
    return node.text.strip() if node is not None else None


def _first_token(nodes, index):
    """Return the first whitespace-separated token of nodes[index], or None."""
    if index >= len(nodes):
        return None
    tokens = nodes[index].text.split()
    return tokens[0] if tokens else None


# initiating the webdriver. Parameter includes the path of the webdriver.
# NOTE(review): newer Selenium releases expect the driver path to be passed
# through a Service object rather than positionally -- confirm against the
# installed Selenium version.
driver = webdriver.Chrome(driver_path)
try:
    driver.get(url)

    # this is just to ensure that the page is loaded
    time.sleep(5)
    html = driver.page_source
finally:
    # Always shut the browser down, even if loading the page fails. Only
    # the HTML string is needed from here on.
    driver.quit()

# Now apply bs4 to html variable
# NOTE(review): the class names below look specific to one build of the
# Udemy front end; if no cards are found the page markup has probably changed.
soup = BeautifulSoup(html, "html.parser")
course_divs = soup.find_all(
    "div", {"class": "course-card--container--3w8Zm course-card--large--1BVxY"})

# Get all course divs and extract information from individual divs.
# Missing elements are stored as NULL instead of aborting the whole run.
saved_count = 0
for course_div in course_divs:
    title = _text_or_none(
        course_div, "div",
        "udlite-focus-visible-target udlite-heading-md course-card--course-title--2f7tE")
    if title is None:
        # Without a title the card is not a usable course entry.
        continue

    description = _text_or_none(
        course_div, "p", "udlite-text-sm course-card--course-headline--yIrRk")
    instructor = _text_or_none(
        course_div, "div", "udlite-text-xs course-card--instructor-list--lIA4f")

    current_price = _text_or_none(
        course_div, "div",
        "price-text--price-part--Tu6MH course-card--discount-price--3TaBk udlite-heading-md")
    if current_price is not None:
        current_price = current_price.replace("Current price₹", "")

    original_price = _text_or_none(
        course_div, "div",
        "price-text--price-part--Tu6MH price-text--original-price--2e-F5 course-card--list-price--2AO6G udlite-text-sm")
    if original_price is not None:
        original_price = original_price.replace("Original Price₹", "")

    rating = _text_or_none(
        course_div, "span", "udlite-heading-sm star-rating--rating-number--3lVe8")

    # The first two info rows carry the hours and lecture counts; look the
    # rows up once and take the leading token of each.
    info_rows = course_div.find_all(
        "span", {"class": "course-card--row--1OMjg"})
    hours = _first_token(info_rows, 0)
    lectures = _first_token(info_rows, 1)

    entities = (title, description, instructor, current_price,
                original_price, rating, hours, lectures)
    sql_insert(con, entities)
    saved_count += 1

# Every insert is committed inside sql_insert, so the connection can be
# released now.
con.close()

if saved_count:
    print("Saved successfully in database!")
else:
    # Do not claim success when nothing was scraped.
    print("No courses were found; nothing was saved.")
99

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.