Amazing-Python-Scripts
98 строк · 3.4 Кб
1import requests2from bs4 import BeautifulSoup3from selenium import webdriver4from selenium.webdriver.common.keys import Keys5import time6import sqlite37from sqlite3 import Error8
# Function to connect to the SQL Database


def sql_connection(db_path='./Udemy Scraper/udemyDatabase.db'):
    """Open a connection to the SQLite database used by the scraper.

    Args:
        db_path: Location of the database file handed to
            ``sqlite3.connect``. Defaults to the scraper's database path.

    Returns:
        An open ``sqlite3.Connection``, or ``None`` when the database
        could not be opened (the error is printed).
    """
    try:
        return sqlite3.connect(db_path)
    except Error as exc:
        # Print the exception that was actually raised; printing the
        # ``Error`` class itself says nothing about what went wrong.
        print(exc)
        return None
# Function to create table


def sql_table(con):
    """Create the ``courses`` table on *con* unless it already exists.

    The statement is executed through a cursor obtained from *con* and
    the connection is committed afterwards.
    """
    create_stmt = (
        "CREATE TABLE IF NOT EXISTS courses("
        "title text, description text, instructor text,"
        "current_price INTEGER, original_price INTEGER, "
        "rating REAL, hours REAL, lectures INTEGER)"
    )
    cursor = con.cursor()
    cursor.execute(create_stmt)
    con.commit()
28
# Call functions to connect to database and create table
con = sql_connection()
if con is None:
    # sql_connection() hands back None when the database cannot be opened.
    # Stop with a clear message instead of failing inside sql_table() with
    # an AttributeError raised on ``None.cursor()``.
    raise SystemExit("Could not open the courses database; aborting.")
sql_table(con)
# Function to insert into table


def sql_insert(con, entities):
    """Insert one row into the ``courses`` table and commit.

    ``entities`` supplies the eight column values in this order: title,
    description, instructor, current_price, original_price, rating,
    hours, lectures. They are bound through ``?`` placeholders.
    """
    insert_stmt = (
        'INSERT INTO courses(title, description, instructor, current_price, '
        'original_price, rating, hours, lectures) '
        'VALUES(?, ?, ?, ?, ?, ?, ?, ?)'
    )
    cursor = con.cursor()
    cursor.execute(insert_stmt, entities)
    con.commit()
42
def _find_text(card, tag, css_class):
    """Return the stripped text of the first matching element, or None.

    ``find`` yields None when a card has no such element; returning None
    lets that field be stored as NULL instead of aborting the whole run
    with an AttributeError.
    """
    node = card.find(tag, {"class": css_class})
    return node.text.strip() if node is not None else None


def _first_word(node):
    """Return the first whitespace-separated token of *node*'s text, or None."""
    words = node.text.strip().split()
    return words[0] if words else None


def _parse_course(course_div):
    """Build the 8-tuple of ``courses`` column values from one course card.

    Fields that are absent from the card come back as None.
    """
    title = _find_text(
        course_div, "div",
        "udlite-focus-visible-target udlite-heading-md course-card--course-title--2f7tE")
    description = _find_text(
        course_div, "p", "udlite-text-sm course-card--course-headline--yIrRk")
    instructor = _find_text(
        course_div, "div", "udlite-text-xs course-card--instructor-list--lIA4f")

    current_price = _find_text(
        course_div, "div",
        "price-text--price-part--Tu6MH course-card--discount-price--3TaBk udlite-heading-md")
    if current_price is not None:
        current_price = current_price.replace("Current price₹", "")

    original_price = _find_text(
        course_div, "div",
        "price-text--price-part--Tu6MH price-text--original-price--2e-F5 course-card--list-price--2AO6G udlite-text-sm")
    if original_price is not None:
        original_price = original_price.replace("Original Price₹", "")

    rating = _find_text(
        course_div, "span", "udlite-heading-sm star-rating--rating-number--3lVe8")

    # The first row span holds the hours, the second the lecture count.
    # Look them up once and tolerate cards that have fewer rows.
    meta_rows = course_div.find_all("span", {"class": "course-card--row--1OMjg"})
    hours = _first_word(meta_rows[0]) if len(meta_rows) > 0 else None
    lectures = _first_word(meta_rows[1]) if len(meta_rows) > 1 else None

    return (title, description, instructor, current_price,
            original_price, rating, hours, lectures)


def main():
    """Prompt for a category, scrape the Udemy search page and store the courses.

    Reads the chrome driver path and the course category from stdin, loads
    the search results in Chrome, parses every course card found in the
    page source and inserts one row per card through ``sql_insert``.
    """
    from urllib.parse import quote_plus

    # Get chrome driver path
    driver_path = input("Enter chrome driver path: ")

    print("\nSome Categories Available on Udemy include:\nDevelopment - Python, Web Development, Javascript, Java \nDesign - Photoshop, Blender, Graphic design\n")

    # Get input for course category to scrape
    category = input("Enter course category: ")

    # Encode the category so that spaces, '&', '#' and similar characters
    # cannot break or truncate the query string.
    url = 'https://www.udemy.com/courses/search/?src=ukw&q={}'.format(
        quote_plus(category))

    # initiating the webdriver. Parameter includes the path of the webdriver.
    # NOTE(review): newer Selenium releases expect a Service object rather
    # than a positional driver path - confirm against the installed version.
    driver = webdriver.Chrome(driver_path)
    try:
        driver.get(url)

        # this is just to ensure that the page is loaded
        time.sleep(5)
        html = driver.page_source
    finally:
        # Only the page source is needed from here on, so shut the browser
        # down right away - and do it even when loading the page failed.
        driver.quit()

    # Now apply bs4 to html variable
    soup = BeautifulSoup(html, "html.parser")
    course_divs = soup.find_all(
        "div", {"class": "course-card--container--3w8Zm course-card--large--1BVxY"})

    # Get all course divs and extract information from individual divs
    saved = 0
    for course_div in course_divs:
        sql_insert(con, _parse_course(course_div))
        saved += 1

    # Only claim success when at least one row was actually written.
    if saved:
        print("Saved successfully in database!")
    else:
        print("No courses found for this category; nothing was saved.")


if __name__ == "__main__":
    main()