Amazing-Python-Scripts
174 строки · 6.8 Кб
from urllib.parse import quote

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

"""
Example usage:
python_scraper = Courses("python", 5)
print(python_scraper.scrape_all())
"""
13
14
class Courses:
    """Scrape Coursera search results for a keyword with headless Chrome.

    Every public method opens its own browser session, walks up to
    ``page_count`` result pages and returns a dict of the form
    ``{"data": ..., "message": ...}``.  ``data`` is ``None`` when scraping
    failed (for example, no result cards became visible before the wait
    timed out).

    Args:
        keyword: Search term sent to Coursera's search page.
        page_count: Maximum number of result pages to visit.
        chromedriver_path: Optional path to a chromedriver executable.
            When empty, ``Service()`` is built without a path so Selenium
            uses its default driver lookup.
    """

    _SEARCH_URL = 'https://www.coursera.org/search?query='
    _WAIT_SECONDS = 100
    _COURSE_SELECTOR = 'main ul>li'
    _NEXT_BUTTON_SELECTOR = 'button[aria-label="Next Page"]'

    # JavaScript snippets evaluated against one result card (arguments[0]).
    _TITLE_JS = 'return arguments[0].querySelector("h3")?.innerText'
    _DESCRIPTION_JS = 'return arguments[0].querySelector("p>span")?.innerText'
    _REVIEW_JS = 'return arguments[0].querySelector("div:has(>svg)")?.innerText.replace("\\n\\n","⭐")'
    _URL_JS = 'return String(arguments[0].querySelector("a")?.href)'

    def __init__(self, keyword, page_count, chromedriver_path=''):
        self.keyword = keyword
        self.page_count = page_count
        self.chromedriver_path = chromedriver_path

    def _search_url(self):
        """Return the search URL with the keyword percent-encoded."""
        # Encoding keeps characters such as '&' or '#' inside the query value
        # instead of letting them terminate it.
        return self._SEARCH_URL + quote(self.keyword, safe='')

    def __scrape_page(self):
        """Start headless Chrome on the search page; return ``(wait, driver)``.

        The caller owns the returned driver and must call ``driver.quit()``.
        """
        options = Options()
        options.add_argument("--headless")
        if self.chromedriver_path:
            service = Service(self.chromedriver_path)
        else:
            service = Service()
        driver = webdriver.Chrome(service=service, options=options)
        try:
            driver.get(self._search_url())
        except BaseException:
            # Do not leave a browser process behind if navigation fails.
            driver.quit()
            raise
        return WebDriverWait(driver, self._WAIT_SECONDS), driver

    def _collect(self, extract):
        """Apply ``extract(driver, course)`` to every result card.

        Visits up to ``page_count`` pages.  Returns the list of extracted
        values, or ``None`` if anything goes wrong while scraping.
        """
        wait, driver = self.__scrape_page()
        try:
            collected = []
            for _ in range(self.page_count):
                courses = wait.until(EC.visibility_of_all_elements_located(
                    (By.CSS_SELECTOR, self._COURSE_SELECTOR)))
                collected.extend(extract(driver, course) for course in courses)

                # find_elements returns [] instead of raising, so a result
                # set without pagination keeps the data scraped so far.
                next_buttons = driver.find_elements(
                    By.CSS_SELECTOR, self._NEXT_BUTTON_SELECTOR)
                css_classes = (
                    next_buttons[0].get_attribute('class') or ''
                ) if next_buttons else ''
                if not next_buttons or 'disabled' in css_classes:
                    print('There are no more pages')
                    break
                next_buttons[0].click()
            return collected
        except Exception:
            # Best-effort contract kept from the original: any scraping
            # failure is reported as "no courses" instead of raising.
            return None
        finally:
            try:
                driver.quit()
            except Exception:
                # A failed shutdown must not mask the scraping result.
                pass

    def _field_extractor(self, script):
        """Return an extractor that runs ``script`` against a result card."""
        return lambda driver, course: driver.execute_script(script, course)

    def _response(self, data):
        """Wrap scraped ``data`` (or ``None`` on failure) in the result dict."""
        if data is None:
            return {
                "data": None,
                "message": f"No courses found for {self.keyword}"
            }
        # NOTE: every method reports "Course Titles for ..." on success; the
        # text is kept unchanged so existing callers see the same message.
        return {
            "data": data,
            "message": f"Course Titles for {self.keyword}"
        }

    def scrape_all(self):
        """Return id, title, description, review and url for every course."""
        def extract(driver, course):
            return {
                "title": driver.execute_script(self._TITLE_JS, course),
                "description": driver.execute_script(
                    self._DESCRIPTION_JS, course),
                "review": driver.execute_script(self._REVIEW_JS, course),
                "url": driver.execute_script(self._URL_JS, course),
            }

        rows = self._collect(extract)
        if rows is not None:
            # Ids are sequential across all visited pages, starting at 0.
            rows = [{"id": index, **row} for index, row in enumerate(rows)]
        return self._response(rows)

    def course_titles(self):
        """Return the title of every course found."""
        return self._response(
            self._collect(self._field_extractor(self._TITLE_JS)))

    def course_description(self):
        """Return the description of every course found."""
        return self._response(
            self._collect(self._field_extractor(self._DESCRIPTION_JS)))

    def course_reviews(self):
        """Return the review summary text of every course found."""
        return self._response(
            self._collect(self._field_extractor(self._REVIEW_JS)))

    def course_urls(self):
        """Return the link of every course found."""
        return self._response(
            self._collect(self._field_extractor(self._URL_JS)))

172
if __name__ == "__main__":
    # Demo run: scrape up to five result pages for "python" and print the
    # result. Guarded so importing this module does not launch a browser.
    python_scraper = Courses("python", 5)
    print(python_scraper.scrape_all())