# innopolis-fwd-project-backend/parser.py
# (73 lines, 2.4 KB in the original listing)
import datetime
import os

from selenium import webdriver
from selenium.webdriver.common.by import By
# Maps a calendar month number (1-12) to the month token that
# get_painting_url() embeds in the image file name.
# NOTE(review): June and July are spelled out in full while every other month
# is a three-letter abbreviation -- presumably this mirrors the site's file
# naming; confirm before "normalizing" it.
months = {
    1: "jan",
    2: "feb",
    3: "mar",
    4: "apr",
    5: "may",
    6: "june",
    7: "july",
    8: "aug",
    9: "sep",
    10: "oct",
    11: "nov",
    12: "dec",
}
def get_painting_url(year: int, month: int, day: int) -> str:
    """Build the URL of the painting image published on the given date.

    The file name is ``<month token><two-digit day><year>.jpg`` under
    ``/<year>paintings/works/``; e.g. 2023-07-19 gives
    ``https://www.hiroshimatsumoto.com/2023paintings/works/july192023.jpg``.
    The URL is also printed to stdout.

    Args:
        year: Four-digit year, used both in the directory and the file name.
        month: Month number; must be a key of ``months`` (1-12).
        day: Day of the month; zero-padded to two digits.

    Returns:
        The image URL as a string.

    Raises:
        KeyError: If ``month`` is not a key of ``months``.
    """
    url = (
        f'https://www.hiroshimatsumoto.com/{year}paintings/works/'
        f'{months[month]}{day:02d}{year}.jpg'
    )
    print(url)
    return url
def download_image(image_url, save_path) -> None:
    """Save the image displayed at ``image_url`` to ``save_path`` as a PNG.

    The URL is opened in a headless Chrome session. If the page title
    contains ``'404'`` nothing is written. Otherwise the element at
    ``/html/body/img`` is captured as a PNG screenshot (the original file
    bytes are not fetched) and written to ``save_path``, and ``downloaded``
    is printed.

    Args:
        image_url: URL to open in the browser.
        save_path: Destination file path; its parent directory must already
            exist because the file is opened with plain ``open(..., 'wb')``.
    """
    op = webdriver.ChromeOptions()
    op.add_argument('headless')
    driver = webdriver.Chrome(options=op)
    try:
        driver.get(image_url)

        if '404' in driver.title:
            return

        # Capture the screenshot before opening the file so that a failed
        # element lookup does not leave an empty file behind.
        png = driver.find_element(By.XPATH, '/html/body/img').screenshot_as_png
        with open(save_path, 'wb') as f:
            f.write(png)
        print('downloaded')
    finally:
        # Always shut the browser down; without this every call leaves a
        # Chrome process running.
        driver.quit()
def get_image(year: int, month: int, day: int) -> None:
    """Download the painting for one date into the local ``photos`` tree.

    The image is stored at ``photos/<year>/<month>/<DD>/image.png`` next to
    this source file, where ``<DD>`` is the zero-padded day and ``<month>`` is
    the plain month number. Missing directories are created one level at a
    time and each creation is reported on stdout.

    Args:
        year: Year of the painting.
        month: Month number of the painting.
        day: Day of the month of the painting.
    """
    url = get_painting_url(year, month, day)

    target_dir = os.path.dirname(os.path.realpath(__file__))
    label = ''  # path relative to this file's directory, for the log message
    for part in ('photos', str(year), str(month), f'{day:02d}'):
        target_dir = os.path.join(target_dir, part)
        label += '/' + part
        # EAFP: attempt the mkdir rather than check-then-create, which can
        # race with another process creating the same directory.
        try:
            os.mkdir(target_dir)
        except FileExistsError:
            continue
        print(f'created {label}')

    download_image(url, os.path.join(target_dir, 'image.png'))
def main() -> None:
    """Fetch the painting for every day from 2023-07-19 up to yesterday.

    Iterates day by day starting at the hard-coded start date and stops
    before today's date (today itself is not requested), calling
    ``get_image`` for each date.
    """
    current = datetime.date(2023, 7, 19)
    end_date = datetime.date.today()
    one_day = datetime.timedelta(days=1)

    while current < end_date:
        get_image(current.year, current.month, current.day)
        current += one_day
# Run the download loop only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()