Amazing-Python-Scripts
103 строки · 4.5 Кб
1import requests as req
2import re
3import csv
4
# Social media platforms to look for in the links found on a page
media_links = [
    "instagram",
    "facebook",
    "twitter",
    "linkedin",
    "youtube",
]

# Rows collected in CSV mode: one result dict per successfully scraped site
output_link_format = []

# Seconds to wait for a site before giving up; without a timeout a single
# unresponsive host would hang the whole run.
REQUEST_TIMEOUT = 15

MENU_PROMPT = "\n----------------------------\n Social Media Links Scraper\n----------------------------\n1. Single Website\n2. CSV file of websites\nEnter your choice from 1 or 2 : "

# Matches URLs and bare hosts (domain names, IPv4 and IPv6 literals) with an
# optional scheme, port and path. Compiled once and shared by both modes.
URL_PATTERN = re.compile(
    r"\b((?:https?://)?(?:(?:www\.)?(?:[\da-z\.-]+)\.(?:[a-z]{2,6})|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|(?:(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|:(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(?::[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(?:ffff(?::0{1,4}){0,1}:){0,1}(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])|(?:[0-9a-fA-F]{1,4}:){1,4}:(?:(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(?:25[0-5]|(?:2[0-4]|1{0,1}[0-9]){0,1}[0-9])))(?::[0-9]{1,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])?(?:/[\w\.-]*)*/?)\b"
)


def extract_social_links(url, page_text):
    """Return the social media links found in *page_text*.

    Args:
        url: Address of the scraped site; stored under the ``"url"`` key.
        page_text: Body of the page to search for links.

    Returns:
        A dict with the key ``"url"`` followed by one key per entry of
        ``media_links``. A platform's value is the last matching link on the
        page, or ``""`` when none was found.
    """
    found_links = {"url": url}
    found_links.update((platform, "") for platform in media_links)

    for candidate in URL_PATTERN.findall(page_text):
        for platform in media_links:
            # Membership test rather than ``find(...) > 0``: a bare link such
            # as "instagram.com/foo" has the platform name at index 0.
            if platform in candidate:
                found_links[platform] = candidate
    return found_links


def fetch_social_links(url):
    """Download *url* and return its social media links.

    Reports the problem on stdout and returns ``None`` when the request
    fails or the response status is not 200, so that one bad site does not
    abort a whole batch.
    """
    try:
        response = req.get(url, timeout=REQUEST_TIMEOUT)
    except req.exceptions.RequestException as err:
        print(url, "could not be fetched:", err)
        return None

    if response.status_code != 200:
        print(url, "did not return status code 200")
        return None
    return extract_social_links(url, response.text)


def scrape_single_site():
    """Ask for one website url and print the social links found on it."""
    link = input("\nEnter the website url : ").strip()
    found_links = fetch_social_links(link)
    if found_links is None:
        return

    print()
    for key, value in found_links.items():
        print(key, ":", value)


def scrape_csv_file():
    """Ask for a file of urls (one per line) and write results to output.csv."""
    # Read every url up front so the input file is closed before any
    # network request is made.
    with open(input("\nEnter file path : ")) as csv_file:
        links = [line.strip() for line in csv_file]

    for link in links:
        if not link:
            # Blank lines are not urls; skip them instead of requesting "".
            continue
        found_links = fetch_social_links(link)
        if found_links is not None:
            output_link_format.append(found_links)

    if not output_link_format:
        # Nothing to take the header row from; avoid indexing an empty list.
        print("No website could be scraped, output csv file not generated")
        return

    print("Generating output csv file...")
    keys = output_link_format[0].keys()

    with open('output.csv', 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(output_link_format)

    print("Output csv file generated successfully")


def main():
    """Show the menu and run the mode selected by the user."""
    try:
        val = int(input(MENU_PROMPT))
    except ValueError:
        # Non-numeric input is treated like any other invalid choice.
        val = None

    if val == 1:
        scrape_single_site()
    elif val == 2:
        scrape_csv_file()
    else:
        print("Invalid choice! Enter choice between 1-2")


if __name__ == "__main__":
    main()