Amazing-Python-Scripts

Форк
0
73 строки · 2.4 Кб
1
import re
2
import csv
3
from pdfminer.pdfpage import PDFPage
4
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
5
from pdfminer.converter import TextConverter
6
from pdfminer.layout import LAParams
7
import io
8

9

10
def extract_education(resume_text):
    """Return degree-like phrases found in *resume_text*.

    A phrase starts at one of the keywords ``Bachelor``, ``Master``,
    ``Ph.D``/``PhD`` or ``Diploma`` (case-insensitive) and extends over the
    following text that contains neither a period nor a comma.  If that text
    is directly followed by ``.`` and a word character, the phrase may also
    absorb one more same-line clause mentioning ``University``, ``College``,
    ``School`` or ``Institute``.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        A list of matched phrases (strings) in order of appearance; empty
        when nothing matches.
    """
    # The leading \b keeps the degree keywords from matching in the middle of
    # a longer word (e.g. the "master" inside "Webmaster").
    education_pattern = (
        r"(\b(?:Bachelor|Master|Ph\.?D|Diploma)[^.,]*\b(?:\.\b)?"
        r"(?:[^.,\n]*\b(?:University|College|School|Institute)\b[^.,\n]*)?)"
    )
    return re.findall(education_pattern, resume_text, re.IGNORECASE)
15

16

17
def extract_experience(resume_text):
    """Return date-anchored work-experience snippets found in *resume_text*.

    A snippet consists of, in order:

    * optionally one to three whitespace-separated alphabetic words,
    * optionally a four-digit year or ``YYYY-YYYY`` range followed by one
      whitespace character,
    * ``Present`` or a month name (full or three-letter form), then letters
      and/or whitespace, then a four-digit year.

    Matching is case-insensitive.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        A list of matched snippets (strings) in order of appearance; empty
        when nothing matches.
    """
    leading_words = r"(?:(?:[A-Z][a-z]+\s+){1,3})?"
    leading_years = r"(?:(?:\d{4}\s?-\s?\d{4}|\d{4})\s)?"
    # \b stops a month abbreviation from matching inside another word, e.g.
    # the "mar" in "summary 2020".
    month_and_year = (
        r"(?:\b(?:Present|Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?"
        r"|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?"
        r"|Nov(?:ember)?|Dec(?:ember)?)[A-Za-z\s]+\d{4})"
    )
    experience_pattern = leading_words + leading_years + month_and_year
    return re.findall(experience_pattern, resume_text, re.IGNORECASE)
22

23

24
def extract_skills(resume_text, skills_list):
    """Return the entries of *skills_list* that occur in *resume_text*.

    Each skill is matched case-insensitively as a standalone token: it must
    not be directly preceded or followed by a word character.  Surrounding
    whitespace on a skill is ignored for matching, and blank entries are
    skipped.

    Args:
        resume_text: Plain text of the resume.
        skills_list: Iterable of skill strings to look for.

    Returns:
        A list of the matching skills, exactly as given in *skills_list* and
        in the same order.
    """
    skills_found = []
    for skill in skills_list:
        term = skill.strip()
        if not term:
            # A blank entry would compile to a pattern that matches almost
            # any text, so it is never reported as a skill.
            continue
        # Lookarounds instead of \b: \b needs a word character on one side,
        # so it can never match around skills such as "C++", "C#" or ".NET".
        pattern = r'(?<!\w){}(?!\w)'.format(re.escape(term))
        if re.search(pattern, resume_text, re.IGNORECASE):
            skills_found.append(skill)
    return skills_found
31

32

33
# Driver script: read one resume PDF, extract education, experience and
# skills from its text, and print the results.

# Forward slash is accepted by open() on Windows and POSIX alike; the
# original "resumes\Resume_12.pdf" relied on the invalid escape "\R".
file_name = "resumes/Resume_12.pdf"
skills_file = "skills_list.csv"  # Path to the CSV file containing skills

# Convert every page of the PDF to text, accumulating it in ret_data.
res_mgr = PDFResourceManager()
ret_data = io.StringIO()
txt_converter = TextConverter(res_mgr, ret_data, laparams=LAParams())
try:
    with open(file_name, 'rb') as i_f:
        interpreter = PDFPageInterpreter(res_mgr, txt_converter)
        for page in PDFPage.get_pages(i_f):
            interpreter.process_page(page)
    # Read the text once, after the loop, so resume_text is defined even
    # when the PDF yields no pages.
    resume_text = ret_data.getvalue()
finally:
    # Release the converter whether or not parsing succeeded.
    txt_converter.close()

# Extract educational and work experience
education = extract_education(resume_text)
experience = extract_experience(resume_text)

# Load skills from the CSV file: every cell of every row is one skill.
# newline='' is what the csv module expects for files handed to csv.reader.
with open(skills_file, 'r', newline='') as csv_file:
    skills_list = [cell for row in csv.reader(csv_file) for cell in row]

# Extract skills
skills = extract_skills(resume_text, skills_list)

# Print the extracted information
print("Educational Background:")
for edu in education:
    print(edu)

print("\nWork Experience:")
for exp in experience:
    print(exp)

print("\nSkills:")
for skill in skills:
    print(skill)
74

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.