mteb

Форк
0
/
create_tasks_table.py 
85 строк · 2.6 Кб
1
from __future__ import annotations
2

3
import re
4
from pathlib import Path
5

6
import mteb
7

8

9
def author_from_bibtex(bibtex: str | None) -> str:
    """Create a " (Author, Year)" suffix from a BibTeX entry.

    The entry is searched for ``author = {...}`` and ``year = {...}`` fields
    (whitespace around ``=`` is optional). Only the first author is shown;
    " et al." is appended when the author field lists more than one name.

    Args:
        bibtex: Raw BibTeX entry, or None when the task has no citation.

    Returns:
        A string such as " (Smith et al., 2020)" with a leading space so it
        can be appended directly to a name, or "" when ``bibtex`` is None or
        the author or year cannot be extracted.
    """
    if bibtex is None:
        return ""

    # Both patterns stop at the first closing brace, so a field containing
    # nested braces is only captured up to that brace.
    author_match = re.search(r"author\s*=\s*{([^}]*)}", bibtex)
    year_match = re.search(r"year\s*=\s*{([^}]*)}", bibtex)
    if author_match is None or year_match is None:
        return ""

    # Collapse all whitespace runs (including newlines) to single spaces so
    # that author lists wrapped across lines still split on " and ".
    author_field = " ".join(author_match.group(1).split())
    authors = [name for name in author_field.split(" and ") if name]
    if not authors:
        return ""

    # For "Last, First" keep only the part before the comma; a name without
    # a comma is kept whole.
    first_author = authors[0].split(",")[0].strip()
    # "et al." depends on how many authors there are, not on whether the
    # first author's name happens to contain a comma.
    if len(authors) > 1:
        first_author += " et al."

    year_str = year_match.group(1).strip()
    return f" ({first_author}, {year_str})"
29

30

31
def task_to_markdown_row(task: mteb.AbsTask) -> str:
    """Render one task's metadata as a pipe-delimited markdown table row.

    The cells are, in order: name (linked to the reference when one is set,
    followed by the citation suffix from ``author_from_bibtex``), languages,
    type, category, domains, number of samples and average character length.
    Falsy domains, sample counts and lengths are rendered as empty cells.
    """
    meta = task.metadata

    # Name cell: optional markdown link, then the " (Author, Year)" suffix.
    label = meta.name
    if meta.reference:
        label = f"[{label}]({meta.reference})"
    label += author_from_bibtex(meta.bibtex_citation)

    if meta.domains:
        domain_cell = "[" + ", ".join(meta.domains) + "]"
    else:
        domain_cell = ""

    sample_cell = meta.n_samples or ""
    length_cell = meta.avg_character_length or ""

    cells = [
        label,
        meta.languages,
        meta.type,
        meta.category,
        domain_cell,
        sample_cell,
        length_cell,
    ]
    return "| " + " | ".join(f"{cell}" for cell in cells) + " |"
47

48

49
def create_tasks_table(tasks: list[mteb.AbsTask]) -> str:
    """Build the markdown tasks table: a header followed by one row per task.

    Args:
        tasks: Tasks to list, in the order they should appear. Any iterable
            of tasks is accepted.

    Returns:
        The table as a single string. It starts with a blank line, then the
        header and separator rows, then one newline-terminated row per task
        as produced by ``task_to_markdown_row``.
    """
    header = """
| Name | Languages | Type | Category | Domains | # Samples | Avg. Length (Char.) |
|------|-----------|------|----------|---------|-----------|---------------------|
"""
    # A single join avoids rebuilding the growing string once per task.
    return header + "".join(task_to_markdown_row(task) + "\n" for task in tasks)
57

58

59
def insert_table(file_path, table):
    """Insert ``table`` between ``<!-- TABLE START -->`` and ``<!-- TABLE END -->``.

    The file is read, everything between the first start marker and the next
    end marker after it is replaced by ``table``, and the file is written
    back. The markers themselves and all text outside them are left as is.

    Args:
        file_path: Path of the markdown file to update in place.
        table: Text to place between the two markers.

    Raises:
        ValueError: If the start marker is missing, or no end marker follows
            it.
    """
    start = "<!-- TABLE START -->"
    end = "<!-- TABLE END -->"

    # Explicit encoding so the result does not depend on the platform default.
    with open(file_path, "r", encoding="utf-8") as file:
        md = file.read()

    start_pos = md.find(start)
    if start_pos == -1:
        raise ValueError(f"start marker {start!r} not found in {file_path}")
    content_start = start_pos + len(start)

    # Only an end marker that comes after the start marker delimits the region.
    content_end = md.find(end, content_start)
    if content_end == -1:
        raise ValueError(
            f"end marker {end!r} not found after start marker in {file_path}"
        )

    # Splice by position. Using str.replace on the old region text would also
    # rewrite identical text elsewhere in the file, and with an empty region
    # it would insert the table between every character.
    md = md[:content_start] + table + md[content_end:]

    with open(file_path, "w", encoding="utf-8") as file:
        file.write(md)
71

72

73
def main():
    """Regenerate the tasks table in the ``tasks.md`` file next to this script.

    Fetches the tasks from ``mteb``, sorts them by metadata name, renders
    them as a markdown table and splices the table into ``tasks.md``.
    """
    ordered_tasks = sorted(mteb.get_tasks(), key=lambda task: task.metadata.name)
    markdown_table = create_tasks_table(ordered_tasks)

    # tasks.md is resolved relative to this script, not the working directory.
    target = Path(__file__).parent / "tasks.md"
    insert_table(target, markdown_table)


if __name__ == "__main__":
    main()
86

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.