1
from __future__ import annotations
4
from pathlib import Path
9
def author_from_bibtex(bibtex: str | None) -> str:
    """Create " (Authors, Year)" from a bibtex entry.

    The entry is searched for ``author = {Authors}`` and ``year = {Year}``
    fields. Only the first author is shown; " et al." is appended when the
    author field lists more than one author.

    Args:
        bibtex: Raw bibtex entry, or None when no citation is available.

    Returns:
        A string such as " (Doe et al., 2021)" (note the leading space, so it
        can be appended directly to a name), or "" when ``bibtex`` is None or
        no usable author or year field is found.
    """
    if bibtex is None:
        return ""

    author_match = re.search(r"author\s*=\s*{([^}]*)}", bibtex)
    year_match = re.search(r"year\s*=\s*{([^}]*)}", bibtex)
    if author_match is None or year_match is None:
        return ""

    # Bibtex separates authors with the word "and"; in multi-line entries the
    # surrounding whitespace may be a newline rather than a plain space.
    authors = [
        author
        for author in re.split(r"\s+and\s+", author_match.group(1).strip())
        if author
    ]
    if not authors:
        return ""

    # "Last, First" -> "Last"; names without a comma are kept whole. Collapsing
    # whitespace also removes any newline left inside the name.
    first_author = " ".join(authors[0].split(",")[0].split())
    if not first_author:
        return ""

    # "et al." depends on how many authors there are, not on whether the first
    # author's name happens to contain a comma.
    et_al = " et al." if len(authors) > 1 else ""
    year_str = year_match.group(1).strip()
    return f" ({first_author}{et_al}, {year_str})"
31
def task_to_markdown_row(task: mteb.AbsTask) -> str:
    """Render one task as a row of the markdown task table.

    The row has seven columns, in this order: name (linked to
    ``metadata.reference`` when set, followed by the citation produced by
    ``author_from_bibtex``), languages, type, category, domains, number of
    samples and average character length. Falsy optional values (domains,
    n_samples, avg_character_length) are rendered as empty cells.

    Args:
        task: Task whose ``metadata`` supplies the cell values.

    Returns:
        A single markdown table row without a trailing newline.
    """
    metadata = task.metadata
    name = metadata.name

    # Link the name only when a reference URL is available.
    if metadata.reference:
        name_w_reference = f"[{name}]({metadata.reference})"
    else:
        name_w_reference = name
    name_w_reference += author_from_bibtex(metadata.bibtex_citation)

    domains = "[" + ", ".join(metadata.domains) + "]" if metadata.domains else ""
    n_samples = metadata.n_samples or ""
    avg_character_length = metadata.avg_character_length or ""

    return f"| {name_w_reference} | {task.metadata.languages} | {task.metadata.type} | {task.metadata.category} | {domains} | {n_samples} | {avg_character_length} |"
49
def create_tasks_table(tasks: list[mteb.AbsTask]) -> str:
    """Build the markdown table listing the given tasks.

    Args:
        tasks: Tasks to list, one table row each, in the given order.

    Returns:
        The table as a string: a leading newline, the header row, the
        separator row, then one newline-terminated row per task.
    """
    header = (
        "\n"
        "| Name | Languages | Type | Category | Domains | # Samples | Avg. Length (Char.) |\n"
        "|------|-----------|------|----------|---------|-----------|---------------------|\n"
    )
    # Join once instead of growing the string row by row.
    rows = "".join(task_to_markdown_row(task) + "\n" for task in tasks)
    return header + rows
59
def insert_table(file_path, table):
    """Insert table between <!-- TABLE START --> and <!-- TABLE END -->.

    Whatever currently sits between the two markers in the file is replaced
    by ``table``; the markers and all text outside them are left untouched.
    The file is read and rewritten as UTF-8.

    Args:
        file_path: Path of the markdown file to update in place.
        table: Text to place between the markers.

    Raises:
        ValueError: If the start marker is missing, or no end marker follows
            it.
    """
    start = "<!-- TABLE START -->"
    end = "<!-- TABLE END -->"

    with open(file_path, encoding="utf-8") as file:
        md = file.read()

    content_start = md.index(start) + len(start)
    content_end = md.index(end, content_start)

    # Splice by position rather than str.replace: replace() would also rewrite
    # any other occurrence of the old text, and with an empty region between
    # the markers it would insert the table between every character.
    md = md[:content_start] + table + md[content_end:]

    with open(file_path, "w", encoding="utf-8") as file:
        file.write(md)
74
def main() -> None:
    """Regenerate the task table in the ``tasks.md`` file next to this script.

    Fetches the tasks from ``mteb.get_tasks()``, sorts them by metadata name,
    renders them with ``create_tasks_table`` and writes the result into
    ``tasks.md`` via ``insert_table``.
    """
    # NOTE(review): wrapped in a function so nothing runs on import; this
    # presumes the ``__main__`` guard below calls ``main()`` — confirm.
    tasks = sorted(mteb.get_tasks(), key=lambda task: task.metadata.name)
    table = create_tasks_table(tasks)
    file_path = Path(__file__).parent / "tasks.md"
    insert_table(file_path, table)
84
if __name__ == "__main__":