# unstructured
# 58 lines · 1.9 KB
1import pytest
2
3from unstructured.documents.base import Document, Page
4from unstructured.documents.elements import Formula, NarrativeText, Title
5
6
class MockDocument(Document):
    """Document stub with one page holding a Title and two NarrativeText elements.

    All three elements carry the same text; only their element classes differ.
    """

    def __init__(self):
        super().__init__()
        shared_text = "This is a narrative."
        contents = [Title(text=shared_text)]
        contents.extend(NarrativeText(text=shared_text) for _ in range(2))
        only_page = Page(number=0)
        only_page.elements = contents
        # Assign the private page list directly instead of going through a parser.
        self._pages = [only_page]
18
19
class MockDocumentWithFormula(Document):
    """Document stub with one page holding a Title followed by a Formula."""

    def __init__(self):
        super().__init__()
        heading = Title(text="This is a narrative.")
        equation = Formula(text="e=mc2")
        only_page = Page(number=0)
        only_page.elements = [heading, equation]
        # Assign the private page list directly instead of going through a parser.
        self._pages = [only_page]
30
31
def test_get_narrative():
    """Check that get_narrative returns at least one element, all NarrativeText."""
    document = MockDocument()
    # Materialise the result so it can be checked for emptiness and then iterated.
    narrative = list(document.get_narrative())
    # Guard against a vacuous pass: MockDocument holds NarrativeText elements, so
    # an empty result would silently skip every isinstance check below.
    assert narrative
    for element in narrative:
        assert isinstance(element, NarrativeText)
    # Called without assertions: only verifies that printing does not raise.
    document.print_narrative()
38
39
def test_get_formula():
    """Check that the first Formula element in the mock document has non-empty text."""
    doc = MockDocumentWithFormula()
    formulas = []
    for candidate in doc.elements:
        if isinstance(candidate, Formula):
            formulas.append(candidate)
    first_formula = formulas[0]
    assert first_formula.text != ""
44
45
@pytest.mark.parametrize("index", [0, 1, 2])
def test_split(index):
    """Check that splitting a document around an element keeps the right neighbours.

    For the element at ``index`` on the mock document's only page, the document
    returned by ``before_element`` must hold exactly the elements preceding it
    and the one returned by ``after_element`` exactly the elements following it.
    Elements are compared by their ``id`` attribute.
    """
    document = MockDocument()
    elements = document.pages[0].elements
    pivot = elements[index]

    split_before_doc = document.before_element(pivot)
    # A split result may have no pages at all; treat that as "no elements".
    before_elements = split_before_doc.pages[0].elements if split_before_doc.pages else []
    split_after_doc = document.after_element(pivot)
    after_elements = split_after_doc.pages[0].elements if split_after_doc.pages else []

    expected_before_elements = document.pages[0].elements[:index]
    next_index = index + 1
    expected_after_elements = document.pages[0].elements[next_index:]

    # Compare whole id sequences instead of zipping the two lists: zip() stops at
    # the shorter input, so a split that dropped elements (or returned none)
    # would otherwise pass without any comparison being made.
    assert [e.id for e in before_elements] == [e.id for e in expected_before_elements]
    assert [e.id for e in after_elements] == [e.id for e in expected_after_elements]
59