haystack

Форк
0
/
test_document_writer.py 
91 строка · 3.8 Кб
1
import pytest
2

3
from haystack import Document, DeserializationError
4
from haystack.testing.factory import document_store_class
5
from haystack.components.writers.document_writer import DocumentWriter
6
from haystack.document_stores.types import DuplicatePolicy
7
from haystack.document_stores.in_memory import InMemoryDocumentStore
8

9

10
class TestDocumentWriter:
    """Tests for ``DocumentWriter`` serialization, deserialization and ``run``."""

    def test_to_dict(self):
        """A writer built with only a store serializes with policy ``"NONE"``."""
        store_cls = document_store_class("MockedDocumentStore")
        writer = DocumentWriter(document_store=store_cls())
        expected = {
            "type": "haystack.components.writers.document_writer.DocumentWriter",
            "init_parameters": {
                "document_store": {"type": "haystack.testing.factory.MockedDocumentStore", "init_parameters": {}},
                "policy": "NONE",
            },
        }
        assert writer.to_dict() == expected

    def test_to_dict_with_custom_init_parameters(self):
        """An explicit ``DuplicatePolicy.SKIP`` is serialized as ``"SKIP"``."""
        store_cls = document_store_class("MockedDocumentStore")
        writer = DocumentWriter(document_store=store_cls(), policy=DuplicatePolicy.SKIP)
        expected = {
            "type": "haystack.components.writers.document_writer.DocumentWriter",
            "init_parameters": {
                "document_store": {"type": "haystack.testing.factory.MockedDocumentStore", "init_parameters": {}},
                "policy": "SKIP",
            },
        }
        assert writer.to_dict() == expected

    def test_from_dict(self):
        """``from_dict`` rebuilds the nested in-memory store and the policy."""
        store_payload = {
            "type": "haystack.document_stores.in_memory.document_store.InMemoryDocumentStore",
            "init_parameters": {},
        }
        payload = {
            "type": "haystack.components.writers.document_writer.DocumentWriter",
            "init_parameters": {"document_store": store_payload, "policy": "SKIP"},
        }
        writer = DocumentWriter.from_dict(payload)
        assert isinstance(writer.document_store, InMemoryDocumentStore)
        assert writer.policy == DuplicatePolicy.SKIP

    def test_from_dict_without_docstore(self):
        """Empty ``init_parameters`` must raise a ``DeserializationError``."""
        payload = {"type": "DocumentWriter", "init_parameters": {}}
        with pytest.raises(DeserializationError, match="Missing 'document_store' in serialization data"):
            DocumentWriter.from_dict(payload)

    def test_from_dict_without_docstore_type(self):
        """A document store entry lacking ``type`` must raise a ``DeserializationError``."""
        store_payload = {"init_parameters": {}}
        payload = {"type": "DocumentWriter", "init_parameters": {"document_store": store_payload}}
        with pytest.raises(DeserializationError, match="Missing 'type' in document store's serialization data"):
            DocumentWriter.from_dict(payload)

    def test_from_dict_nonexisting_docstore(self):
        """The store type ``Nonexisting.DocumentStore`` must raise a ``DeserializationError``."""
        store_payload = {"type": "Nonexisting.DocumentStore", "init_parameters": {}}
        payload = {"type": "DocumentWriter", "init_parameters": {"document_store": store_payload}}
        with pytest.raises(DeserializationError):
            DocumentWriter.from_dict(payload)

    def test_run(self):
        """Running the writer on two documents reports two documents written."""
        writer = DocumentWriter(InMemoryDocumentStore())
        docs = [
            Document(content="This is the text of a document."),
            Document(content="This is the text of another document."),
        ]

        outcome = writer.run(documents=docs)
        assert outcome["documents_written"] == 2

    def test_run_skip_policy(self):
        """With the SKIP policy, re-running on the same documents writes none."""
        writer = DocumentWriter(InMemoryDocumentStore(), policy=DuplicatePolicy.SKIP)
        docs = [
            Document(content="This is the text of a document."),
            Document(content="This is the text of another document."),
        ]

        # First pass: both documents are counted as written.
        first_pass = writer.run(documents=docs)
        assert first_pass["documents_written"] == 2

        # Second pass with the same Document objects: nothing is counted.
        second_pass = writer.run(documents=docs)
        assert second_pass["documents_written"] == 0
92

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.