# haystack
# 91 lines · 3.8 KB
1import pytest2
3from haystack import Document, DeserializationError4from haystack.testing.factory import document_store_class5from haystack.components.writers.document_writer import DocumentWriter6from haystack.document_stores.types import DuplicatePolicy7from haystack.document_stores.in_memory import InMemoryDocumentStore8
9
class TestDocumentWriter:
    """Tests covering ``DocumentWriter`` serialization, deserialization and ``run``."""

    def test_to_dict(self):
        """A writer built without an explicit policy is expected to serialize with policy ``"NONE"``."""
        store_cls = document_store_class("MockedDocumentStore")
        writer = DocumentWriter(document_store=store_cls())
        expected = {
            "type": "haystack.components.writers.document_writer.DocumentWriter",
            "init_parameters": {
                "document_store": {"type": "haystack.testing.factory.MockedDocumentStore", "init_parameters": {}},
                "policy": "NONE",
            },
        }
        assert writer.to_dict() == expected

    def test_to_dict_with_custom_init_parameters(self):
        """A writer built with ``DuplicatePolicy.SKIP`` is expected to serialize with policy ``"SKIP"``."""
        store_cls = document_store_class("MockedDocumentStore")
        writer = DocumentWriter(document_store=store_cls(), policy=DuplicatePolicy.SKIP)
        expected = {
            "type": "haystack.components.writers.document_writer.DocumentWriter",
            "init_parameters": {
                "document_store": {"type": "haystack.testing.factory.MockedDocumentStore", "init_parameters": {}},
                "policy": "SKIP",
            },
        }
        assert writer.to_dict() == expected

    def test_from_dict(self):
        """Deserializing a full payload should rebuild both the document store and the policy."""
        store_payload = {
            "type": "haystack.document_stores.in_memory.document_store.InMemoryDocumentStore",
            "init_parameters": {},
        }
        serialized = {
            "type": "haystack.components.writers.document_writer.DocumentWriter",
            "init_parameters": {"document_store": store_payload, "policy": "SKIP"},
        }
        writer = DocumentWriter.from_dict(serialized)
        assert isinstance(writer.document_store, InMemoryDocumentStore)
        assert writer.policy == DuplicatePolicy.SKIP

    def test_from_dict_without_docstore(self):
        """A payload with no ``document_store`` entry is expected to raise ``DeserializationError``."""
        serialized = {"type": "DocumentWriter", "init_parameters": {}}
        expected_message = "Missing 'document_store' in serialization data"
        with pytest.raises(DeserializationError, match=expected_message):
            DocumentWriter.from_dict(serialized)

    def test_from_dict_without_docstore_type(self):
        """A ``document_store`` entry lacking ``type`` is expected to raise ``DeserializationError``."""
        store_payload = {"init_parameters": {}}
        serialized = {"type": "DocumentWriter", "init_parameters": {"document_store": store_payload}}
        expected_message = "Missing 'type' in document store's serialization data"
        with pytest.raises(DeserializationError, match=expected_message):
            DocumentWriter.from_dict(serialized)

    def test_from_dict_nonexisting_docstore(self):
        """A ``document_store`` type of ``Nonexisting.DocumentStore`` is expected to raise ``DeserializationError``."""
        store_payload = {"type": "Nonexisting.DocumentStore", "init_parameters": {}}
        serialized = {"type": "DocumentWriter", "init_parameters": {"document_store": store_payload}}
        # Only the exception type is checked here; no message pattern is asserted.
        with pytest.raises(DeserializationError):
            DocumentWriter.from_dict(serialized)

    def test_run(self):
        """Running the writer on two documents should report two documents written."""
        store = InMemoryDocumentStore()
        writer = DocumentWriter(store)
        texts = ("This is the text of a document.", "This is the text of another document.")
        docs = [Document(content=text) for text in texts]

        outcome = writer.run(documents=docs)
        assert outcome["documents_written"] == 2

    def test_run_skip_policy(self):
        """With ``DuplicatePolicy.SKIP``, a second run over the same documents should report zero written."""
        store = InMemoryDocumentStore()
        writer = DocumentWriter(store, policy=DuplicatePolicy.SKIP)
        texts = ("This is the text of a document.", "This is the text of another document.")
        docs = [Document(content=text) for text in texts]

        first_outcome = writer.run(documents=docs)
        assert first_outcome["documents_written"] == 2

        # The very same Document objects are submitted again to the same writer.
        second_outcome = writer.run(documents=docs)
        assert second_outcome["documents_written"] == 0