haystack

test_lost_in_the_middle.py
104 строки · 5.3 Кб
Перенос по словам
1
import pytest
2
from haystack import Document
3
from haystack.components.rankers.lost_in_the_middle import LostInTheMiddleRanker
4

5

6
class TestLostInTheMiddleRanker:
    """Unit tests for ``LostInTheMiddleRanker``.

    The expected orders in these tests place odd-positioned inputs first, in
    input order, followed by the even-positioned inputs in reverse order.
    Order checks compare complete lists of contents so that a truncated or
    empty result fails the test instead of passing vacuously.
    """

    def test_lost_in_the_middle_order_odd(self):
        """Reordering works with an odd number of documents."""
        docs = [Document(content=str(i)) for i in range(1, 10)]
        ranker = LostInTheMiddleRanker()
        result = ranker.run(documents=docs)
        expected_order = ["1", "3", "5", "7", "9", "8", "6", "4", "2"]
        assert [doc.content for doc in result["documents"]] == expected_order

    def test_lost_in_the_middle_order_even(self):
        """Reordering works with an even number of documents."""
        docs = [Document(content=str(i)) for i in range(1, 11)]
        ranker = LostInTheMiddleRanker()
        result = ranker.run(documents=docs)
        expected_order = ["1", "3", "5", "7", "9", "10", "8", "6", "4", "2"]
        assert [doc.content for doc in result["documents"]] == expected_order

    def test_lost_in_the_middle_order_two_docs(self):
        """Two documents are returned in their original order."""
        ranker = LostInTheMiddleRanker()
        docs = [Document(content="1"), Document(content="2")]
        result = ranker.run(documents=docs)
        # Exact comparison also rejects any unexpected extra documents.
        assert [doc.content for doc in result["documents"]] == ["1", "2"]

    def test_lost_in_the_middle_init(self):
        """``word_count_threshold`` defaults to None and stores an explicit value."""
        ranker = LostInTheMiddleRanker()
        assert ranker.word_count_threshold is None

        ranker = LostInTheMiddleRanker(word_count_threshold=10)
        assert ranker.word_count_threshold == 10

    def test_lost_in_the_middle_init_invalid_word_count_threshold(self):
        """A ``word_count_threshold`` <= 0 raises ``ValueError`` at construction."""
        with pytest.raises(ValueError, match="Invalid value for word_count_threshold"):
            LostInTheMiddleRanker(word_count_threshold=0)

        with pytest.raises(ValueError, match="Invalid value for word_count_threshold"):
            LostInTheMiddleRanker(word_count_threshold=-5)

    def test_lost_in_the_middle_with_word_count_threshold(self):
        """``word_count_threshold`` limits how many documents are returned."""
        # Each document holds exactly one word, so the threshold equals the
        # number of documents expected in the result.
        docs = [Document(content="word" + str(i)) for i in range(1, 10)]

        ranker = LostInTheMiddleRanker(word_count_threshold=6)
        result = ranker.run(documents=docs)
        expected_order = ["word1", "word3", "word5", "word6", "word4", "word2"]
        assert [doc.content for doc in result["documents"]] == expected_order

        ranker = LostInTheMiddleRanker(word_count_threshold=9)
        result = ranker.run(documents=docs)
        expected_order = ["word1", "word3", "word5", "word7", "word9", "word8", "word6", "word4", "word2"]
        assert [doc.content for doc in result["documents"]] == expected_order

    def test_word_count_threshold_greater_than_total_number_of_words_returns_all_documents(self):
        """A threshold above the total word count returns every document."""
        ranker = LostInTheMiddleRanker(word_count_threshold=100)
        docs = [Document(content="word" + str(i)) for i in range(1, 10)]
        ordered_docs = ranker.run(documents=docs)
        expected_order = ["word1", "word3", "word5", "word7", "word9", "word8", "word6", "word4", "word2"]
        # List equality checks both the order and that no document was dropped.
        assert [doc.content for doc in ordered_docs["documents"]] == expected_order

    def test_empty_documents_returns_empty_list(self):
        """An empty input yields an empty ``documents`` list."""
        ranker = LostInTheMiddleRanker()
        result = ranker.run(documents=[])
        assert result == {"documents": []}

    def test_list_of_one_document_returns_same_document(self):
        """A single document is returned unchanged."""
        ranker = LostInTheMiddleRanker()
        doc = Document(content="test")
        assert ranker.run(documents=[doc]) == {"documents": [doc]}

    @pytest.mark.parametrize("top_k", [1, 2, 3, 4, 5, 6, 7, 8, 12, 20])
    def test_lost_in_the_middle_order_with_top_k(self, top_k: int):
        """``top_k`` passed to ``run`` limits the result to the first ``top_k`` inputs."""
        docs = [Document(content=str(i)) for i in range(1, 10)]
        ranker = LostInTheMiddleRanker()
        result = ranker.run(documents=docs, top_k=top_k)
        if top_k < len(docs):
            # top_k is less than the number of documents, so only the top_k documents should be returned in LITM order
            assert len(result["documents"]) == top_k
            expected = ranker.run(documents=[Document(content=str(i)) for i in range(1, top_k + 1)])
            assert result == expected
        else:
            # top_k is greater than the number of documents, so all documents should be returned in LITM order
            assert len(result["documents"]) == len(docs)
            assert result == ranker.run(documents=docs)

    def test_to_dict(self):
        """``to_dict`` serializes the component type and its default init parameters."""
        component = LostInTheMiddleRanker()
        data = component.to_dict()
        assert data == {
            "type": "haystack.components.rankers.lost_in_the_middle.LostInTheMiddleRanker",
            "init_parameters": {"word_count_threshold": None, "top_k": None},
        }
105
haystack

Использование cookies