embedchain
130 строк · 3.9 Кб
1import hashlib2from unittest.mock import Mock, patch3
4import pytest5from requests import Response6
7from embedchain.loaders.docs_site_loader import DocsSiteLoader8
9
@pytest.fixture
def mock_requests_get():
    """Replace ``requests.get`` with a mock while a test runs and yield that mock.

    The patch is undone once the consuming test is finished with the fixture.
    """
    patcher = patch("requests.get")
    fake_get = patcher.start()
    try:
        yield fake_get
    finally:
        # Always restore the real requests.get, even if the generator is closed early.
        patcher.stop()
15
@pytest.fixture
def docs_site_loader():
    """Build a ``DocsSiteLoader`` with its default constructor arguments."""
    loader = DocsSiteLoader()
    return loader
20
def test_get_child_links_recursive(mock_requests_get, docs_site_loader):
    """Two relative links on a 200 page are recorded in ``visited_links`` as absolute URLs."""
    page_html = """
<html>
<a href="/page1">Page 1</a>
<a href="/page2">Page 2</a>
</html>
"""
    # Every call to the patched requests.get returns this same fake page.
    mock_requests_get.return_value = Mock(status_code=200, text=page_html)

    docs_site_loader._get_child_links_recursive("https://example.com")

    visited = docs_site_loader.visited_links
    assert len(visited) == 2
    for expected_url in ("https://example.com/page1", "https://example.com/page2"):
        assert expected_url in visited
38
def test_get_child_links_recursive_status_not_200(mock_requests_get, docs_site_loader):
    """A 404 response leaves ``visited_links`` empty."""
    not_found = Mock(status_code=404)
    mock_requests_get.return_value = not_found

    docs_site_loader._get_child_links_recursive("https://example.com")

    visited = docs_site_loader.visited_links
    assert len(visited) == 0
48
def test_get_all_urls(mock_requests_get, docs_site_loader):
    """``_get_all_urls`` returns all three links found on the fetched page."""
    # NOTE: the link labelled "External" is an absolute URL, but it points at the
    # same host (example.com) as the start URL used below.
    page_html = """
<html>
<a href="/page1">Page 1</a>
<a href="/page2">Page 2</a>
<a href="https://example.com/external">External</a>
</html>
"""
    mock_requests_get.return_value = Mock(status_code=200, text=page_html)

    all_urls = docs_site_loader._get_all_urls("https://example.com")

    assert len(all_urls) == 3
    for expected_url in (
        "https://example.com/page1",
        "https://example.com/page2",
        "https://example.com/external",
    ):
        assert expected_url in all_urls
68
def test_load_data_from_url(mock_requests_get, docs_site_loader):
    """A 200 page yields a single entry holding the article text and the source URL.

    The page also contains a ``<nav>`` section; the expected content is exactly
    the article paragraph's text.
    """
    page_url = "https://example.com/page1"
    page_bytes = """
<html>
<nav>
<h1>Navigation</h1>
</nav>
<article class="bd-article">
<p>Article Content</p>
</article>
</html>
""".encode()
    mock_requests_get.return_value = Mock(status_code=200, content=page_bytes)

    data = docs_site_loader._load_data_from_url(page_url)

    assert len(data) == 1
    entry = data[0]
    assert entry["content"] == "Article Content"
    assert entry["meta_data"]["url"] == page_url
90
def test_load_data_from_url_status_not_200(mock_requests_get, docs_site_loader):
    """A 404 response makes ``_load_data_from_url`` return an empty list."""
    not_found = Mock(status_code=404)
    mock_requests_get.return_value = not_found

    data = docs_site_loader._load_data_from_url("https://example.com/page1")

    assert data == []
    assert len(data) == 0
101
def test_load_data(mock_requests_get, docs_site_loader):
    """``load_data`` returns two data entries and the expected ``doc_id``.

    The expected id is the SHA-256 hex digest of the space-joined
    ``visited_links`` followed by the start URL.
    """
    url = "https://example.com"

    # A real Response object is used here; the page bytes go into its private
    # ``_content`` attribute. The same response is returned for every request.
    page_response = Response()
    page_response.status_code = 200
    page_response._content = """
<html>
<a href="/page1">Page 1</a>
<a href="/page2">Page 2</a>
""".encode()
    mock_requests_get.return_value = page_response

    data = docs_site_loader.load_data(url)

    # Computed after load_data because visited_links is read from the loader
    # once the call has finished.
    hashed_text = " ".join(docs_site_loader.visited_links) + url
    expected_doc_id = hashlib.sha256(hashed_text.encode()).hexdigest()

    assert len(data["data"]) == 2
    assert data["doc_id"] == expected_doc_id
119
def test_if_response_status_not_200(mock_requests_get, docs_site_loader):
    """With a 404 response, ``load_data`` returns no data but still returns the expected ``doc_id``."""
    url = "https://example.com"

    not_found = Response()
    not_found.status_code = 404
    mock_requests_get.return_value = not_found

    data = docs_site_loader.load_data(url)

    # Same id recipe as the success case: SHA-256 of the space-joined
    # visited links followed by the start URL.
    hashed_text = " ".join(docs_site_loader.visited_links) + url
    expected_doc_id = hashlib.sha256(hashed_text.encode()).hexdigest()

    assert len(data["data"]) == 0
    assert data["doc_id"] == expected_doc_id