# embedchain
# 62 lines · 1.8 KB
1import tempfile
2
3import pytest
4
5from embedchain.loaders.xml import XmlLoader
6
# Taken from https://github.com/langchain-ai/langchain/blob/master/libs/langchain/tests/integration_tests/examples/factbook.xml
# NOTE(review): the "&" characters below are not escaped as "&amp;", so this
# is not strictly well-formed XML. The test asserts on the literal
# "Trinidad & Tobado" text, so the sample is kept as-is; presumably the
# loader's parser tolerates it -- confirm before "fixing" the sample.
SAMPLE_XML = """<?xml version="1.0" encoding="UTF-8"?>
<factbook>
<country>
<name>United States</name>
<capital>Washington, DC</capital>
<leader>Joe Biden</leader>
<sport>Baseball</sport>
</country>
<country>
<name>Canada</name>
<capital>Ottawa</capital>
<leader>Justin Trudeau</leader>
<sport>Hockey</sport>
</country>
<country>
<name>France</name>
<capital>Paris</capital>
<leader>Emmanuel Macron</leader>
<sport>Soccer</sport>
</country>
<country>
<name>Trinidad & Tobado</name>
<capital>Port of Spain</capital>
<leader>Keith Rowley</leader>
<sport>Track & Field</sport>
</country>
</factbook>"""
35
36
@pytest.mark.parametrize("xml", [SAMPLE_XML])
def test_load_data(xml: str):
    """
    Test XML loader

    Writes ``xml`` to a temporary file, loads that file with ``XmlLoader``
    and checks that exactly one entry is returned, that its content contains
    each country's name, capital and leader, and that its metadata URL is the
    path of the temporary file.
    """
    # Creating temporary XML file. The encoding is pinned to UTF-8 to match
    # the XML declaration instead of relying on the platform default.
    with tempfile.NamedTemporaryFile(mode="w+", encoding="utf-8") as tmpfile:
        tmpfile.write(xml)

        # The loader is given the path, not this handle, so the buffered
        # text has to be on disk before it is called.
        tmpfile.flush()
        filename = tmpfile.name

        # Loading XML using XmlLoader. This stays inside the ``with`` block
        # because the temporary file is removed as soon as it is closed.
        loader = XmlLoader()
        result = loader.load_data(filename)
        data = result["data"]

        # Assertions
        assert len(data) == 1
        assert "United States Washington, DC Joe Biden" in data[0]["content"]
        assert "Canada Ottawa Justin Trudeau" in data[0]["content"]
        assert "France Paris Emmanuel Macron" in data[0]["content"]
        assert "Trinidad & Tobado Port of Spain Keith Rowley" in data[0]["content"]
        assert data[0]["meta_data"]["url"] == filename
63