# embedchain — 113 lines · 4.0 KB
1import csv
2import os
3import pathlib
4import tempfile
5from unittest.mock import MagicMock, patch
6
7import pytest
8
9from embedchain.loaders.csv import CsvLoader
10
11
@pytest.mark.parametrize("delimiter", [",", "\t", ";", "|"])
def test_load_data(delimiter):
    """
    Test csv loader

    Writes a small CSV file using the given delimiter, loads it by path with
    ``CsvLoader.load_data`` and checks that each data row comes back with the
    expected content string, the file path as ``url`` and a row number that
    starts at 1.
    """
    expected_contents = [
        "Name: Alice, Age: 28, Occupation: Engineer",
        "Name: Bob, Age: 35, Occupation: Doctor",
        "Name: Charlie, Age: 22, Occupation: Student",
    ]

    # Creating temporary CSV file. delete=False lets the file outlive the
    # ``with`` block so the loader can reopen it by name once it is closed
    # (and therefore fully flushed to disk).
    with tempfile.NamedTemporaryFile(mode="w+", newline="", delete=False) as tmpfile:
        writer = csv.writer(tmpfile, delimiter=delimiter)
        writer.writerow(["Name", "Age", "Occupation"])
        writer.writerow(["Alice", "28", "Engineer"])
        writer.writerow(["Bob", "35", "Doctor"])
        writer.writerow(["Charlie", "22", "Student"])
        filename = tmpfile.name

    try:
        # Loading CSV using CsvLoader
        loader = CsvLoader()
        result = loader.load_data(filename)
        data = result["data"]

        # Assertions
        assert len(data) == 3
        for row_number, (entry, expected) in enumerate(zip(data, expected_contents), start=1):
            assert entry["content"] == expected
            assert entry["meta_data"]["url"] == filename
            assert entry["meta_data"]["row"] == row_number
    finally:
        # Cleaning up the temporary file even when an assertion above fails
        os.unlink(filename)
49
50
@pytest.mark.parametrize("delimiter", [",", "\t", ";", "|"])
def test_load_data_with_file_uri(delimiter):
    """
    Test csv loader with file URI

    Writes a small CSV file using the given delimiter, converts its path to a
    ``file://`` URI, loads it with ``CsvLoader.load_data`` and checks that
    each data row comes back with the expected content string, the URI as
    ``url`` and a row number that starts at 1.
    """
    expected_contents = [
        "Name: Alice, Age: 28, Occupation: Engineer",
        "Name: Bob, Age: 35, Occupation: Doctor",
        "Name: Charlie, Age: 22, Occupation: Student",
    ]

    # Creating temporary CSV file. delete=False lets the file outlive the
    # ``with`` block so the loader can reopen it once it is closed (and
    # therefore fully flushed to disk).
    with tempfile.NamedTemporaryFile(mode="w+", newline="", delete=False) as tmpfile:
        writer = csv.writer(tmpfile, delimiter=delimiter)
        writer.writerow(["Name", "Age", "Occupation"])
        writer.writerow(["Alice", "28", "Engineer"])
        writer.writerow(["Bob", "35", "Doctor"])
        writer.writerow(["Charlie", "22", "Student"])
        path = tmpfile.name

    try:
        filename = pathlib.Path(path).as_uri()  # Convert path to file URI

        # Loading CSV using CsvLoader
        loader = CsvLoader()
        result = loader.load_data(filename)
        data = result["data"]

        # Assertions
        assert len(data) == 3
        for row_number, (entry, expected) in enumerate(zip(data, expected_contents), start=1):
            assert entry["content"] == expected
            assert entry["meta_data"]["url"] == filename
            assert entry["meta_data"]["row"] == row_number
    finally:
        # Cleaning up the temporary file even when an assertion above fails
        os.unlink(path)
88
89
@pytest.mark.parametrize("content", ["ftp://example.com", "sftp://example.com", "mailto://example.com"])
def test_get_file_content(content):
    """
    Test _get_file_content method of CsvLoader for URLs that must be rejected

    Tests that a ValueError is raised for each of the parametrized URLs
    """
    # Build the loader outside the ``raises`` block so that only the call
    # under test can satisfy the expected ValueError.
    loader = CsvLoader()
    with pytest.raises(ValueError):
        loader._get_file_content(content)
95
96
@pytest.mark.parametrize("content", ["http://example.com", "https://example.com"])
def test_get_file_content_http(content):
    """
    Test _get_file_content method of CsvLoader for http and https URLs

    ``requests.get`` is replaced by a mock, so no network request is made.
    Tests that the URL is fetched exactly once, that ``raise_for_status`` is
    called on the response, and that the returned file-like object yields
    the response text.
    """
    fake_response = MagicMock()
    fake_response.text = "Name,Age,Occupation\nAlice,28,Engineer\nBob,35,Doctor\nCharlie,22,Student"

    with patch("requests.get", return_value=fake_response) as fake_get:
        stream = CsvLoader()._get_file_content(content)

        fake_get.assert_called_once_with(content)
        fake_response.raise_for_status.assert_called_once()
        assert stream.read() == fake_response.text
114