embedchain

Форк
0
/
test_csv.py 
113 строк · 4.0 Кб
1
import csv
2
import os
3
import pathlib
4
import tempfile
5
from unittest.mock import MagicMock, patch
6

7
import pytest
8

9
from embedchain.loaders.csv import CsvLoader
10

11

12
@pytest.mark.parametrize("delimiter", [",", "\t", ";", "|"])
def test_load_data(delimiter):
    """
    Test csv loader

    Tests that file is loaded, metadata is correct and content is correct
    for each parametrized delimiter.
    """
    rows = [
        ["Name", "Age", "Occupation"],
        ["Alice", "28", "Engineer"],
        ["Bob", "35", "Doctor"],
        ["Charlie", "22", "Student"],
    ]
    expected_contents = [
        "Name: Alice, Age: 28, Occupation: Engineer",
        "Name: Bob, Age: 35, Occupation: Doctor",
        "Name: Charlie, Age: 22, Occupation: Student",
    ]

    # Creating temporary CSV file. delete=False keeps the file on disk after the
    # handle is closed, so the loader can reopen it by name on every platform.
    with tempfile.NamedTemporaryFile(mode="w+", newline="", delete=False) as tmpfile:
        csv.writer(tmpfile, delimiter=delimiter).writerows(rows)
        filename = tmpfile.name

    try:
        # Loading CSV using CsvLoader
        loader = CsvLoader()
        result = loader.load_data(filename)
        data = result["data"]

        # Assertions
        assert len(data) == 3
        for row_number, (entry, expected) in enumerate(zip(data, expected_contents), start=1):
            assert entry["content"] == expected
            assert entry["meta_data"]["url"] == filename
            assert entry["meta_data"]["row"] == row_number
    finally:
        # Cleaning up the temporary file even when loading or an assertion fails
        os.unlink(filename)
49

50

51
@pytest.mark.parametrize("delimiter", [",", "\t", ";", "|"])
def test_load_data_with_file_uri(delimiter):
    """
    Test csv loader with file URI

    Tests that file is loaded, metadata is correct and content is correct
    when the source is passed as a ``file://`` URI.
    """
    rows = [
        ["Name", "Age", "Occupation"],
        ["Alice", "28", "Engineer"],
        ["Bob", "35", "Doctor"],
        ["Charlie", "22", "Student"],
    ]
    expected_contents = [
        "Name: Alice, Age: 28, Occupation: Engineer",
        "Name: Bob, Age: 35, Occupation: Doctor",
        "Name: Charlie, Age: 22, Occupation: Student",
    ]

    # Creating temporary CSV file. delete=False keeps the file on disk after the
    # handle is closed, so the loader can reopen it on every platform.
    with tempfile.NamedTemporaryFile(mode="w+", newline="", delete=False) as tmpfile:
        csv.writer(tmpfile, delimiter=delimiter).writerows(rows)
        tmp_path = tmpfile.name

    try:
        filename = pathlib.Path(tmp_path).as_uri()  # Convert path to file URI

        # Loading CSV using CsvLoader
        loader = CsvLoader()
        result = loader.load_data(filename)
        data = result["data"]

        # Assertions
        assert len(data) == 3
        for row_number, (entry, expected) in enumerate(zip(data, expected_contents), start=1):
            assert entry["content"] == expected
            assert entry["meta_data"]["url"] == filename
            assert entry["meta_data"]["row"] == row_number
    finally:
        # Cleaning up the temporary file (by filesystem path, not by URI) even
        # when loading or an assertion fails
        os.unlink(tmp_path)
88

89

90
@pytest.mark.parametrize("content", ["ftp://example.com", "sftp://example.com", "mailto://example.com"])
def test_get_file_content(content):
    """
    Test _get_file_content method of CsvLoader for ftp, sftp and mailto URLs

    Tests that a ValueError is raised for each of these URLs
    """
    loader = CsvLoader()

    # Only the call under test sits inside pytest.raises, so a ValueError raised
    # while constructing the loader cannot make this test pass by accident.
    with pytest.raises(ValueError):
        loader._get_file_content(content)
95

96

97
@pytest.mark.parametrize("content", ["http://example.com", "https://example.com"])
def test_get_file_content_http(content):
    """
    Test _get_file_content method of CsvLoader for http and https URLs

    Tests that the URL is fetched exactly once via requests.get, that the
    response status is checked, and that the returned file-like object
    yields the response text.
    """
    # Stand-in for the HTTP response returned by the patched requests.get
    fake_response = MagicMock()
    fake_response.text = "Name,Age,Occupation\nAlice,28,Engineer\nBob,35,Doctor\nCharlie,22,Student"

    with patch("requests.get", return_value=fake_response) as mocked_get:
        csv_loader = CsvLoader()
        returned_stream = csv_loader._get_file_content(content)

        mocked_get.assert_called_once_with(content)
        fake_response.raise_for_status.assert_called_once()
        assert returned_stream.read() == fake_response.text
114

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.