5
from pathlib import Path
6
from xml.etree import ElementTree
11
from bs4 import BeautifulSoup
13
from autogpt.commands.file_operations_utils import (
18
logger = logging.getLogger(__name__)
20
plain_text_str = "Hello, world!"
24
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as f:
25
f.write(plain_text_str)
30
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".csv") as f:
31
f.write(plain_text_str)
36
with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=".pdf") as f:
37
# Create a new PDF and add a page with the text plain_text_str
38
# Write the PDF header
39
f.write(b"%PDF-1.7\n")
40
# Write the document catalog
42
f.write(b"<< /Type /Catalog /Pages 2 0 R >>\n")
44
# Write the page object
47
b"<< /Type /Page /Parent 1 0 R /Resources << /Font << /F1 3 0 R >> >> "
48
b"/MediaBox [0 0 612 792] /Contents 4 0 R >>\n"
51
# Write the font object
54
b"<< /Type /Font /Subtype /Type1 /Name /F1 /BaseFont /Helvetica-Bold >>\n"
57
# Write the page contents object
59
f.write(b"<< /Length 25 >>\n")
61
f.write(b"BT\n/F1 12 Tf\n72 720 Td\n(Hello, world!) Tj\nET\n")
62
f.write(b"endstream\n")
64
# Write the cross-reference table
67
f.write(b"0000000000 65535 f \n")
68
f.write(b"0000000017 00000 n \n")
69
f.write(b"0000000073 00000 n \n")
70
f.write(b"0000000123 00000 n \n")
71
f.write(b"0000000271 00000 n \n")
73
f.write(b"<< /Size 5 /Root 1 0 R >>\n")
74
f.write(b"startxref\n")
82
with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=".docx") as f:
83
document = docx.Document()
84
document.add_paragraph(plain_text_str)
90
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".json") as f:
91
json.dump({"text": plain_text_str}, f)
96
root = ElementTree.Element("text")
97
root.text = plain_text_str
98
tree = ElementTree.ElementTree(root)
99
with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=".xml") as f:
105
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".yaml") as f:
106
yaml.dump({"text": plain_text_str}, f)
111
html = BeautifulSoup(
113
"<head><title>This is a test</title></head>"
114
f"<body><p>{plain_text_str}</p></body>"
118
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".html") as f:
124
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".md") as f:
125
f.write(f"# {plain_text_str}!\n")
129
def mock_latex_file():
130
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".tex") as f:
132
r"\documentclass{article}"
141
respective_file_creation_functions = {
142
".txt": mock_text_file,
143
".csv": mock_csv_file,
144
".pdf": mock_pdf_file,
145
".docx": mock_docx_file,
146
".json": mock_json_file,
147
".xml": mock_xml_file,
148
".yaml": mock_yaml_file,
149
".html": mock_html_file,
151
".tex": mock_latex_file,
153
binary_files_extensions = [".pdf", ".docx"]
156
@pytest.mark.parametrize(
157
"file_extension, c_file_creator",
158
respective_file_creation_functions.items(),
160
def test_parsers(file_extension, c_file_creator):
161
created_file_path = Path(c_file_creator())
162
with open(created_file_path, "rb") as file:
163
loaded_text = decode_textual_file(file, os.path.splitext(file.name)[1], logger)
165
assert plain_text_str in loaded_text
167
should_be_binary = file_extension in binary_files_extensions
168
assert should_be_binary == is_file_binary_fn(file)
170
created_file_path.unlink() # cleanup