1
from __future__ import annotations
7
from PIL.PdfParser import (
22
def test_text_encode_decode() -> None:
    """Check ``encode_text`` on one string and ``decode_text`` on three inputs."""
    # Expected encoded form of "abc": a 0xFE 0xFF prefix followed by two
    # bytes per character.
    prefixed_abc = b"\xFE\xFF\x00a\x00b\x00c"
    assert encode_text("abc") == prefixed_abc

    # (raw bytes, expected text) pairs for decoding.
    decode_cases = (
        (prefixed_abc, "abc"),
        # No prefix: the bytes come back as the same characters.
        (b"abc", "abc"),
        # Bytes 0x1B and 0x1C are expected to map to U+02D9 and U+02DD.
        (b"\x1B a \x1C", "\u02D9 a \u02DD"),
    )
    for raw, expected in decode_cases:
        assert decode_text(raw) == expected
29
def test_indirect_refs() -> None:
    """Check equality of ``IndirectReference`` and ``IndirectObjectDef``.

    For each class: instances with the same two numbers compare equal,
    a different second number compares unequal, and neither the sibling
    class nor a plain tuple with the same numbers compares equal.
    """
    class_pairs = (
        (IndirectReference, IndirectObjectDef),
        (IndirectObjectDef, IndirectReference),
    )
    for cls, sibling in class_pairs:
        assert cls(1, 2) == cls(1, 2)
        assert cls(1, 2) != cls(1, 3)
        assert cls(1, 2) != sibling(1, 2)
        assert cls(1, 2) != (1, 2)
40
def test_parsing() -> None:
    """Exercise ``PdfParser.interpret_name`` and ``PdfParser.get_value``.

    ``get_value(data, offset)`` is checked against ``(value, end_offset)``
    tuples for references, keywords, numbers, hex strings, literal strings,
    dictionaries, arrays, streams and date strings.
    """
    # Names: "#23" is the hex escape for "#".
    assert PdfParser.interpret_name(b"Name#23Hash") == b"Name#Hash"
    assert PdfParser.interpret_name(b"Name#23Hash", as_text=True) == "Name#Hash"

    # Indirect reference and keyword values; the trailing byte is not consumed.
    assert PdfParser.get_value(b"1 2 R ", 0) == (IndirectReference(1, 2), 5)
    assert PdfParser.get_value(b"true[", 0) == (True, 4)
    assert PdfParser.get_value(b"false%", 0) == (False, 5)
    assert PdfParser.get_value(b"null<", 0) == (None, 4)

    # Leading "%..." comment lines are skipped before the value.
    assert PdfParser.get_value(b"%cmt\n %cmt\n 123\n", 0) == (123, 15)

    # Hex strings: inner whitespace is ignored and an odd trailing digit is
    # treated as if followed by 0.
    assert PdfParser.get_value(b"<901FA3>", 0) == (b"\x90\x1F\xA3", 8)
    assert PdfParser.get_value(b"asd < 9 0 1 f A > qwe", 3) == (b"\x90\x1F\xA0", 17)

    # Literal strings: nested parentheses, line continuation, newline
    # normalisation, escaped parentheses and octal escapes.
    assert PdfParser.get_value(b"(asd)", 0) == (b"asd", 5)
    assert PdfParser.get_value(b"(asd(qwe)zxc)zzz(aaa)", 0) == (b"asd(qwe)zxc", 13)
    assert PdfParser.get_value(b"(Two \\\nwords.)", 0) == (b"Two words.", 14)
    assert PdfParser.get_value(b"(Two\nlines.)", 0) == (b"Two\nlines.", 12)
    assert PdfParser.get_value(b"(Two\r\nlines.)", 0) == (b"Two\nlines.", 13)
    assert PdfParser.get_value(b"(Two\\nlines.)", 0) == (b"Two\nlines.", 13)
    assert PdfParser.get_value(b"(One\\(paren).", 0) == (b"One(paren", 12)
    assert PdfParser.get_value(b"(One\\)paren).", 0) == (b"One)paren", 12)
    assert PdfParser.get_value(b"(\\0053)", 0) == (b"\x053", 7)
    assert PdfParser.get_value(b"(\\053)", 0) == (b"\x2B", 6)
    assert PdfParser.get_value(b"(\\53)", 0) == (b"\x2B", 5)
    assert PdfParser.get_value(b"(\\53a)", 0) == (b"\x2Ba", 6)
    assert PdfParser.get_value(b"(\\1111)", 0) == (b"\x491", 7)

    # Numbers: integer and real, with leading whitespace.
    assert PdfParser.get_value(b" 123 (", 0) == (123, 4)
    assert round(abs(PdfParser.get_value(b" 123.4 %", 0)[0] - 123.4), 7) == 0
    assert PdfParser.get_value(b" 123.4 %", 0)[1] == 6

    # A stray closing bracket is not a value.
    with pytest.raises(PdfFormatError):
        PdfParser.get_value(b"]", 0)

    # Dictionary: attribute access yields text, bytes-key access yields bytes.
    d = PdfParser.get_value(b"<</Name (value) /N /V>>", 0)[0]
    assert isinstance(d, PdfDict)
    assert d.Name == "value"
    assert d[b"Name"] == b"value"
    assert d.N == PdfName("V")

    # Array.
    a = PdfParser.get_value(b"[/Name (value) /N /V]", 0)[0]
    assert isinstance(a, list)
    assert a[0] == PdfName("Name")

    # Stream: a dictionary followed by "stream ... endstream".
    # get_value returns (value, end_offset), so [0] selects the stream itself.
    s = PdfParser.get_value(
        b"<</Name (value) /Length 5>>\nstream\nabcde\nendstream<<...", 0
    )[0]
    assert isinstance(s, PdfStream)
    assert s.dictionary.Name == "value"
    assert s.decode() == b"abcde"

    # Date entries: each raw date string maps to the expected
    # "%Y%m%d%H%M%S" rendering; the expected values have any explicit UTC
    # offset subtracted and missing fields filled with zeros.
    for name in ["CreationDate", "ModDate"]:
        for date, value in {
            b"20180729214124": "20180729214124",
            b"D:20180729214124": "20180729214124",
            b"D:2018072921": "20180729210000",
            b"D:20180729214124Z": "20180729214124",
            b"D:20180729214124+08'00'": "20180729134124",
            b"D:20180729214124-05'00'": "20180730024124",
        }.items():
            b = b"<</" + name.encode() + b" (" + date + b")>>"
            d = PdfParser.get_value(b, 0)[0]
            assert time.strftime("%Y%m%d%H%M%S", getattr(d, name)) == value
98
def test_pdf_repr() -> None:
    """Check PDF byte serialisation via ``bytes()`` and via ``pdf_repr``."""
    # bytes() on the PDF wrapper objects.
    assert bytes(IndirectReference(1, 2)) == b"1 2 R"
    assert bytes(IndirectObjectDef(*IndirectReference(1, 2))) == b"1 2 obj"
    assert bytes(PdfName(b"Name#Hash")) == b"/Name#23Hash"
    assert bytes(PdfName("Name#Hash")) == b"/Name#23Hash"
    assert bytes(PdfDict({b"Name": IndirectReference(1, 2)})) == b"<<\n/Name 1 2 R\n>>"
    assert bytes(PdfDict({"Name": IndirectReference(1, 2)})) == b"<<\n/Name 1 2 R\n>>"

    # pdf_repr() must produce the same output for the same objects.
    assert pdf_repr(IndirectReference(1, 2)) == b"1 2 R"
    assert pdf_repr(IndirectObjectDef(*IndirectReference(1, 2))) == b"1 2 obj"
    assert pdf_repr(PdfName(b"Name#Hash")) == b"/Name#23Hash"
    assert pdf_repr(PdfName("Name#Hash")) == b"/Name#23Hash"
    # These two comparisons must be asserted; as bare expressions their
    # result is discarded and the checks never run.
    assert (
        pdf_repr(PdfDict({b"Name": IndirectReference(1, 2)})) == b"<<\n/Name 1 2 R\n>>"
    )
    assert (
        pdf_repr(PdfDict({"Name": IndirectReference(1, 2)})) == b"<<\n/Name 1 2 R\n>>"
    )

    # pdf_repr() on plain Python values.
    assert pdf_repr(123) == b"123"
    assert pdf_repr(True) == b"true"
    assert pdf_repr(False) == b"false"
    assert pdf_repr(None) == b"null"
    # Parentheses and backslashes inside a byte string are backslash-escaped.
    assert pdf_repr(b"a)/b\\(c") == rb"(a\)/b\\\(c)"
    assert pdf_repr([123, True, {"a": PdfName(b"b")}]) == b"[ 123 true <<\n/a /b\n>> ]"
    assert pdf_repr(PdfBinary(b"\x90\x1F\xA0")) == b"<901FA0>"
124
def test_duplicate_xref_entry() -> None:
    """Check the xref entry read from a PDF with a duplicate xref entry.

    Object 6 in ``duplicate_xref_entry.pdf`` is expected to have 1197 as the
    first element of its ``existing_entries`` record.
    """
    # Use the parser as a context manager so the file it opens is closed
    # even when the assertion fails.
    with PdfParser("Tests/images/duplicate_xref_entry.pdf") as pdf:
        assert pdf.xref_table.existing_entries[6][0] == 1197