Pillow

Форк
0
/
test_pdfparser.py 
127 строк · 5.4 Кб
1
from __future__ import annotations
2

3
import time
4

5
import pytest
6

7
from PIL.PdfParser import (
8
    IndirectObjectDef,
9
    IndirectReference,
10
    PdfBinary,
11
    PdfDict,
12
    PdfFormatError,
13
    PdfName,
14
    PdfParser,
15
    PdfStream,
16
    decode_text,
17
    encode_text,
18
    pdf_repr,
19
)
20

21

22
def test_text_encode_decode() -> None:
    """Check encode_text and decode_text against fixed byte/text pairs."""
    # Bytes that encode_text is expected to produce for "abc"; the same bytes
    # are expected to decode back to "abc".
    encoded_abc = b"\xFE\xFF\x00a\x00b\x00c"
    assert encode_text("abc") == encoded_abc

    # (input bytes, expected decoded text)
    decode_cases = (
        (encoded_abc, "abc"),
        (b"abc", "abc"),
        (b"\x1B a \x1C", "\u02D9 a \u02DD"),
    )
    for raw, expected_text in decode_cases:
        assert decode_text(raw) == expected_text
27

28

29
def test_indirect_refs() -> None:
    """Check ==/!= for IndirectReference and IndirectObjectDef.

    Each class is compared with an equal instance, an instance differing in
    the second argument, the other class built from the same arguments, and
    a plain tuple.
    """
    class_pairs = (
        (IndirectReference, IndirectObjectDef),
        (IndirectObjectDef, IndirectReference),
    )
    for cls, other_cls in class_pairs:
        subject = cls(1, 2)
        assert subject == cls(1, 2)
        assert subject != cls(1, 3)
        # Same arguments but a different class must not compare equal.
        assert subject != other_cls(1, 2)
        # Neither must a bare tuple holding the same values.
        assert subject != (1, 2)
38

39

40
def test_parsing() -> None:
    """Exercise PdfParser.interpret_name and PdfParser.get_value on literals.

    Covers names, indirect references, booleans, null, comments, hex and
    parenthesised strings, numbers, dictionaries, arrays, streams and the
    CreationDate/ModDate attributes of a parsed dictionary.
    """
    escaped_name = b"Name#23Hash"
    assert PdfParser.interpret_name(escaped_name) == b"Name#Hash"
    assert PdfParser.interpret_name(escaped_name, as_text=True) == "Name#Hash"

    # (data, second positional argument, expected result tuple).
    # NOTE(review): the second tuple element looks like the position just
    # past the parsed token -- confirm against PdfParser.get_value.
    value_cases = [
        (b"1 2 R ", 0, (IndirectReference(1, 2), 5)),
        (b"true[", 0, (True, 4)),
        (b"false%", 0, (False, 5)),
        (b"null<", 0, (None, 4)),
        (b"%cmt\n %cmt\n 123\n", 0, (123, 15)),
        (b"<901FA3>", 0, (b"\x90\x1F\xA3", 8)),
        (b"asd < 9 0 1 f A > qwe", 3, (b"\x90\x1F\xA0", 17)),
        (b"(asd)", 0, (b"asd", 5)),
        (b"(asd(qwe)zxc)zzz(aaa)", 0, (b"asd(qwe)zxc", 13)),
        (b"(Two \\\nwords.)", 0, (b"Two words.", 14)),
        (b"(Two\nlines.)", 0, (b"Two\nlines.", 12)),
        (b"(Two\r\nlines.)", 0, (b"Two\nlines.", 13)),
        (b"(Two\\nlines.)", 0, (b"Two\nlines.", 13)),
        (b"(One\\(paren).", 0, (b"One(paren", 12)),
        (b"(One\\)paren).", 0, (b"One)paren", 12)),
        (b"(\\0053)", 0, (b"\x053", 7)),
        (b"(\\053)", 0, (b"\x2B", 6)),
        (b"(\\53)", 0, (b"\x2B", 5)),
        (b"(\\53a)", 0, (b"\x2Ba", 6)),
        (b"(\\1111)", 0, (b"\x491", 7)),
        (b" 123 (", 0, (123, 4)),
    ]
    for data, pos, expected in value_cases:
        assert PdfParser.get_value(data, pos) == expected

    # The real number is compared after rounding the difference to 7 places
    # rather than with exact float equality.
    real_result = PdfParser.get_value(b" 123.4 %", 0)
    assert round(abs(real_result[0] - 123.4), 7) == 0
    assert real_result[1] == 6

    with pytest.raises(PdfFormatError):
        PdfParser.get_value(b"]", 0)

    # Dictionary: values are reachable both as attributes and by bytes key.
    parsed_dict = PdfParser.get_value(b"<</Name (value) /N /V>>", 0)[0]
    assert isinstance(parsed_dict, PdfDict)
    assert len(parsed_dict) == 2
    assert parsed_dict.Name == "value"
    assert parsed_dict[b"Name"] == b"value"
    assert parsed_dict.N == PdfName("V")

    # Array: the same four tokens come back as a flat list.
    parsed_array = PdfParser.get_value(b"[/Name (value) /N /V]", 0)[0]
    assert isinstance(parsed_array, list)
    assert len(parsed_array) == 4
    assert parsed_array[0] == PdfName("Name")

    # Stream: a dictionary followed by stream ... endstream.
    stream_source = b"<</Name (value) /Length 5>>\nstream\nabcde\nendstream<<..."
    parsed_stream = PdfParser.get_value(stream_source, 0)[0]
    assert isinstance(parsed_stream, PdfStream)
    assert parsed_stream.dictionary.Name == "value"
    assert parsed_stream.decode() == b"abcde"

    # Raw date string -> expected "%Y%m%d%H%M%S" rendering of the attribute.
    date_cases = {
        b"20180729214124": "20180729214124",
        b"D:20180729214124": "20180729214124",
        b"D:2018072921": "20180729210000",
        b"D:20180729214124Z": "20180729214124",
        b"D:20180729214124+08'00'": "20180729134124",
        b"D:20180729214124-05'00'": "20180730024124",
    }
    for name in ["CreationDate", "ModDate"]:
        for raw_date, formatted in date_cases.items():
            source = b"".join([b"<</", name.encode(), b" (", raw_date, b")>>"])
            info = PdfParser.get_value(source, 0)[0]
            assert time.strftime("%Y%m%d%H%M%S", getattr(info, name)) == formatted
96

97

98
def test_pdf_repr() -> None:
    """Check the byte serialisation produced by bytes() and pdf_repr()."""
    # The PDF object types must serialise identically through bytes() and
    # pdf_repr(); the cases are rebuilt for each serializer so that every
    # check runs against freshly constructed objects.
    for serialize in (bytes, pdf_repr):
        object_cases = [
            (IndirectReference(1, 2), b"1 2 R"),
            (IndirectObjectDef(*IndirectReference(1, 2)), b"1 2 obj"),
            (PdfName(b"Name#Hash"), b"/Name#23Hash"),
            (PdfName("Name#Hash"), b"/Name#23Hash"),
            (PdfDict({b"Name": IndirectReference(1, 2)}), b"<<\n/Name 1 2 R\n>>"),
            (PdfDict({"Name": IndirectReference(1, 2)}), b"<<\n/Name 1 2 R\n>>"),
        ]
        for obj, expected in object_cases:
            assert serialize(obj) == expected

    # Plain Python values and PdfBinary are only checked through pdf_repr().
    plain_cases = [
        (123, b"123"),
        (True, b"true"),
        (False, b"false"),
        (None, b"null"),
        (b"a)/b\\(c", rb"(a\)/b\\\(c)"),
        ([123, True, {"a": PdfName(b"b")}], b"[ 123 true <<\n/a /b\n>> ]"),
        (PdfBinary(b"\x90\x1F\xA0"), b"<901FA0>"),
    ]
    for value, expected in plain_cases:
        assert pdf_repr(value) == expected
122

123

124
def test_duplicate_xref_entry() -> None:
    """Check the xref entry read for object 6 of the duplicate-xref sample.

    Opens Tests/images/duplicate_xref_entry.pdf and asserts that the first
    element of existing_entries[6] is 1197.
    NOTE(review): that element is presumably the object's byte offset --
    confirm against the xref table implementation.
    """
    pdf = PdfParser("Tests/images/duplicate_xref_entry.pdf")
    try:
        assert pdf.xref_table.existing_entries[6][0] == 1197
    finally:
        # Close even when the check above raises, so a failing assertion
        # does not leave the parser open.
        pdf.close()
128

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.