cython

Форк
0
/
Scanning.py 
574 строки · 19.4 Кб
1
# cython: infer_types=True
#
#   Cython Scanner
#


import cython
# Declare these module-level names as plain Python objects for the Cython
# compiler; this must stay ahead of the imports that bind them.
cython.declare(make_lexicon=object, lexicon=object,
               print_function=object, error=object, warning=object,
               os=object, platform=object)

import os
import platform
from unicodedata import normalize
from contextlib import contextmanager

from .. import Utils
from ..Plex.Scanners import Scanner
from ..Plex.Errors import UnrecognizedInput
from .Errors import error, warning, hold_errors, release_errors, CompileError
from .Lexicon import any_string_prefix, make_lexicon, IDENT
from .Future import print_function

# Module-level scanner flags.  'trace_scanner' is copied into every
# PyrexScanner instance as its 'trace' attribute.
debug_scanner = 0
trace_scanner = 0
scanner_debug_flags = 0
scanner_dump_file = None

# Shared lexicon, built on first use by get_lexicon().
lexicon = None
30

31

32
def get_lexicon():
    """Return the shared scanner lexicon, creating it on first use.

    The object returned by ``make_lexicon()`` is stored in the module-level
    ``lexicon`` variable, so later calls reuse it instead of rebuilding it.
    """
    global lexicon
    if not lexicon:
        lexicon = make_lexicon()
    return lexicon
37

38

39
#------------------------------------------------------------------
40

41
# Identifiers that PyrexScanner treats as keywords when scanning a '.py' file.
py_reserved_words = [
    "global", "nonlocal", "def", "class", "print", "del", "pass", "break",
    "continue", "return", "raise", "import", "exec", "try",
    "except", "finally", "while", "if", "elif", "else", "for",
    "in", "assert", "and", "or", "not", "is", "lambda",
    "from", "yield", "with",
]

# Keywords for all other (Cython) sources: the Python ones plus the
# Cython-specific additions.
pyx_reserved_words = py_reserved_words + [
    "include", "ctypedef", "cdef", "cpdef",
    "cimport", "DEF", "IF", "ELIF", "ELSE"
]
53

54

55
#------------------------------------------------------------------
56

57
class CompileTimeScope:
    """A name -> value mapping with an optional enclosing (outer) scope.

    ``lookup()`` searches this scope first and then walks outwards through
    the chain of outer scopes.
    """

    def __init__(self, outer=None):
        self.entries = {}
        self.outer = outer

    def declare(self, name, value):
        """Bind 'name' to 'value' in this scope, replacing any previous binding."""
        self.entries[name] = value

    def update(self, other):
        """Merge the mapping (or iterable of pairs) 'other' into this scope."""
        self.entries.update(other)

    def lookup_here(self, name):
        """Return the value bound to 'name' in this scope only.

        Raises KeyError if the name is not declared here.
        """
        return self.entries[name]

    def __contains__(self, name):
        # Membership only considers this scope, not the outer ones.
        return name in self.entries

    def lookup(self, name):
        """Return the value bound to 'name' here or in any outer scope.

        Raises KeyError if no scope in the chain declares the name.
        """
        try:
            return self.lookup_here(name)
        except KeyError:
            outer = self.outer
            if outer is None:
                # End of the chain: propagate the original KeyError.
                raise
            return outer.lookup(name)
84

85

86
def initial_compile_time_env():
    """Create the default compile-time environment.

    Returns an empty CompileTimeScope whose outer scope is pre-populated
    with the ``UNAME_*`` values taken from ``platform.uname()``, a fixed
    selection of builtins, and the legacy names 'reduce', 'unicode',
    'long' and 'xrange'.
    """
    import builtins
    from functools import reduce

    benv = CompileTimeScope()

    # zip() stops at the shorter input, so only the first five uname
    # fields are used.
    uname_names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE', 'UNAME_VERSION', 'UNAME_MACHINE')
    for name, value in zip(uname_names, platform.uname()):
        benv.declare(name, value)

    builtin_names = (
        'False', 'True',
        'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes',
        'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate', 'filter',
        'float', 'format', 'frozenset', 'hash', 'hex', 'int', 'len',
        'list', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'range',
        'repr', 'reversed', 'round', 'set', 'slice', 'sorted', 'str',
        'sum', 'tuple', 'zip',
        ### defined below in a platform independent way
        # 'long', 'unicode', 'reduce', 'xrange'
    )

    for name in builtin_names:
        try:
            benv.declare(name, getattr(builtins, name))
        except AttributeError:
            # Not every listed name exists in the running interpreter
            # (likely Py3); such names are simply left undeclared.
            pass

    # Py2/3 adaptations: fall back to the Py3 equivalents when the
    # legacy builtin is missing.
    benv.declare('reduce', reduce)
    benv.declare('unicode', str)
    benv.declare('long', getattr(builtins, 'long', builtins.int))
    benv.declare('xrange', getattr(builtins, 'xrange', builtins.range))

    # Hand out a fresh inner scope for further declarations; the
    # defaults stay in the outer one.
    return CompileTimeScope(benv)
121

122

123
#------------------------------------------------------------------
124

125
class SourceDescriptor:
    """
    A SourceDescriptor should be considered immutable.

    Base class for objects describing where source code came from.
    get_escaped_description() calls self.get_description(), which this
    class does not define itself.
    """
    filename = None
    in_utility_code = False

    # One of 'pyx', 'pxd' or 'py'; see set_file_type_from_name().
    _file_type = 'pyx'

    # Cache for get_escaped_description().
    _escaped_description = None
    # Key used by the ordering methods below.
    _cmp_name = ''

    def __str__(self):
        # To catch all places where a descriptor is used directly as a filename.
        # Raised explicitly so the guard is not stripped under "python -O".
        raise AssertionError("SourceDescriptor must not be used directly as a filename")

    def set_file_type_from_name(self, filename):
        """Set the file type from the extension of 'filename'.

        Only '.pyx', '.pxd' and '.py' are recognised; anything else is
        treated as 'pyx'.
        """
        ext = os.path.splitext(filename)[1]
        self._file_type = ext[1:] if ext in ('.pyx', '.pxd', '.py') else 'pyx'

    def is_cython_file(self):
        """Return True if the file type is 'pyx' or 'pxd'."""
        return self._file_type in ('pyx', 'pxd')

    def is_python_file(self):
        """Return True if the file type is 'py'."""
        return self._file_type == 'py'

    def get_escaped_description(self):
        """Return get_description() with backslashes replaced by '/' (cached)."""
        if self._escaped_description is None:
            # Use forward slashes on Windows since these paths
            # will be used in the #line directives in the C/C++ files.
            self._escaped_description = self.get_description().replace('\\', '/')
        return self._escaped_description

    # The comparisons below are only used to provide some sort of order.
    # Comparing against an object without '_cmp_name' yields False.

    def __gt__(self, other):
        try:
            return self._cmp_name > other._cmp_name
        except AttributeError:
            return False

    def __lt__(self, other):
        try:
            return self._cmp_name < other._cmp_name
        except AttributeError:
            return False

    def __le__(self, other):
        try:
            return self._cmp_name <= other._cmp_name
        except AttributeError:
            return False

    def __copy__(self):
        return self  # immutable, no need to copy

    def __deepcopy__(self, memo):
        return self  # immutable, no need to copy
182

183

184
class FileSourceDescriptor(SourceDescriptor):
    """
    Represents a code source. A code source is a more generic abstraction
    for a "filename" (as sometimes the code doesn't come from a file).
    Instances of code sources are passed to Scanner.__init__ as the
    optional name argument and will be passed back when asking for
    the position()-tuple.
    """
    def __init__(self, filename, path_description=None):
        filename = Utils.decode_filename(filename)
        self.filename = filename
        self.path_description = path_description or filename
        try:
            self._short_path_description = os.path.relpath(self.path_description)
        except ValueError:
            # path not under current directory => use complete file path
            self._short_path_description = self.path_description
        # Prefer relative paths to current directory (which is most likely the project root) over absolute paths.
        workdir = os.path.abspath('.') + os.sep
        self.file_path = filename[len(workdir):] if filename.startswith(workdir) else filename
        self.set_file_type_from_name(filename)
        self._cmp_name = filename
        # Maps (encoding, error_handling) -> list of lines, or None when the
        # file has been read once with that key but not cached yet.
        self._lines = {}

    def get_lines(self, encoding=None, error_handling=None):
        """Return the lines of the file as read by Utils.open_source_file().

        We cache the lines only the second time this is called for a given
        (encoding, error_handling) pair, in order to save memory when they
        are only used once.
        """
        key = (encoding, error_handling)
        cached = self._lines.get(key)
        if cached is not None:
            return cached

        with Utils.open_source_file(self.filename, encoding=encoding, error_handling=error_handling) as f:
            lines = f.readlines()

        # A key that is already present (mapped to None) marks a repeated
        # read, so cache the result now.  Otherwise do not cache the first
        # access, but remember that we already read it once.
        self._lines[key] = lines if key in self._lines else None
        return lines

    def get_description(self):
        """Return the path description, relative to the working directory if possible."""
        return self._short_path_description

    def get_error_description(self):
        """Return the filename with a leading current-directory prefix removed."""
        path = self.filename
        cwd = Utils.decode_filename(os.getcwd() + os.path.sep)
        if path.startswith(cwd):
            return path[len(cwd):]
        return path

    def get_filenametable_entry(self):
        return self.file_path

    def __eq__(self, other):
        # Two file descriptors are equal when they name the same file.
        return isinstance(other, FileSourceDescriptor) and self.filename == other.filename

    def __hash__(self):
        return hash(self.filename)

    def __repr__(self):
        return "<FileSourceDescriptor:%s>" % self.filename
251

252

253
class StringSourceDescriptor(SourceDescriptor):
    """
    Instances of this class can be used instead of a filenames if the
    code originates from a string object.
    """
    def __init__(self, name, code):
        self.name = name
        #self.set_file_type_from_name(name)
        # Every piece produced by the split gets a trailing newline,
        # including the last one.
        self.codelines = [x + "\n" for x in code.split("\n")]
        self._cmp_name = name

    def get_lines(self, encoding=None, error_handling=None):
        """Return the code as a list of newline-terminated lines.

        Without an encoding the stored lines are returned as they are.
        With an encoding, each line is round-tripped through
        encode()/decode(), applying 'error_handling' while encoding.
        """
        if not encoding:
            return self.codelines
        # str.encode() rejects None for its 'errors' argument, so fall
        # back to its documented default when no handler was given.
        errors = error_handling or 'strict'
        return [line.encode(encoding, errors).decode(encoding)
                for line in self.codelines]

    def get_description(self):
        return self.name

    get_error_description = get_description

    def get_filenametable_entry(self):
        return "<stringsource>"

    def __hash__(self):
        return id(self)
        # Do not hash on the name, an identical string source should be the
        # same object (name is often defaulted in other places)
        # return hash(self.name)

    def __eq__(self, other):
        return isinstance(other, StringSourceDescriptor) and self.name == other.name

    def __repr__(self):
        return "<StringSourceDescriptor:%s>" % self.name
290

291

292
#------------------------------------------------------------------
293

294
class PyrexScanner(Scanner):
    """Scanner producing the token stream for Cython and Python sources.

    The current token is exposed as 'self.sy' (the symbol) and
    'self.systring' (its text); next() advances to the following token.
    """
    #  context            Context  Compilation context
    #  included_files     [string] Files included with 'include' statement
    #  compile_time_env   dict     Environment for conditional compilation
    #  compile_time_eval  boolean  In a true conditional compilation context
    #  compile_time_expr  boolean  In a compile-time expression context
    #  put_back_on_failure  list or None  If set, this records states so the tentatively_scan
    #                                       contextmanager can restore it

    def __init__(self, file, filename, parent_scanner=None,
                 scope=None, context=None, source_encoding=None, parse_comments=True, initial_pos=None):
        Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)

        # 'filename' is a source descriptor; it decides which keyword set applies.
        if filename.is_python_file():
            self.in_python_file = True
            keywords = py_reserved_words
        else:
            self.in_python_file = False
            keywords = pyx_reserved_words
        self.keywords = {keyword: keyword for keyword in keywords}

        # Nesting counter maintained by enter_async()/exit_async().
        self.async_enabled = 0

        if parent_scanner:
            # Share the compilation state of the parent scanner.
            self.context = parent_scanner.context
            self.included_files = parent_scanner.included_files
            self.compile_time_env = parent_scanner.compile_time_env
            self.compile_time_eval = parent_scanner.compile_time_eval
            self.compile_time_expr = parent_scanner.compile_time_expr

            if parent_scanner.async_enabled:
                self.enter_async()
        else:
            self.context = context
            self.included_files = scope.included_files
            self.compile_time_env = initial_compile_time_env()
            self.compile_time_eval = 1
            self.compile_time_expr = 0
            # Entries supplied through the options are merged into the defaults.
            if getattr(context.options, 'compile_time_env', None):
                self.compile_time_env.update(context.options.compile_time_env)
        self.parse_comments = parse_comments
        self.source_encoding = source_encoding
        self.trace = trace_scanner
        self.indentation_stack = [0]
        # '\0' means that no indentation character has been seen yet.
        self.indentation_char = '\0'
        self.bracket_nesting_level = 0

        self.put_back_on_failure = None

        # Start in the INDENT state and read the first token.
        self.begin('INDENT')
        self.sy = ''
        self.next()

    def normalize_ident(self, text):
        """Produce an IDENT token, NFKC-normalising non-ASCII identifiers."""
        if not text.isascii():
            text = normalize('NFKC', text)
        self.produce(IDENT, text)

    def commentline(self, text):
        """Produce a 'commentline' token, but only if comments are being parsed."""
        if self.parse_comments:
            self.produce('commentline', text)

    def strip_underscores(self, text, symbol):
        """Produce 'symbol' with all underscores removed from its text."""
        self.produce(symbol, text.replace('_', ''))

    def current_level(self):
        """Return the innermost indentation level on the stack."""
        return self.indentation_stack[-1]

    def open_bracket_action(self, text):
        self.bracket_nesting_level += 1
        return text

    def close_bracket_action(self, text):
        self.bracket_nesting_level -= 1
        return text

    def newline_action(self, text):
        # Newlines inside brackets do not end the logical line.
        if self.bracket_nesting_level == 0:
            self.begin('INDENT')
            self.produce('NEWLINE', '')

    # Scanner state to enter for each kind of opening quote.
    string_states = {
        "'":   'SQ_STRING',
        '"':   'DQ_STRING',
        "'''": 'TSQ_STRING',
        '"""': 'TDQ_STRING'
    }

    def begin_string_action(self, text: str):
        """Enter the string state matching the opening quote in 'text'."""
        # Drop any leading string prefix characters to get at the bare quote.
        while text and text[0] in any_string_prefix:
            text = text[1:]
        self.begin(self.string_states[text])
        self.produce('BEGIN_STRING')

    def end_string_action(self, text):
        self.begin('')
        self.produce('END_STRING')

    def unclosed_string_action(self, text):
        self.end_string_action(text)
        self.error_at_scanpos("Unclosed string literal")

    def indentation_action(self, text: str):
        """Handle the leading whitespace 'text' of a line.

        Checks that tabs and spaces are used consistently and produces
        INDENT/DEDENT tokens when the indentation level changes.
        """
        self.begin('')
        # Indentation within brackets should be ignored.
        #if self.bracket_nesting_level > 0:
        #    return
        # Check that tabs and spaces are being used consistently.
        if text:
            c = text[0]
            if self.indentation_char == '\0':
                # The first indented line fixes the indentation character.
                self.indentation_char = c
            else:
                if self.indentation_char != c:
                    self.error_at_scanpos("Mixed use of tabs and spaces")
            # Anything left after removing 'c' is a different character.
            if text.replace(c, "") != "":
                self.error_at_scanpos("Mixed use of tabs and spaces")
        # Figure out how many indents/dedents to do
        current_level: cython.Py_ssize_t = self.current_level()
        new_level: cython.Py_ssize_t = len(text)
        if new_level == current_level:
            return
        elif new_level > current_level:
            self.indentation_stack.append(new_level)
            self.produce('INDENT', '')
        else:
            while new_level < self.current_level():
                self.indentation_stack.pop()
                self.produce('DEDENT', '')
            # The dedent has to land exactly on an enclosing level.
            if new_level != self.current_level():
                self.error_at_scanpos("Inconsistent indentation")

    def eof_action(self, text):
        # Close all indentation levels that are still open, then signal EOF.
        while len(self.indentation_stack) > 1:
            self.produce('DEDENT', '')
            self.indentation_stack.pop()
        self.produce('EOF', '')

    def next(self):
        """Advance to the next token, updating 'self.sy' and 'self.systring'."""
        try:
            sy, systring = self.read()
        except UnrecognizedInput:
            self.error_at_scanpos("Unrecognized character")
            return  # just a marker, error() always raises
        if sy == IDENT:
            if systring in self.keywords:
                # 'print' and 'exec' stop being keywords once the
                # compilation context says so; from then on they are
                # scanned as plain identifiers.
                if systring == 'print' and print_function in self.context.future_directives:
                    self.keywords.pop('print', None)
                elif systring == 'exec' and self.context.language_level >= 3:
                    self.keywords.pop('exec', None)
                else:
                    sy = self.keywords[systring]  # intern
            systring = self.context.intern_ustring(systring)
        if self.put_back_on_failure is not None:
            # Record the token for tentatively_scan().
            self.put_back_on_failure.append((sy, systring, self.position()))
        self.sy = sy
        self.systring = systring
        if False:  # debug_scanner:
            _, line, col = self.position()
            if not self.systring or self.sy == self.systring:
                t = self.sy
            else:
                t = "%s %s" % (self.sy, self.systring)
            print("--- %3d %2d %s" % (line, col, t))

    def peek(self):
        """Return the (sy, systring) pair of the next token without consuming it."""
        saved = self.sy, self.systring
        saved_pos = self.position()
        self.next()
        upcoming = self.sy, self.systring
        # Push the token back and restore the previous current token.
        self.unread(self.sy, self.systring, self.position())
        self.sy, self.systring = saved
        self.last_token_position_tuple = saved_pos
        return upcoming

    def put_back(self, sy, systring, pos):
        """Unread the current token and make (sy, systring, pos) the current one."""
        self.unread(self.sy, self.systring, self.last_token_position_tuple)
        self.sy = sy
        self.systring = systring
        self.last_token_position_tuple = pos


    def error(self, message, pos=None, fatal=True):
        """Report 'message' at 'pos' (default: the current token position).

        If 'fatal' is true, the object returned by the module-level
        error() function is raised.
        """
        if pos is None:
            pos = self.position()
        if self.sy == 'INDENT':
            error(pos, "Possible inconsistent indentation")
        err = error(pos, message)
        if fatal:
            raise err

    def error_at_scanpos(self, message):
        # Like error(fatal=True), but gets the current scanning position rather than
        # the position of the last token read.
        pos = self.get_current_scan_pos()
        self.error(message, pos, True)

    def expect(self, what, message=None):
        """Consume the current token if its symbol is 'what', else report an error."""
        if self.sy == what:
            self.next()
        else:
            self.expected(what, message)

    def expect_keyword(self, what, message=None):
        """Consume the current token if it is the identifier 'what', else report an error."""
        if self.sy == IDENT and self.systring == what:
            self.next()
        else:
            self.expected(what, message)

    def expected(self, what, message=None):
        """Report that 'what' was expected, using 'message' if one is given."""
        if message:
            self.error(message)
        else:
            if self.sy == IDENT:
                found = self.systring
            else:
                found = self.sy
            self.error("Expected '%s', found '%s'" % (what, found))

    def expect_indent(self):
        self.expect('INDENT', "Expected an increase in indentation level")

    def expect_dedent(self):
        self.expect('DEDENT', "Expected a decrease in indentation level")

    def expect_newline(self, message="Expected a newline", ignore_semicolon: cython.bint = False):
        # Expect either a newline or end of file
        useless_trailing_semicolon = None
        if ignore_semicolon and self.sy == ';':
            useless_trailing_semicolon = self.position()
            self.next()
        if self.sy != 'EOF':
            self.expect('NEWLINE', message)
        # Warn only after the newline check has passed.
        if useless_trailing_semicolon is not None:
            warning(useless_trailing_semicolon, "useless trailing semicolon")

    def enter_async(self):
        """Enter an async context; 'async' and 'await' become keywords."""
        self.async_enabled += 1
        if self.async_enabled == 1:
            self.keywords['async'] = 'async'
            self.keywords['await'] = 'await'

    def exit_async(self):
        """Leave an async context; the outermost exit removes the async keywords."""
        assert self.async_enabled > 0
        self.async_enabled -= 1
        if not self.async_enabled:
            del self.keywords['await']
            del self.keywords['async']
            # A current token that was read as a keyword turns back into
            # a plain identifier.
            if self.sy in ('async', 'await'):
                self.sy, self.systring = IDENT, self.context.intern_ustring(self.sy)
549

550
@contextmanager
def tentatively_scan(scanner: PyrexScanner):
    """Scan speculatively, rewinding the scanner if errors were recorded.

    Errors are held for the duration of the block and the object returned
    by hold_errors() is yielded to the caller.  A CompileError raised
    inside the block is swallowed.  If errors were recorded, the tokens
    read inside the block are put back so that the scanner is at its
    initial token again.  The held errors are always released with
    ignore=True on exit.
    """
    errors = hold_errors()
    try:
        # Remember the recording list of an enclosing tentatively_scan
        # block (or None) and start a fresh one for this block.
        put_back_on_failure = scanner.put_back_on_failure
        scanner.put_back_on_failure = []
        initial_state = (scanner.sy, scanner.systring, scanner.position())
        try:
            yield errors
        except CompileError:
            pass
        finally:
            if errors:
                if scanner.put_back_on_failure:
                    # The last recorded token is the scanner's current one
                    # and gets unread by the first put_back() call, so it
                    # is skipped here.
                    for put_back in reversed(scanner.put_back_on_failure[:-1]):
                        scanner.put_back(*put_back)
                    # we need to restore the initial state too
                    scanner.put_back(*initial_state)
            elif put_back_on_failure is not None:
                # the outer "tentatively_scan" block that we're in might still
                # want to undo this block
                put_back_on_failure.extend(scanner.put_back_on_failure)
            scanner.put_back_on_failure = put_back_on_failure
    finally:
        release_errors(ignore=True)
575

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.