1
# cython: infer_types=True
8
cython.declare(make_lexicon=object, lexicon=object,
9
print_function=object, error=object, warning=object,
10
os=object, platform=object)
14
from unicodedata import normalize
15
from contextlib import contextmanager
18
from ..Plex.Scanners import Scanner
19
from ..Plex.Errors import UnrecognizedInput
20
from .Errors import error, warning, hold_errors, release_errors, CompileError
21
from .Lexicon import any_string_prefix, make_lexicon, IDENT
22
from .Future import print_function
26
scanner_debug_flags = 0
27
scanner_dump_file = None
35
lexicon = make_lexicon()
39
#------------------------------------------------------------------
42
"global", "nonlocal", "def", "class", "print", "del", "pass", "break",
43
"continue", "return", "raise", "import", "exec", "try",
44
"except", "finally", "while", "if", "elif", "else", "for",
45
"in", "assert", "and", "or", "not", "is", "lambda",
46
"from", "yield", "with",
49
pyx_reserved_words = py_reserved_words + [
50
"include", "ctypedef", "cdef", "cpdef",
51
"cimport", "DEF", "IF", "ELIF", "ELSE"
55
#------------------------------------------------------------------
57
class CompileTimeScope:
59
def __init__(self, outer=None):
63
def declare(self, name, value):
    """Bind ``name`` to ``value`` in this scope's ``entries`` mapping.

    An existing binding for ``name`` is overwritten.
    """
    self.entries[name] = value
66
def update(self, other):
    """Merge ``other`` into this scope's ``entries``.

    ``other`` is handed straight to ``entries.update``, so it may be
    anything that method accepts (for example a mapping of names to values).
    """
    self.entries.update(other)
69
def lookup_here(self, name):
    """Return the value bound to ``name`` in this scope's own ``entries``.

    The lookup is a plain subscript, so an unknown name propagates whatever
    ``entries`` raises for a missing key (``KeyError`` for a dict).
    """
    return self.entries[name]
72
def __contains__(self, name):
    """Return whether ``name`` is bound in this scope's own ``entries``."""
    return name in self.entries
75
def lookup(self, name):
77
return self.lookup_here(name)
81
return outer.lookup(name)
86
def initial_compile_time_env():
87
benv = CompileTimeScope()
88
names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE', 'UNAME_VERSION', 'UNAME_MACHINE')
89
for name, value in zip(names, platform.uname()):
90
benv.declare(name, value)
95
'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes',
96
'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate', 'filter',
97
'float', 'format', 'frozenset', 'hash', 'hex', 'int', 'len',
98
'list', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'range',
99
'repr', 'reversed', 'round', 'set', 'slice', 'sorted', 'str',
100
'sum', 'tuple', 'zip',
101
### defined below in a platform independent way
102
# 'long', 'unicode', 'reduce', 'xrange'
107
benv.declare(name, getattr(builtins, name))
108
except AttributeError:
113
from functools import reduce
114
benv.declare('reduce', reduce)
115
benv.declare('unicode', str)
116
benv.declare('long', getattr(builtins, 'long', getattr(builtins, 'int')))
117
benv.declare('xrange', getattr(builtins, 'xrange', getattr(builtins, 'range')))
119
denv = CompileTimeScope(benv)
123
#------------------------------------------------------------------
125
class SourceDescriptor:
127
A SourceDescriptor should be considered immutable.
130
in_utility_code = False
134
_escaped_description = None
137
assert False # To catch all places where a descriptor is used directly as a filename
139
def set_file_type_from_name(self, filename):
    """Derive ``self._file_type`` from the extension of ``filename``.

    The recognised extensions '.pyx', '.pxd' and '.py' are stored without
    their leading dot.  Any other extension, or no extension at all, falls
    back to 'pyx'.  The comparison is case-sensitive.
    """
    ext = os.path.splitext(filename)[1]
    # Conditional expression instead of the fragile ``cond and a or b`` idiom.
    self._file_type = ext[1:] if ext in ('.pyx', '.pxd', '.py') else 'pyx'
143
def is_cython_file(self):
    """Return True when ``self._file_type`` is 'pyx' or 'pxd'."""
    return self._file_type in ('pyx', 'pxd')
146
def is_python_file(self):
    """Return True when ``self._file_type`` is exactly 'py'."""
    return self._file_type == 'py'
149
def get_escaped_description(self):
    """Return ``get_description()`` with backslashes turned into slashes.

    The converted string is computed on the first call and cached in
    ``self._escaped_description``; later calls return the cached value.
    """
    if self._escaped_description is None:
        # Use forward slashes on Windows since these paths
        # will be used in the #line directives in the C/C++ files.
        self._escaped_description = self.get_description().replace('\\', '/')
    return self._escaped_description
156
def __gt__(self, other):
157
# this is only used to provide some sort of order
159
return self._cmp_name > other._cmp_name
160
except AttributeError:
163
def __lt__(self, other):
164
# this is only used to provide some sort of order
166
return self._cmp_name < other._cmp_name
167
except AttributeError:
170
def __le__(self, other):
171
# this is only used to provide some sort of order
173
return self._cmp_name <= other._cmp_name
174
except AttributeError:
178
return self # immutable, no need to copy
180
def __deepcopy__(self, memo):
    """Return ``self`` unchanged; ``memo`` is ignored.

    The descriptor is treated as immutable, so no copy is made.
    """
    return self  # immutable, no need to copy
184
class FileSourceDescriptor(SourceDescriptor):
186
Represents a code source. A code source is a more generic abstraction
187
for a "filename" (as sometimes the code doesn't come from a file).
188
Instances of code sources are passed to Scanner.__init__ as the
189
optional name argument and will be passed back when asking for
190
the position()-tuple.
192
def __init__(self, filename, path_description=None):
193
filename = Utils.decode_filename(filename)
194
self.filename = filename
195
self.path_description = path_description or filename
197
self._short_path_description = os.path.relpath(self.path_description)
199
# path not under current directory => use complete file path
200
self._short_path_description = self.path_description
201
# Prefer relative paths to current directory (which is most likely the project root) over absolute paths.
202
workdir = os.path.abspath('.') + os.sep
203
self.file_path = filename[len(workdir):] if filename.startswith(workdir) else filename
204
self.set_file_type_from_name(filename)
205
self._cmp_name = filename
208
def get_lines(self, encoding=None, error_handling=None):
209
# we cache the lines only the second time this is called, in
210
# order to save memory when they are only used once
211
key = (encoding, error_handling)
213
lines = self._lines[key]
214
if lines is not None:
219
with Utils.open_source_file(self.filename, encoding=encoding, error_handling=error_handling) as f:
220
lines = f.readlines()
222
if key in self._lines:
223
self._lines[key] = lines
225
# do not cache the first access, but remember that we
226
# already read it once
227
self._lines[key] = None
230
def get_description(self):
    """Return the stored ``_short_path_description`` of this source."""
    return self._short_path_description
233
def get_error_description(self):
235
cwd = Utils.decode_filename(os.getcwd() + os.path.sep)
236
if path.startswith(cwd):
237
return path[len(cwd):]
240
def get_filenametable_entry(self):
    """Return ``self.file_path`` as this source's filename-table entry."""
    return self.file_path
243
def __eq__(self, other):
    """Compare equal only to another FileSourceDescriptor with the same ``filename``."""
    return isinstance(other, FileSourceDescriptor) and self.filename == other.filename
247
return hash(self.filename)
250
return "<FileSourceDescriptor:%s>" % self.filename
253
class StringSourceDescriptor(SourceDescriptor):
255
Instances of this class can be used instead of a filename if the
256
code originates from a string object.
258
def __init__(self, name, code):
260
#self.set_file_type_from_name(name)
261
self.codelines = [x + "\n" for x in code.split("\n")]
262
self._cmp_name = name
264
def get_lines(self, encoding=None, error_handling=None):
266
return self.codelines
268
return [line.encode(encoding, error_handling).decode(encoding)
269
for line in self.codelines]
271
def get_description(self):
274
get_error_description = get_description
276
def get_filenametable_entry(self):
    """Return the fixed placeholder used for string sources in the filename table."""
    return "<stringsource>"
281
# Do not hash on the name, an identical string source should be the
282
# same object (name is often defaulted in other places)
283
# return hash(self.name)
285
def __eq__(self, other):
    """Compare equal only to another StringSourceDescriptor with the same ``name``."""
    return isinstance(other, StringSourceDescriptor) and self.name == other.name
289
return "<StringSourceDescriptor:%s>" % self.name
292
#------------------------------------------------------------------
294
class PyrexScanner(Scanner):
295
# context Context Compilation context
296
# included_files [string] Files included with 'include' statement
297
# compile_time_env dict Environment for conditional compilation
298
# compile_time_eval boolean In a true conditional compilation context
299
# compile_time_expr boolean In a compile-time expression context
300
# put_back_on_failure list or None If set, this records states so the tentatively_scan
301
# contextmanager can restore it
303
def __init__(self, file, filename, parent_scanner=None,
304
scope=None, context=None, source_encoding=None, parse_comments=True, initial_pos=None):
305
Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
307
if filename.is_python_file():
308
self.in_python_file = True
309
keywords = py_reserved_words
311
self.in_python_file = False
312
keywords = pyx_reserved_words
313
self.keywords = {keyword: keyword for keyword in keywords}
315
self.async_enabled = 0
318
self.context = parent_scanner.context
319
self.included_files = parent_scanner.included_files
320
self.compile_time_env = parent_scanner.compile_time_env
321
self.compile_time_eval = parent_scanner.compile_time_eval
322
self.compile_time_expr = parent_scanner.compile_time_expr
324
if parent_scanner.async_enabled:
327
self.context = context
328
self.included_files = scope.included_files
329
self.compile_time_env = initial_compile_time_env()
330
self.compile_time_eval = 1
331
self.compile_time_expr = 0
332
if getattr(context.options, 'compile_time_env', None):
333
self.compile_time_env.update(context.options.compile_time_env)
334
self.parse_comments = parse_comments
335
self.source_encoding = source_encoding
336
self.trace = trace_scanner
337
self.indentation_stack = [0]
338
self.indentation_char = '\0'
339
self.bracket_nesting_level = 0
341
self.put_back_on_failure = None
347
def normalize_ident(self, text):
    """Emit ``text`` as an ``IDENT`` token via ``self.produce``.

    Text containing non-ASCII characters is first put into Unicode NFKC
    form; pure-ASCII text is passed through unchanged.
    """
    if not text.isascii():
        text = normalize('NFKC', text)
    self.produce(IDENT, text)
352
def commentline(self, text):
    """Emit a 'commentline' token for ``text`` if ``self.parse_comments`` is set.

    When comment parsing is disabled, nothing is produced.
    """
    if self.parse_comments:
        self.produce('commentline', text)
356
def strip_underscores(self, text, symbol):
    """Emit ``symbol`` with every '_' removed from ``text``."""
    self.produce(symbol, text.replace('_', ''))
359
def current_level(self):
    """Return the top (most recently pushed) entry of ``self.indentation_stack``."""
    return self.indentation_stack[-1]
362
def open_bracket_action(self, text):
363
self.bracket_nesting_level += 1
366
def close_bracket_action(self, text):
367
self.bracket_nesting_level -= 1
370
def newline_action(self, text):
371
if self.bracket_nesting_level == 0:
373
self.produce('NEWLINE', '')
382
def begin_string_action(self, text: str):
383
while text and text[0] in any_string_prefix:
385
self.begin(self.string_states[text])
386
self.produce('BEGIN_STRING')
388
def end_string_action(self, text):
390
self.produce('END_STRING')
392
def unclosed_string_action(self, text):
    """Handle a string literal that is never closed.

    First runs the normal ``end_string_action`` for ``text``, then reports
    "Unclosed string literal" at the current scan position.
    """
    self.end_string_action(text)
    self.error_at_scanpos("Unclosed string literal")
396
def indentation_action(self, text: str):
398
# Indentation within brackets should be ignored.
399
#if self.bracket_nesting_level > 0:
401
# Check that tabs and spaces are being used consistently.
404
#print "Scanner.indentation_action: indent with", repr(c) ###
405
if self.indentation_char == '\0':
406
self.indentation_char = c
407
#print "Scanner.indentation_action: setting indent_char to", repr(c)
409
if self.indentation_char != c:
410
self.error_at_scanpos("Mixed use of tabs and spaces")
411
if text.replace(c, "") != "":
412
self.error_at_scanpos("Mixed use of tabs and spaces")
413
# Figure out how many indents/dedents to do
414
current_level: cython.Py_ssize_t = self.current_level()
415
new_level: cython.Py_ssize_t = len(text)
416
#print "Changing indent level from", current_level, "to", new_level ###
417
if new_level == current_level:
419
elif new_level > current_level:
420
#print "...pushing level", new_level ###
421
self.indentation_stack.append(new_level)
422
self.produce('INDENT', '')
424
while new_level < self.current_level():
425
#print "...popping level", self.indentation_stack[-1] ###
426
self.indentation_stack.pop()
427
self.produce('DEDENT', '')
428
#print "...current level now", self.current_level() ###
429
if new_level != self.current_level():
430
self.error_at_scanpos("Inconsistent indentation")
432
def eof_action(self, text):
    """Close all open indentation levels, then emit the 'EOF' token.

    One 'DEDENT' token is produced for each entry above the bottom of
    ``self.indentation_stack``, popping as it goes, so the stack ends up
    with a single entry.  ``text`` is not used.
    """
    while len(self.indentation_stack) > 1:
        self.produce('DEDENT', '')
        self.indentation_stack.pop()
    self.produce('EOF', '')
440
sy, systring = self.read()
441
except UnrecognizedInput:
442
self.error_at_scanpos("Unrecognized character")
443
return # just a marker, error() always raises
445
if systring in self.keywords:
446
if systring == 'print' and print_function in self.context.future_directives:
447
self.keywords.pop('print', None)
448
elif systring == 'exec' and self.context.language_level >= 3:
449
self.keywords.pop('exec', None)
451
sy = self.keywords[systring] # intern
452
systring = self.context.intern_ustring(systring)
453
if self.put_back_on_failure is not None:
454
self.put_back_on_failure.append((sy, systring, self.position()))
456
self.systring = systring
457
if False: # debug_scanner:
458
_, line, col = self.position()
459
if not self.systring or self.sy == self.systring:
462
t = "%s %s" % (self.sy, self.systring)
463
print("--- %3d %2d %s" % (line, col, t))
466
saved = self.sy, self.systring
467
saved_pos = self.position()
469
next = self.sy, self.systring
470
self.unread(self.sy, self.systring, self.position())
471
self.sy, self.systring = saved
472
self.last_token_position_tuple = saved_pos
475
def put_back(self, sy, systring, pos):
476
self.unread(self.sy, self.systring, self.last_token_position_tuple)
478
self.systring = systring
479
self.last_token_position_tuple = pos
482
def error(self, message, pos=None, fatal=True):
484
pos = self.position()
485
if self.sy == 'INDENT':
486
error(pos, "Possible inconsistent indentation")
487
err = error(pos, message)
490
def error_at_scanpos(self, message):
    """Report ``message`` as a fatal error at the current scan position.

    Like ``error(fatal=True)``, but uses ``get_current_scan_pos()`` rather
    than the position of the last token read.
    """
    pos = self.get_current_scan_pos()
    self.error(message, pos, True)
496
def expect(self, what, message=None):
500
self.expected(what, message)
502
def expect_keyword(self, what, message=None):
503
if self.sy == IDENT and self.systring == what:
506
self.expected(what, message)
508
def expected(self, what, message=None):
513
found = self.systring
516
self.error("Expected '%s', found '%s'" % (what, found))
518
def expect_indent(self):
    """Expect an 'INDENT' token via ``self.expect``, with a dedicated message."""
    self.expect('INDENT', "Expected an increase in indentation level")
521
def expect_dedent(self):
    """Expect a 'DEDENT' token via ``self.expect``, with a dedicated message."""
    self.expect('DEDENT', "Expected a decrease in indentation level")
524
def expect_newline(self, message="Expected a newline", ignore_semicolon: cython.bint = False):
525
# Expect either a newline or end of file
526
useless_trailing_semicolon = None
527
if ignore_semicolon and self.sy == ';':
528
useless_trailing_semicolon = self.position()
531
self.expect('NEWLINE', message)
532
if useless_trailing_semicolon is not None:
533
warning(useless_trailing_semicolon, "useless trailing semicolon")
535
def enter_async(self):
    """Enter one more level of async context.

    Increments ``self.async_enabled``.  On the transition from 0 to 1,
    'async' and 'await' are added to ``self.keywords``.
    """
    self.async_enabled += 1
    if self.async_enabled == 1:
        self.keywords['async'] = 'async'
        self.keywords['await'] = 'await'
541
def exit_async(self):
    """Leave one level of async context.

    Decrements ``self.async_enabled``, which must be positive on entry.
    When the counter reaches zero, 'async' and 'await' are removed from
    ``self.keywords``.  If the current token is one of those two keywords,
    it is re-tagged as an ``IDENT`` carrying the interned keyword text.
    """
    assert self.async_enabled > 0
    self.async_enabled -= 1
    if not self.async_enabled:
        del self.keywords['await']
        del self.keywords['async']
        # NOTE(review): the source's indentation was lost; this check is
        # placed inside the "async fully disabled" branch -- confirm.
        if self.sy in ('async', 'await'):
            self.sy, self.systring = IDENT, self.context.intern_ustring(self.sy)
551
def tentatively_scan(scanner: PyrexScanner):
552
errors = hold_errors()
554
put_back_on_failure = scanner.put_back_on_failure
555
scanner.put_back_on_failure = []
556
initial_state = (scanner.sy, scanner.systring, scanner.position())
559
except CompileError as e:
563
if scanner.put_back_on_failure:
564
for put_back in reversed(scanner.put_back_on_failure[:-1]):
565
scanner.put_back(*put_back)
566
# we need to restore the initial state too
567
scanner.put_back(*initial_state)
568
elif put_back_on_failure is not None:
569
# the outer "tentatively_scan" block that we're in might still
570
# want to undo this block
571
put_back_on_failure.extend(scanner.put_back_on_failure)
572
scanner.put_back_on_failure = put_back_on_failure
574
release_errors(ignore=True)