"""Routines for processing Windows .rc files."""

from __future__ import annotations

import os
import re
from enum import auto, Enum
from typing import Generator, Optional, NamedTuple, TextIO, TYPE_CHECKING, Union

from .utilities import Location, Message, unquote


# TokenType (an Enum whose members include WHITESPACE, NEWLINE, COMMENT,
# INSTRUCTION, KEYWORD, NUMBER, QUOTED, OPERATOR, OPEN and CLOSE) is defined
# in a part of the file not shown here.


class Token(NamedTuple):
    # Field names follow how tokens are consumed below (token.type, token.value, token.line).
    type: TokenType
    value: str
    line: int


# Regex matchers tried in order by the tokenizer; the remaining entries and the
# _OPMATCH character-to-token-type map are not shown here.
_MATCHERS = (
    (re.compile(r"[ \t]+"), TokenType.WHITESPACE),
    (re.compile(r"[A-Za-z_][0-9A-Za-z_]*"), TokenType.KEYWORD),
    (re.compile(r"[1-9][0-9]*|0(?:[xX][0-9A-Fa-f]+|[0-7]*)"), TokenType.NUMBER),
)


class RCTokenizer:
    """Simple RC file tokenizer, no preprocessor."""

    def __init__(self, source: TextIO) -> None:
        self.source = source
        self.data = source.readline().rstrip("\n")
        # Offset into the current line and the current (1-based) line number.
        self.pos = 0
        self.line = 1

    def __iter__(self) -> RCTokenizer:
        return self

    def __next__(self) -> Token:
        """Gets the next token from the file."""
        if self.pos >= len(self.data):
            # The current line is exhausted: read the next one and emit a NEWLINE token.
            self.data = self.source.readline()
            if not self.data:
                raise StopIteration
            self.data = self.data.rstrip("\n")
            self.pos = 0
            self.line += 1
            return Token(TokenType.NEWLINE, "\n", self.line - 1)

        ch = self.data[self.pos]
        # Dispatch on the current character (guard characters inferred from the handlers below).
        if ch == "/":
            token = self._handle_slash()
        elif ch == "#":
            token = self._handle_instruction()
        elif ch == '"':
            token = self._handle_quoted()
        else:
            for matcher, token_type in _MATCHERS:
                match = matcher.match(self.data[self.pos :])
                if match:
                    self.pos += match.end()
                    token = Token(token_type, match.group(0), self.line)
                    break
            else:
                # No matcher applied: fall back to a single-character operator token.
                self.pos += 1
                token = Token(_OPMATCH.get(ch, TokenType.OPERATOR), ch, self.line)
        return token

    def _handle_slash(self) -> Token:
        self.pos += 1
        if self.pos == len(self.data):
            return Token(TokenType.OPERATOR, "/", self.line)
        ch = self.data[self.pos]
        if ch == "/":
            # "//" line comment: consume the rest of the line.
            return self._read_to_eol(TokenType.COMMENT, "/")
        if ch == "*":
            return self._handle_block_comment()
        return Token(TokenType.OPERATOR, "/", self.line)

    def _handle_instruction(self) -> Token:
        # Preprocessor-style lines (#include, #define, ...) are kept as a single token.
        return self._read_to_eol(TokenType.INSTRUCTION, "")

    def _read_to_eol(self, token_type: TokenType, prefix: str) -> Token:
        token = prefix + self.data[self.pos :]
        self.pos = len(self.data)
        return Token(token_type, token, self.line)

    def _handle_block_comment(self) -> Token:
        start_line = self.line
        token = ""
        new_pos = self.data.find("*/", self.pos)
        while new_pos == -1:
            # The comment continues on the next line; keep accumulating.
            token += self.data[self.pos :]
            self.data = self.source.readline()
            if not self.data:
                raise ValueError("Unclosed block comment")
            self.data = self.data.rstrip("\n")
            self.pos = 0
            self.line += 1
            new_pos = self.data.find("*/")
        token += self.data[self.pos : new_pos]
        self.pos = new_pos + 2
        return Token(TokenType.COMMENT, token, start_line)

    def _handle_quoted(self) -> Token:
        # Scan to the closing quote; the escape handling from the elided lines is omitted here.
        new_pos = self.pos + 1
        while True:
            if new_pos == len(self.data):
                raise ValueError("Unclosed quotes")
            ch = self.data[new_pos]
            new_pos += 1
            if ch == '"':
                break
        token = self.data[self.pos : new_pos]
        self.pos = new_pos
        return Token(TokenType.QUOTED, token, self.line)
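

# Illustrative usage only (not part of the original module): the tokenizer can be
# driven directly from any text file object, e.g.
#
#     import io
#     for tok in RCTokenizer(io.StringIO('IDS_HELLO "Hello"\n')):
#         print(tok.type, tok.value, tok.line)
#
# which for this made-up input yields a KEYWORD, a WHITESPACE and a QUOTED token,
# all on line 1, before iteration stops at end of file.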

# Token types skipped while scanning for extractable strings (set partly elided).
EXTRACT_SKIP_TYPES = {
    TokenType.WHITESPACE,
    TokenType.INSTRUCTION,
    # ...
}

# Expected token sequence for the arguments of NC_("context", "message");
# a None value accepts any token text of that type.
NC_TOKENS = (
    (TokenType.OPEN, "("),
    (TokenType.QUOTED, None),
    (TokenType.OPERATOR, ","),
    (TokenType.QUOTED, None),
    (TokenType.CLOSE, ")"),
)
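
# Illustration only (the resource line below is made up, not from the original file):
# given a dialog statement such as
#
#     LTEXT  NC_("SettingsDialog", "Enable logging"), IDC_STATIC, 7, 7, 100, 10
#
# and assuming "LTEXT" is among the (elided) RC_KEYWORDS, _try_get_nc() below
# yields the message "Enable logging" with the context "SettingsDialog".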


class _RCExtractor:
    tokenizer: RCTokenizer

    def __init__(
        self, filename: Union[str, os.PathLike], tokenizer: RCTokenizer
    ) -> None:
        self.filename = str(filename)
        self.tokenizer = tokenizer

    def __iter__(self) -> _RCExtractor:
        return self

    def __next__(self) -> tuple[Message, Location]:
        # Scan until a keyword that can introduce a translatable string is found,
        # then try to extract a message from the tokens that follow.
        while True:
            token = next(self.tokenizer)
            if token.type != TokenType.KEYWORD:
                continue
            if token.value in RC_KEYWORDS:  # RC_KEYWORDS is defined in an elided part of the file
                result = self._try_get_simple()
                if result is not None:
                    return result

    def _try_get_simple(self) -> Optional[tuple[Message, Location]]:
        # Skip insignificant tokens, then expect a quoted string or an NC_(...) call.
        while True:
            token = next(self.tokenizer)
            if token.type not in EXTRACT_SKIP_TYPES:
                break
        if token.type == TokenType.KEYWORD and token.value == "NC_":
            return self._try_get_nc()
        if token.type != TokenType.QUOTED:
            return None
        message = unquote(token.value)
        return Message(message, None), Location(self.filename, token.line, [])

    def _try_get_nc(self) -> Optional[tuple[Message, Location]]:
        # Match the argument list of NC_("context", "message") token by token.
        location = Location(self.filename, self.tokenizer.line, [])
        buffer: list[Token] = []
        while len(buffer) < 5:
            token = next(self.tokenizer)
            if token.type in EXTRACT_SKIP_TYPES:
                continue
            expected_type, expected_value = NC_TOKENS[len(buffer)]
            if token.type != expected_type or (
                expected_value is not None and token.value != expected_value
            ):
                return None
            buffer.append(token)
        message = unquote(buffer[3].value)
        context = unquote(buffer[1].value)
        return Message(message, context), location


def extract_rc(  # the original def line is elided; "extract_rc" is a stand-in name
    filename: Union[str, os.PathLike]
) -> Generator[tuple[Message, Location], None, None]:
    """Extracts messages from a single Windows .rc file."""
    with open(filename, "rt", encoding="utf-16") as file:
        tokenizer = RCTokenizer(file)
        for result in _RCExtractor(filename, tokenizer):
            yield result
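

if __name__ == "__main__":
    # Quick manual check, not part of the original module: extract and print every
    # message from a UTF-16 .rc file named on the command line, using the classes
    # above directly so no elided helpers are needed.
    import sys

    with open(sys.argv[1], "rt", encoding="utf-16") as rc_file:
        for msg, loc in _RCExtractor(sys.argv[1], RCTokenizer(rc_file)):
            print(loc, msg)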