4
Copyright (c) 2021 МГТУ им. Н.Э. Баумана, кафедра ИУ-6, Михаил Фетисов,
6
https://bmstu.codes/lsx/simodo/loom
9
#include "simodo/parser/fuze/FuzeRdp.h"
10
#include "simodo/parser/fuze/FuzeSblRdp.h"
11
#include "simodo/parser/fuze/FuzeOperationCode.h"
12
#include "simodo/parser/fuze/fuze_file_extension.h"
13
#include "simodo/parser/fuze/fuze_keywords.h"
15
#include "simodo/inout/token/RegularInputStreamSupplier.h"
16
#include "simodo/inout/convert/functions.h"
17
#include "simodo/inout/format/fmt.h"
22
#if __cplusplus >= __cpp_2017
24
namespace fs = std::filesystem;
26
#include <experimental/filesystem>
27
// Fallback alias for toolchains that only ship the Filesystem TS: the TS
// namespace is std::experimental::filesystem (std::filesystem::experimental
// does not exist, so the previous alias could never compile).
namespace fs = std::experimental::filesystem;
30
namespace simodo::parser
33
// Keyword lexemes of the fuze grammar language. They are only ever compared
// against token lexemes, listed as punctuation words and passed to message
// formatting, so they are immutable and initialised directly from the literal.
static const std::u16string INCLUDE_STRING {u"include"};
static const std::u16string MAIN_STRING {u"main"};
static const std::u16string REMOVE_STRING {u"remove"};
38
// Namespace-scope default stream supplier; the three-argument FuzeRdp
// constructor initialises _input_stream_supplier from it.
inout::RegularInputStreamSupplier regular_input_stream_supplier;
39
// Namespace-scope default syntax-data collector; the three-argument FuzeRdp
// constructor initialises _syntax_data_collector from it.
SyntaxDataCollector_null null_syntax_data_collector;
42
// Three-argument constructor: passes the reporter and flow.tree().files() to
// RdpBaseSugar and uses the namespace-scope defaults
// regular_input_stream_supplier and null_syntax_data_collector.
FuzeRdp::FuzeRdp(inout::Reporter_abstract & m,
43
const std::string & path,
44
ast::FormationFlow_interface & flow)
45
: RdpBaseSugar(m, flow.tree().files())
48
, _input_stream_supplier(regular_input_stream_supplier)
49
, _syntax_data_collector(null_syntax_data_collector)
51
// Computes _is_ready_for_parse and _current_file_index for this grammar file.
initiateFileForParse();
54
// Five-argument constructor: same as the three-argument form, but the input
// stream supplier and the syntax-data collector are provided by the caller.
FuzeRdp::FuzeRdp(inout::Reporter_abstract & m,
55
const std::string & path,
56
ast::FormationFlow_interface & flow,
57
inout::InputStreamSupplier_interface & input_stream_supplier,
58
SyntaxDataCollector_interface & syntax_data_collector)
59
: RdpBaseSugar(m, flow.tree().files())
62
, _input_stream_supplier(input_stream_supplier)
63
, _syntax_data_collector(syntax_data_collector)
65
// Computes _is_ready_for_parse and _current_file_index for this grammar file.
initiateFileForParse();
70
// NOTE(review): the signature line of this body is not among the visible
// lines; presumably it is the no-argument parse() that parseKeyword() calls
// on a nested FuzeRdp - confirm.
// Guard on the readiness flag computed by initiateFileForParse().
if (!_is_ready_for_parse)
73
// Same path normalisation as in initiateFileForParse(): start from _path and
// append FUZE_FILE_EXTENSION when the path has no extension.
fs::path grammar_path = _path;
75
if (grammar_path.extension().empty())
76
grammar_path += FUZE_FILE_EXTENSION;
78
// The stream for the grammar file is obtained from the configured supplier.
std::shared_ptr<inout::InputStream_interface> in = _input_stream_supplier.supply(grammar_path.string());
80
// A null stream or a stream that is not good() is reported as a fatal error.
if (!in || !in->good()) {
81
reporter().reportFatal(inout::fmt("Ошибка при открытии файла '%1'").arg(grammar_path.string()));
88
// Parses a fuze grammar from the given stream: sets up the lexical
// parameters, creates a tokenizer for _current_file_index and then processes
// tokens until an Empty token is reached.
bool FuzeRdp::parse(inout::InputStream_interface & stream)
90
// Guard on the readiness flag computed by initiateFileForParse().
if (!_is_ready_for_parse)
93
inout::LexicalParameters lex;
96
// Markup entries: two Comment forms and one double-quoted Annotation form.
{u"/*", u"*/", u"", inout::LexemeType::Comment},
97
{u"//", u"", u"", inout::LexemeType::Comment},
98
{u"\"", u"\"", u"\\", inout::LexemeType::Annotation}
101
// Entry tagged as Number; the trailing 10 is presumably the radix - confirm.
{u"N", inout::LexemeType::Number, 10}
103
lex.punctuation_chars = u"=|;><.{}[](),";
104
// Keyword lexemes registered as punctuation words.
lex.punctuation_words = {
105
INCLUDE_STRING, MAIN_STRING, REMOVE_STRING,
106
ANNOTATION_STRING, NUMBER_STRING, ID_STRING,
109
lex.may_national_letters_use = true;
110
lex.may_national_letters_mix = true;
112
inout::Tokenizer tzer(_current_file_index, stream, lex);
114
// Tokens are read through getToken(), which filters out Comment tokens.
inout::Token t = getToken(tzer);
116
// Main loop: runs until the current token is Empty.
while(t.type() != inout::LexemeType::Empty)
118
// Inner loop over consecutive semicolon tokens.
while(t.type() == inout::LexemeType::Punctuation && t.lexeme() == u";")
121
if (t.type() == inout::LexemeType::Empty)
124
// Punctuation qualified as Keyword is dispatched to parseKeyword().
if (t.type() == inout::LexemeType::Punctuation && t.qualification() == inout::TokenQualification::Keyword)
126
if (!parseKeyword(tzer, t))
131
// An identifier starts either a global script or a production.
else if (t.type() == inout::LexemeType::Id)
137
if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u"{")
139
// ID "{" <script> "}"
141
ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::GlobalScript), id, id);
143
// The script body is handed to a separate FuzeSblRdp parser.
FuzeSblRdp script_parser(reporter(), _flow, _syntax_data_collector);
145
script_parser.parse(tzer, t);
147
/// \todo Handle a false return value!
150
else if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u"=")
152
// ID "=" <pattern> ";"
154
if (!parseProduction(id, tzer, t))
157
/// \todo Handle a false return value!
161
return reportUnexpected(t);
164
return reportUnexpected(t);
172
// Handles a keyword statement. The lexeme of t selects one of three branches:
// include, main or remove; any other lexeme is reported as unexpected.
bool FuzeRdp::parseKeyword(inout::Tokenizer & tzer, inout::Token & t)
174
// Branch: include of another grammar file.
if ( t.lexeme() == INCLUDE_STRING)
178
// The grammar name may be given as an Annotation or as an Id token.
if (t.type() == inout::LexemeType::Annotation || t.type() == inout::LexemeType::Id)
180
inout::Token grammar_name_token = t;
181
std::string grammar_name = inout::toU8(grammar_name_token.lexeme());
185
if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u";")
187
// The included grammar is resolved relative to the directory of the current
// grammar file; FUZE_FILE_EXTENSION is appended when no extension is given.
fs::path grammar_path = _path;
189
grammar_path = grammar_path.parent_path() / grammar_name;
191
if (grammar_path.extension().empty())
192
grammar_path += FUZE_FILE_EXTENSION;
194
// NOTE(review): existence is checked on the real filesystem, while the file
// content is later obtained through _input_stream_supplier - confirm that
// this is intended for suppliers that do not read from disk.
if (!fs::exists(grammar_path)) {
195
reporter().reportError(grammar_name_token.makeLocation(files()),
196
inout::fmt("Файл грамматики '%1' не найден")
197
.arg(grammar_path.string()));
201
/// \note The operation is only needed to pass the reference on to the semantics
202
ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Reference), grammar_name_token, grammar_name_token);
203
// Child node carrying the resolved grammar path as an Annotation token.
inout::Token ref_token {inout::LexemeType::Annotation, grammar_path.u16string(), t.location()};
204
ast().addNode(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::None), ref_token, ref_token);
207
// The included grammar is parsed by a nested FuzeRdp that shares this
// parser's reporter, flow, stream supplier and syntax-data collector.
bool ok = FuzeRdp(reporter(), grammar_path.string(), _flow, _input_stream_supplier, _syntax_data_collector).parse();
210
reporter().reportError(grammar_name_token.makeLocation(files()),
211
inout::fmt("При разборе грамматики '%1' обнаружены ошибки")
212
.arg(grammar_name_token.lexeme()));
217
return reportUnexpected(t, "';'");
220
return reportUnexpected(t, inout::fmt("наименование грамматики"));
222
// Branch: declaration of the main production.
else if (t.lexeme() == MAIN_STRING)
226
if (t.type() == inout::LexemeType::Id)
228
inout::Token main_production = t;
232
if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u";")
234
ast().addNode(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Main), main_production, main_production);
238
return reportUnexpected(t, "';'");
241
return reportUnexpected(t, inout::fmt("идентификатор продукции"));
243
// Branch: removal of a production.
else if (t.lexeme() == REMOVE_STRING)
245
inout::Token production_token = getToken(tzer);
247
if (production_token.type() != inout::LexemeType::Id)
248
// NOTE(review): the check above is on production_token, but the token
// reported here is t - confirm which token should be reported.
return reportUnexpected(t, inout::fmt("идентификатор продукции"));
250
inout::Token equal_token = getToken(tzer);
252
// The production body is parsed by parseProduction() with the
// RemoveProduction operation code.
if (!parseProduction(production_token, tzer, equal_token, FuzeOperationCode::RemoveProduction))
255
/// \todo Handle a false return value!
260
// No keyword branch matched.
return reportUnexpected(t,
261
inout::fmt("ключевое слово '%1', '%2' или '%3'")
264
.arg(REMOVE_STRING));
267
// Parses the right-hand side of a production named by id. The current token t
// must be the equals sign; each pattern is added under a node created with
// operation_code and parsed by parsePattern().
bool FuzeRdp::parseProduction(const inout::Token & id, inout::Tokenizer &tzer, inout::Token &t, FuzeOperationCode operation_code)
269
// Anything other than the equals sign is rejected immediately.
if (t.type() != inout::LexemeType::Punctuation || t.lexeme() != u"=")
270
return reportUnexpected(t, "'='");
274
// Loop over patterns until the input is exhausted.
while(t.type() != inout::LexemeType::Empty)
276
ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(operation_code), id, id);
278
if (!parsePattern(tzer,t))
281
/// \todo Handle a false return value!
284
// After a pattern the token is checked for a semicolon, then for a vertical
// bar or equals sign; anything else is reported as unexpected.
if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u";")
288
else if (t.type() == inout::LexemeType::Punctuation && (t.lexeme() == u"|" || t.lexeme() == u"="))
294
return reportUnexpected(t, inout::fmt("'|', '=' или ';'"));
297
// Error report for premature end of input.
reporter().reportError(t.makeLocation(files()), inout::fmt("Преждевременный конец файла"));
301
// Parses one production pattern: opens a Production_Pattern node and then
// handles direction marks, embedded scripts and grammar symbols while the
// current token is not Empty.
bool FuzeRdp::parsePattern(inout::Tokenizer &tzer, inout::Token &t)
303
ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Production_Pattern), t, t);
305
while(t.type() != inout::LexemeType::Empty)
307
// Punctuation that is not qualified as a keyword.
if (t.type() == inout::LexemeType::Punctuation && t.qualification() != inout::TokenQualification::Keyword) {
308
// A less-than or greater-than sign is recorded as a Production_Direction node.
if (t.lexeme() == u"<" || t.lexeme() == u">") {
309
/// \todo Handle a false return value!
311
ast().addNode(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Production_Direction), t, t);
315
// An opening brace starts an embedded script, parsed by FuzeSblRdp.
if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u"{") {
316
ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Production_Script), t, t);
318
FuzeSblRdp script_parser(reporter(), _flow, _syntax_data_collector);
319
bool ok = script_parser.parse(tzer, t);
321
/// \todo Handle a false return value!
328
// Embedded script in the branch without a direction mark.
else if (t.lexeme() == u"{") {
329
/// \todo Handle a false return value!
331
ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Production_Script), t, t);
333
FuzeSblRdp script_parser(reporter(), _flow, _syntax_data_collector);
334
bool ok = script_parser.parse(tzer, t);
336
/// \todo Handle a false return value!
340
// Check for the tokens that parseProduction() examines after a pattern.
if (t.lexeme() == u";" || t.lexeme() == u"=" || t.lexeme() == u"|") {
341
/// \todo Handle a false return value!
345
return reportUnexpected(t, inout::fmt("'символ грамматики', '<', '>', '{', '=', '|' или ';'"));
348
// Grammar symbols: Id, Annotation or any keyword-qualified token.
if (t.type() == inout::LexemeType::Id || t.type() == inout::LexemeType::Annotation
349
|| t.qualification() == inout::TokenQualification::Keyword) {
350
ast().addNode(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::None), t, t);
354
return reportUnexpected(t, inout::fmt("'символ грамматики', '<', '>', '{', '=', '|' или ';'"));
357
/// \todo Handle a false return value!
362
// Fetches tokens from the tokenizer, skipping over Comment tokens. Every
// fetched token, comments included, is passed to
// _syntax_data_collector.collectToken().
inout::Token FuzeRdp::getToken(inout::Tokenizer & tzer) const
364
inout::Token token = tzer.getAnyToken();
366
// Comment tokens are collected and then replaced by the next token.
while (token.type() == inout::LexemeType::Comment) {
367
_syntax_data_collector.collectToken(token);
369
token = tzer.getAnyToken();
372
// The first non-comment token is collected as well.
_syntax_data_collector.collectToken(token);
377
// Normalises the grammar path and decides whether the file still has to be
// parsed: it is ready only when the path is not yet present in files().
void FuzeRdp::initiateFileForParse()
379
// Append FUZE_FILE_EXTENSION when the path has no extension.
fs::path grammar_path = _path;
381
if (grammar_path.extension().empty())
382
grammar_path += FUZE_FILE_EXTENSION;
384
_is_ready_for_parse = std::find(files().begin(),files().end(),grammar_path.string()) == files().end();
386
// For a new file the index is the current size of files(), taken before the
// file is registered through _flow.addFile().
if (_is_ready_for_parse) {
387
_current_file_index = files().size();
388
_flow.addFile(grammar_path.string());
391
// Presumably the branch for an already registered file - confirm.
_current_file_index = 0;