4
Copyright (c) 2021 МГТУ им. Н.Э. Баумана, кафедра ИУ-6, Михаил Фетисов,
6
https://bmstu.codes/lsx/simodo/loom
9
#include "simodo/parser/fuze/FuzeRdp.h"
10
#include "simodo/parser/fuze/FuzeSblRdp.h"
11
#include "simodo/parser/fuze/FuzeOperationCode.h"
12
#include "simodo/parser/fuze/fuze_file_extension.h"
13
#include "simodo/parser/fuze/fuze_keywords.h"
15
#include "simodo/inout/token/RegularInputStreamSupplier.h"
16
#include "simodo/inout/convert/functions.h"
17
#include "simodo/inout/format/fmt.h"
23
// Select the filesystem library implementation and expose it under the alias `fs`.
// NOTE(review): `__cpp_2017` is not a standard macro. Unless a project header defines
// it, the preprocessor evaluates it as 0 and the first branch is always taken —
// confirm where it is defined (the C++17 value of `__cplusplus` is 201703L).
#if __cplusplus >= __cpp_2017
#include <filesystem>
namespace fs = std::filesystem;
#else
#include <experimental/filesystem>
// The Filesystem TS is declared in std::experimental::filesystem
// (there is no std::filesystem::experimental namespace).
namespace fs = std::experimental::filesystem;
#endif
31
namespace simodo::parser
34
// Keyword lexemes of the grammar description language. They are registered as
// punctuation words for the tokenizer in parse() and compared against token
// lexemes in parseKeyword().
inline static std::u16string INCLUDE_STRING {std::u16string {u"include"}};
35
inline static std::u16string MAIN_STRING {std::u16string {u"main"}};
36
inline static std::u16string REMOVE_STRING {std::u16string {u"remove"}};
39
// Default collaborators bound by the three-argument FuzeRdp constructor.
inout::RegularInputStreamSupplier regular_input_stream_supplier;
40
SyntaxDataCollector_null null_syntax_data_collector;
43
// Constructs a grammar parser that uses the file-scope default input stream supplier
// and the null syntax data collector, then calls initiateFileForParse().
// The base class receives the reporter `m` and the file list of the flow's tree.
// NOTE(review): part of the member initializer list (presumably `_path` and `_flow`)
// and the body braces are not visible in this excerpt.
FuzeRdp::FuzeRdp(inout::Reporter_abstract & m,
44
const std::string & path,
45
ast::FormationFlow_interface & flow)
46
: RdpBaseSugar(m, flow.tree().files())
49
, _input_stream_supplier(regular_input_stream_supplier)
50
, _syntax_data_collector(null_syntax_data_collector)
52
initiateFileForParse();
55
// Constructs a grammar parser with a caller-provided input stream supplier and
// syntax data collector, then calls initiateFileForParse().
// The base class receives the reporter `m` and the file list of the flow's tree.
// NOTE(review): part of the member initializer list (presumably `_path` and `_flow`)
// and the body braces are not visible in this excerpt.
FuzeRdp::FuzeRdp(inout::Reporter_abstract & m,
56
const std::string & path,
57
ast::FormationFlow_interface & flow,
58
inout::InputStreamSupplier_interface & input_stream_supplier,
59
SyntaxDataCollector_interface & syntax_data_collector)
60
: RdpBaseSugar(m, flow.tree().files())
63
, _input_stream_supplier(input_stream_supplier)
64
, _syntax_data_collector(syntax_data_collector)
66
initiateFileForParse();
71
// NOTE(review): the signature line of this function is not visible in this excerpt;
// presumably this is the parameterless FuzeRdp::parse() overload that parseKeyword()
// invokes on a nested parser — confirm against the full file.
// Guard on the flag computed by initiateFileForParse(); the guarded statement is not visible.
if (!_is_ready_for_parse)
74
fs::path grammar_path = _path;
76
// Append the default grammar extension when the path has none.
if (grammar_path.extension().empty())
77
grammar_path += FUZE_FILE_EXTENSION;
79
// Obtain the input stream through the configured supplier.
std::shared_ptr<inout::InputStream_interface> in = _input_stream_supplier.supply(grammar_path.string());
81
// A null or not-good stream is reported as a fatal file-open error.
if (!in || !in->good()) {
82
reporter().reportFatal(inout::fmt("Ошибка при открытии файла '%1'").arg(grammar_path.string()));
89
// Tokenizes `stream` with the lexical rules of the grammar description language and
// walks the top-level statements: keyword statements (handled by parseKeyword),
// ID "{" <script> "}" and ID "=" <pattern> ";".
// NOTE(review): braces, return statements and some other statements of this
// function are not visible in this excerpt.
bool FuzeRdp::parse(inout::InputStream_interface & stream)
91
// Guard on the flag computed by initiateFileForParse(); the guarded statement is not visible.
if (!_is_ready_for_parse)
94
inout::LexicalParameters lex;
97
// Markup entries: "/* */" and "//" are comments; double-quoted text is an annotation
// (third field "\\" is presumably the escape prefix — confirm against LexicalParameters).
{u"/*", u"*/", u"", inout::LexemeType::Comment},
98
{u"//", u"", u"", inout::LexemeType::Comment},
99
{u"\"", u"\"", u"\\", inout::LexemeType::Annotation}
102
// Number mask entry (the trailing 10 is presumably the radix — confirm).
{u"N", inout::LexemeType::Number, 10}
104
lex.punctuation_chars = u"=|;><.{}[](),";
105
// Words the tokenizer reports as punctuation (the language keywords).
lex.punctuation_words = {
106
INCLUDE_STRING, MAIN_STRING, REMOVE_STRING,
107
ANNOTATION_STRING, NUMBER_STRING, ID_STRING,
110
lex.may_national_letters_use = true;
111
lex.may_national_letters_mix = true;
113
inout::Tokenizer tzer(_current_file_index, stream, lex);
115
inout::Token t = getToken(tzer);
117
// Main loop: runs until the tokenizer yields an Empty token.
while(t.type() != inout::LexemeType::Empty)
119
// Inner loop over ";" punctuation tokens between statements (loop body not visible).
while(t.type() == inout::LexemeType::Punctuation && t.lexeme() == u";")
122
if (t.type() == inout::LexemeType::Empty)
125
// Keyword statement.
if (t.type() == inout::LexemeType::Punctuation && t.qualification() == inout::TokenQualification::Keyword)
127
if (!parseKeyword(tzer, t))
132
else if (t.type() == inout::LexemeType::Id)
138
// NOTE(review): `id` is declared on a line not visible here; presumably it holds
// the identifier token that starts the statement.
if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u"{")
140
// ID "{" <script> "}"
142
ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::GlobalScript), id, id);
144
// The script body is parsed by the nested script parser.
FuzeSblRdp script_parser(reporter(), _flow, _syntax_data_collector);
146
script_parser.parse(tzer, t);
148
/// \todo Handle a false return value!
151
else if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u"=")
153
// ID "=" <pattern> ";"
155
if (!parseProduction(id, tzer, t))
158
/// \todo Handle a false return value!
162
return reportUnexpected(t);
165
return reportUnexpected(t);
173
// Parses a statement that starts with one of the keywords `include`, `main` or `remove`.
// `tzer` supplies further tokens; `t` is the current token (the keyword on entry).
// Fix: in the `remove` branch the diagnostic for a non-identifier production name is
// now raised for the offending token (`production_token`) instead of the keyword `t`.
// NOTE(review): braces, token-advancing statements and returns of this function are
// not visible in this excerpt; all visible lines are kept in their original order.
bool FuzeRdp::parseKeyword(inout::Tokenizer & tzer, inout::Token & t)
175
// include <grammar name> ";"
if ( t.lexeme() == INCLUDE_STRING)
179
if (t.type() == inout::LexemeType::Annotation || t.type() == inout::LexemeType::Id)
181
inout::Token grammar_name_token = t;
182
std::string grammar_name = inout::toU8(grammar_name_token.lexeme());
186
if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u";")
188
// The included grammar is resolved relative to the directory of the current grammar.
fs::path grammar_path = _path;
190
grammar_path = grammar_path.parent_path() / grammar_name;
192
// Append the default grammar extension when the name has none.
if (grammar_path.extension().empty())
193
grammar_path += FUZE_FILE_EXTENSION;
195
if (!fs::exists(grammar_path)) {
196
reporter().reportError(grammar_name_token.makeLocation(files()),
197
inout::fmt("Файл грамматики '%1' не найден")
198
.arg(grammar_path.string()));
202
/// \note The operation is needed only to pass the reference to the semantics
203
ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Reference), grammar_name_token, grammar_name_token);
204
inout::Token ref_token {inout::LexemeType::Annotation, grammar_path.u16string(), t.location()};
205
ast().addNode(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::None), ref_token, ref_token);
208
// The included grammar is parsed by a nested parser sharing the reporter, flow,
// stream supplier and syntax data collector.
bool ok = FuzeRdp(reporter(), grammar_path.string(), _flow, _input_stream_supplier, _syntax_data_collector).parse();
211
reporter().reportError(grammar_name_token.makeLocation(files()),
212
inout::fmt("При разборе грамматики '%1' обнаружены ошибки")
213
.arg(grammar_name_token.lexeme()));
218
return reportUnexpected(t, "';'");
221
return reportUnexpected(t, inout::fmt("наименование грамматики"));
223
// main <production id> ";"
else if (t.lexeme() == MAIN_STRING)
227
if (t.type() == inout::LexemeType::Id)
229
inout::Token main_production = t;
233
if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u";")
235
ast().addNode(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Main), main_production, main_production);
239
return reportUnexpected(t, "';'");
242
return reportUnexpected(t, inout::fmt("идентификатор продукции"));
244
// remove <production id> "=" <pattern> ";"
else if (t.lexeme() == REMOVE_STRING)
246
inout::Token production_token = getToken(tzer);
248
// Report the token that was actually read; `t` still holds the `remove` keyword here.
if (production_token.type() != inout::LexemeType::Id)
249
return reportUnexpected(production_token, inout::fmt("идентификатор продукции"));
251
inout::Token equal_token = getToken(tzer);
253
if (!parseProduction(production_token, tzer, equal_token, FuzeOperationCode::RemoveProduction))
256
/// \todo Handle a false return value!
261
// None of the three keywords matched.
return reportUnexpected(t,
262
inout::fmt("ключевое слово '%1', '%2' или '%3'")
265
.arg(REMOVE_STRING));
268
// Parses the right-hand side of a production for the identifier `id`:
// "=" <pattern> { ("|" | "=") <pattern> } ";".
// `t` must be the "=" token on entry; `operation_code` is the AST operation emitted
// for each alternative.
// NOTE(review): braces, token-advancing statements and returns are not visible in this excerpt.
bool FuzeRdp::parseProduction(const inout::Token & id, inout::Tokenizer &tzer, inout::Token &t, FuzeOperationCode operation_code)
270
if (t.type() != inout::LexemeType::Punctuation || t.lexeme() != u"=")
271
return reportUnexpected(t, "'='");
275
while(t.type() != inout::LexemeType::Empty)
277
// One AST node per alternative, labelled with the production identifier.
ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(operation_code), id, id);
279
if (!parsePattern(tzer,t))
282
/// \todo Handle a false return value!
285
// ";" terminates the production; "|" or "=" introduces another alternative.
if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u";")
289
else if (t.type() == inout::LexemeType::Punctuation && (t.lexeme() == u"|" || t.lexeme() == u"="))
295
return reportUnexpected(t, inout::fmt("'|', '=' или ';'"));
298
// The loop ended on an Empty token, i.e. input ended before ";".
reporter().reportError(t.makeLocation(files()), inout::fmt("Преждевременный конец файла"));
302
// Parses one pattern (a single alternative of a production): a sequence of grammar
// symbols optionally followed by a direction mark ("<" or ">") and/or a "{" script "}".
// Emits a Production_Pattern node and child nodes for symbols, direction and script.
// NOTE(review): braces, token-advancing statements and returns are not visible in this excerpt.
bool FuzeRdp::parsePattern(inout::Tokenizer &tzer, inout::Token &t)
304
ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Production_Pattern), t, t);
306
while(t.type() != inout::LexemeType::Empty)
308
// Non-keyword punctuation ends the symbol sequence.
if (t.type() == inout::LexemeType::Punctuation && t.qualification() != inout::TokenQualification::Keyword) {
309
// Direction mark.
if (t.lexeme() == u"<" || t.lexeme() == u">") {
310
/// \todo Handle a false return value!
312
ast().addNode(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Production_Direction), t, t);
316
// A script may follow the direction mark.
if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u"{") {
317
ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Production_Script), t, t);
319
FuzeSblRdp script_parser(reporter(), _flow, _syntax_data_collector);
320
bool ok = script_parser.parse(tzer, t);
322
/// \todo Handle a false return value!
329
// Script without a preceding direction mark.
else if (t.lexeme() == u"{") {
330
/// \todo Handle a false return value!
332
ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Production_Script), t, t);
334
FuzeSblRdp script_parser(reporter(), _flow, _syntax_data_collector);
335
bool ok = script_parser.parse(tzer, t);
337
/// \todo Handle a false return value!
341
// ";", "=" and "|" are the tokens that parseProduction() examines after a pattern.
if (t.lexeme() == u";" || t.lexeme() == u"=" || t.lexeme() == u"|") {
342
/// \todo Handle a false return value!
346
return reportUnexpected(t, inout::fmt("'символ грамматики', '<', '>', '{', '=', '|' или ';'"));
349
// Grammar symbol: identifier, annotation or keyword.
if (t.type() == inout::LexemeType::Id || t.type() == inout::LexemeType::Annotation
350
|| t.qualification() == inout::TokenQualification::Keyword) {
351
ast().addNode(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::None), t, t);
355
return reportUnexpected(t, inout::fmt("'символ грамматики', '<', '>', '{', '=', '|' или ';'"));
358
/// \todo Handle a false return value!
363
// Reads tokens from `tzer`, forwarding every token (comments included) to the syntax
// data collector and skipping over comment tokens.
// NOTE(review): the return statement is not visible in this excerpt; presumably the
// first non-comment token is returned.
inout::Token FuzeRdp::getToken(inout::Tokenizer & tzer) const
365
inout::Token token = tzer.getAnyToken();
367
// Comments are collected but never handed to the parser.
while (token.type() == inout::LexemeType::Comment) {
368
_syntax_data_collector.collectToken(token);
370
token = tzer.getAnyToken();
373
_syntax_data_collector.collectToken(token);
378
// Computes `_is_ready_for_parse` and `_current_file_index` for the grammar file `_path`.
// The file is ready for parsing only if its path (with the default extension applied)
// is not yet present in files(); in that case it is registered through the flow.
// NOTE(review): braces (and presumably an `else` before the last assignment) are not
// visible in this excerpt.
void FuzeRdp::initiateFileForParse()
380
fs::path grammar_path = _path;
382
// Append the default grammar extension when the path has none.
if (grammar_path.extension().empty())
383
grammar_path += FUZE_FILE_EXTENSION;
385
_is_ready_for_parse = std::find(files().begin(),files().end(),grammar_path.string()) == files().end();
387
if (_is_ready_for_parse) {
388
// The new index must be representable as inout::uri_index_t.
assert(files().size() <= std::numeric_limits<inout::uri_index_t>::max());
389
// The index is taken before the file is added, so it refers to the new entry.
_current_file_index = files().size();
390
_flow.addFile(grammar_path.string());
393
_current_file_index = 0;