loom

FuzeRdp.cpp
396 строк · 13.4 Кб
Перенос по словам
1
/*
MIT License

Copyright (c) 2021 МГТУ им. Н.Э. Баумана, кафедра ИУ-6, Михаил Фетисов,

https://bmstu.codes/lsx/simodo/loom
*/
8

9
#include "simodo/parser/fuze/FuzeRdp.h"
10
#include "simodo/parser/fuze/FuzeSblRdp.h"
11
#include "simodo/parser/fuze/FuzeOperationCode.h"
12
#include "simodo/parser/fuze/fuze_file_extension.h"
13
#include "simodo/parser/fuze/fuze_keywords.h"
14

15
#include "simodo/inout/token/RegularInputStreamSupplier.h"
16
#include "simodo/inout/convert/functions.h"
17
#include "simodo/inout/format/fmt.h"
18

19
#include <cassert>
20
#include <algorithm>
21
#include <limits>
22

23
// Select the filesystem implementation and expose it as `fs`.
// The standard <filesystem> header is available starting with C++17, for which
// __cplusplus is 201703L. MSVC keeps __cplusplus at 199711L unless
// /Zc:__cplusplus is given, so _MSVC_LANG is consulted as well.
// Older toolchains provide the library as <experimental/filesystem> in
// namespace std::experimental::filesystem.
#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
#include <filesystem>
namespace fs = std::filesystem;
#else
#include <experimental/filesystem>
namespace fs = std::experimental::filesystem;
#endif
30

31
namespace simodo::parser
32
{
33

34
// Keywords of the fuze grammar language that are recognised at the top level
// of a grammar file. They are file-local and never modified, hence `const`.

/// Keyword of the `include <grammar> ;` directive.
static const std::u16string INCLUDE_STRING  {u"include"};
/// Keyword of the `main <production> ;` directive.
static const std::u16string MAIN_STRING     {u"main"};
/// Keyword of the `remove <production> = ... ;` directive.
static const std::u16string REMOVE_STRING   {u"remove"};
37

38
namespace {
39
    inout::RegularInputStreamSupplier  regular_input_stream_supplier;
40
    SyntaxDataCollector_null    null_syntax_data_collector;
41
}
42

43
/**
 * Creates a grammar parser that uses the file-local default input stream
 * supplier and the file-local null syntax data collector.
 *
 * All initialisation (including registration of the grammar file via
 * initiateFileForParse()) is performed by the five-argument constructor.
 *
 * @param m     reporter that receives diagnostics
 * @param path  path to the grammar file
 * @param flow  formation flow whose tree supplies the list of files
 */
FuzeRdp::FuzeRdp(inout::Reporter_abstract & m,
                 const std::string & path,
                 ast::FormationFlow_interface & flow)
    : FuzeRdp(m, path, flow, regular_input_stream_supplier, null_syntax_data_collector)
{
}
54

55
/**
 * Creates a grammar parser with an explicit input stream supplier and syntax
 * data collector, then registers the grammar file via initiateFileForParse().
 *
 * @param m                      reporter that receives diagnostics
 * @param path                   path to the grammar file
 * @param flow                   formation flow whose tree supplies the list of files
 * @param input_stream_supplier  object used by parse() to open the grammar file
 * @param syntax_data_collector  object that receives every token read by getToken()
 */
FuzeRdp::FuzeRdp(
        inout::Reporter_abstract & m,
        const std::string & path,
        ast::FormationFlow_interface & flow,
        inout::InputStreamSupplier_interface & input_stream_supplier,
        SyntaxDataCollector_interface & syntax_data_collector)
    : RdpBaseSugar(m, flow.tree().files()),
      _path(path),
      _flow(flow),
      _input_stream_supplier(input_stream_supplier),
      _syntax_data_collector(syntax_data_collector)
{
    // Decides whether this file still has to be parsed and assigns its index.
    initiateFileForParse();
}
68

69
bool FuzeRdp::parse()
70
{
71
    if (!_is_ready_for_parse)
72
        return true;
73

74
    fs::path grammar_path = _path;
75

76
    if (grammar_path.extension().empty())
77
        grammar_path += FUZE_FILE_EXTENSION;
78

79
    std::shared_ptr<inout::InputStream_interface> in = _input_stream_supplier.supply(grammar_path.string());
80

81
    if (!in || !in->good()) {
82
        reporter().reportFatal(inout::fmt("Ошибка при открытии файла '%1'").arg(grammar_path.string()));
83
        return false;
84
    }
85

86
    return parse(*in);
87
}
88

89
/**
 * Parses a fuze grammar from an already opened stream.
 *
 * The top level of a grammar consists of:
 *   - keyword directives (handled by parseKeyword()),
 *   - global scripts:  ID "{" <script> "}",
 *   - productions:     ID "=" <pattern> ";".
 * Stray ";" tokens between the constructs are skipped.
 *
 * @param stream  input stream with the grammar text
 * @return true when the whole stream was parsed (or the file was already
 *         registered and nothing had to be done); false on the first error,
 *         including an error inside a global script.
 */
bool FuzeRdp::parse(inout::InputStream_interface & stream)
{
    // A file that is already in the file list is not parsed again.
    if (!_is_ready_for_parse)
        return true;

    inout::LexicalParameters lex;

    lex.markups = {
        {u"/*", u"*/", u"", inout::LexemeType::Comment},
        {u"//", u"", u"", inout::LexemeType::Comment},
        {u"\"", u"\"", u"\\", inout::LexemeType::Annotation}
    };
    lex.masks = {
        {u"N", inout::LexemeType::Number, 10}
    };
    lex.punctuation_chars = u"=|;><.{}[](),";
    lex.punctuation_words = {
        INCLUDE_STRING, MAIN_STRING, REMOVE_STRING,
        ANNOTATION_STRING, NUMBER_STRING, ID_STRING,
        u"true", u"false"
    };
    lex.may_national_letters_use = true;
    lex.may_national_letters_mix = true;

    inout::Tokenizer tzer(_current_file_index, stream, lex);

    inout::Token     t = getToken(tzer);

    while(t.type() != inout::LexemeType::Empty)
    {
        // Skip empty statements.
        while(t.type() == inout::LexemeType::Punctuation && t.lexeme() == u";")
            t = getToken(tzer);

        if (t.type() == inout::LexemeType::Empty)
            break;

        if (t.type() == inout::LexemeType::Punctuation && t.qualification() == inout::TokenQualification::Keyword)
        {
            if (!parseKeyword(tzer, t))
                return false;

            t = getToken(tzer);
        }
        else if (t.type() == inout::LexemeType::Id)
        {
            inout::Token id = t;

            t = getToken(tzer);

            if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u"{")
            {
                // ID "{" <script> "}"
                //     ^
                ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::GlobalScript), id, id);

                FuzeSblRdp script_parser(reporter(), _flow, _syntax_data_collector);

                // The script parser result must not be lost: parsePattern()
                // propagates it for production scripts, and so do we here.
                const bool script_ok = script_parser.parse(tzer, t);

                /// \todo Handle a false return!
                ast().goParent();

                if (!script_ok)
                    return false;
            }
            else if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u"=")
            {
                // ID "=" <pattern> ";"
                //     ^
                if (!parseProduction(id, tzer, t))
                    return false;

                // NOTE(review): parseProduction() already pairs each of its own
                // addNode_StepInto() calls with goParent(); confirm that this
                // additional goParent() is intended.
                /// \todo Handle a false return!
                ast().goParent();
            }
            else
                return reportUnexpected(t);
        }
        else
            return reportUnexpected(t);
    }

    _flow.finalize();

    return true;
}
172

173
/**
 * Parses one keyword directive. On entry `t` is the keyword token.
 *
 * Supported directives:
 *   - include <grammar name> ";"  — parses the referenced grammar file with a
 *     nested FuzeRdp that shares the flow, stream supplier and collector;
 *   - main <production id> ";"    — adds a Main node for the production;
 *   - remove <production id> "=" <pattern> ... ";" — parsed like a production
 *     but with the RemoveProduction operation code.
 *
 * @param tzer  tokenizer of the current grammar file
 * @param t     current token; for `include` and `main` it is advanced up to
 *              the terminating ";" on success
 * @return true on success, false after an error has been reported
 */
bool FuzeRdp::parseKeyword(inout::Tokenizer & tzer, inout::Token & t)
{
    if ( t.lexeme() == INCLUDE_STRING)
    {
        t = getToken(tzer);

        if (t.type() == inout::LexemeType::Annotation || t.type() == inout::LexemeType::Id)
        {
            inout::Token    grammar_name_token = t;
            std::string     grammar_name       = inout::toU8(grammar_name_token.lexeme());

            t = getToken(tzer);

            if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u";")
            {
                // The included grammar is looked up next to the current file.
                fs::path grammar_path = _path;

                grammar_path = grammar_path.parent_path() / grammar_name;

                if (grammar_path.extension().empty())
                    grammar_path += FUZE_FILE_EXTENSION;

                if (!fs::exists(grammar_path)) {
                    reporter().reportError(grammar_name_token.makeLocation(files()), 
                                           inout::fmt("Файл грамматики '%1' не найден")
                                           .arg(grammar_path.string()));
                    return false;
                }

                /// \note The operation is only needed to pass the reference on to the semantics
                ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Reference), grammar_name_token, grammar_name_token);
                // NOTE(review): the location is taken from the ';' token, not
                // from the grammar name token — confirm this is intended.
                inout::Token ref_token {inout::LexemeType::Annotation, grammar_path.u16string(), t.location()};
                ast().addNode(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::None), ref_token, ref_token);
                ast().goParent();

                bool ok = FuzeRdp(reporter(), grammar_path.string(), _flow, _input_stream_supplier, _syntax_data_collector).parse();

                if (!ok)
                    reporter().reportError(grammar_name_token.makeLocation(files()), 
                                           inout::fmt("При разборе грамматики '%1' обнаружены ошибки")
                                           .arg(grammar_name_token.lexeme()));

                return ok;
            }

            return reportUnexpected(t, "';'");
        }

        return reportUnexpected(t, inout::fmt("наименование грамматики"));
    }
    else if (t.lexeme() == MAIN_STRING)
    {
        t = getToken(tzer);

        if (t.type() == inout::LexemeType::Id)
        {
            inout::Token main_production = t;

            t = getToken(tzer);

            if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u";")
            {
                ast().addNode(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Main), main_production, main_production);
                return true;
            }

            return reportUnexpected(t, "';'");
        }

        return reportUnexpected(t, inout::fmt("идентификатор продукции"));
    }
    else if (t.lexeme() == REMOVE_STRING)
    {
        inout::Token production_token = getToken(tzer);

        // Report the token that was actually read; `t` still holds the
        // `remove` keyword here and is not the unexpected one.
        if (production_token.type() != inout::LexemeType::Id)
            return reportUnexpected(production_token, inout::fmt("идентификатор продукции"));

        inout::Token equal_token      = getToken(tzer);

        if (!parseProduction(production_token, tzer, equal_token, FuzeOperationCode::RemoveProduction))
            return false;

        /// \todo Handle a false return!
        ast().goParent();
        return true;
    }

    return reportUnexpected(t, 
                            inout::fmt("ключевое слово '%1', '%2' или '%3'")
                            .arg(INCLUDE_STRING)
                            .arg(MAIN_STRING)
                            .arg(REMOVE_STRING));
}
267

268
/**
 * Parses the right-hand side of a production: "=" <pattern> { ("|" | "=") <pattern> } ";".
 *
 * For every alternative a node with `operation_code` is opened for `id`,
 * the pattern is parsed by parsePattern(), and the node is closed again.
 *
 * @param id              token of the production identifier
 * @param tzer            tokenizer of the current grammar file
 * @param t               current token; must be "=" on entry, is ";" on success
 * @param operation_code  operation code stored in the node of each alternative
 * @return true when the terminating ";" was reached, false after an error
 *         has been reported
 */
bool FuzeRdp::parseProduction(const inout::Token & id, inout::Tokenizer &tzer, inout::Token &t, FuzeOperationCode operation_code)
{
    // True when the current token is the given punctuation mark.
    const auto at_punctuation = [&t](const char16_t * mark) {
        return t.type() == inout::LexemeType::Punctuation && t.lexeme() == mark;
    };

    if (!at_punctuation(u"="))
        return reportUnexpected(t, "'='");

    // Each iteration handles one alternative; the step expression consumes
    // the "|" or "=" separator that precedes the next alternative.
    for (t = getToken(tzer); t.type() != inout::LexemeType::Empty; t = getToken(tzer))
    {
        ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(operation_code), id, id);

        if (!parsePattern(tzer,t))
            return false;

        /// \todo Handle a false return!
        ast().goParent();

        if (at_punctuation(u";"))
            return true;

        if (!at_punctuation(u"|") && !at_punctuation(u"="))
            return reportUnexpected(t, inout::fmt("'|', '=' или ';'"));
    }

    reporter().reportError(t.makeLocation(files()), inout::fmt("Преждевременный конец файла"));
    return false;
}
301

302
/**
 * Parses one pattern (alternative) of a production.
 *
 * A Production_Pattern node is opened and every grammar symbol (identifier,
 * annotation or keyword) is added to it. The pattern ends at:
 *   - "<" or ">" — a Production_Direction node is added, optionally followed
 *     by a "{" script;
 *   - "{"        — a Production_Script node is parsed by FuzeSblRdp;
 *   - ";", "=" or "|" — left in `t` for the caller;
 *   - end of input.
 *
 * @param tzer  tokenizer of the current grammar file
 * @param t     current token; on return it is the token following the pattern
 * @return true on success (or the script parser result when a script was
 *         parsed), false after an unexpected token has been reported
 */
bool FuzeRdp::parsePattern(inout::Tokenizer &tzer, inout::Token &t)
{
    // Opens a script node at the current "{" token, lets the script parser
    // consume the script and closes the node again.
    const auto parse_script = [&]() -> bool {
        ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Production_Script), t, t);

        FuzeSblRdp script_parser(reporter(), _flow, _syntax_data_collector);
        const bool script_ok = script_parser.parse(tzer, t);

        /// \todo Handle a false return!
        ast().goParent();
        return script_ok;
    };

    ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Production_Pattern), t, t);

    while(t.type() != inout::LexemeType::Empty)
    {
        const bool plain_punctuation = t.type() == inout::LexemeType::Punctuation
                                    && t.qualification() != inout::TokenQualification::Keyword;

        if (!plain_punctuation)
        {
            const bool grammar_symbol = t.type() == inout::LexemeType::Id
                                     || t.type() == inout::LexemeType::Annotation
                                     || t.qualification() == inout::TokenQualification::Keyword;

            if (!grammar_symbol)
                return reportUnexpected(t, inout::fmt("'символ грамматики', '<', '>', '{', '=', '|' или ';'"));

            ast().addNode(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::None), t, t);
            t = getToken(tzer);
            continue;
        }

        if (t.lexeme() == u"<" || t.lexeme() == u">")
        {
            // The direction node is a sibling of the pattern node, so the
            // pattern is closed first.
            /// \todo Handle a false return!
            ast().goParent();
            ast().addNode(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Production_Direction), t, t);

            t = getToken(tzer);

            const bool script_follows = t.type() == inout::LexemeType::Punctuation && t.lexeme() == u"{";

            return script_follows ? parse_script() : true;
        }

        if (t.lexeme() == u"{")
        {
            /// \todo Handle a false return!
            ast().goParent();
            return parse_script();
        }

        if (t.lexeme() == u";" || t.lexeme() == u"=" || t.lexeme() == u"|")
        {
            /// \todo Handle a false return!
            ast().goParent();
            return true;
        }

        return reportUnexpected(t, inout::fmt("'символ грамматики', '<', '>', '{', '=', '|' или ';'"));
    }

    // End of input: close the pattern node.
    /// \todo Handle a false return!
    ast().goParent();
    return true;
}
362

363
/**
 * Reads the next non-comment token.
 *
 * Every token read from the tokenizer, comments included, is passed to the
 * syntax data collector; comment tokens are then skipped.
 *
 * @param tzer  tokenizer to read from
 * @return the first token whose type is not Comment
 */
inout::Token FuzeRdp::getToken(inout::Tokenizer & tzer) const
{
    for (;;) {
        inout::Token candidate = tzer.getAnyToken();

        _syntax_data_collector.collectToken(candidate);

        if (candidate.type() != inout::LexemeType::Comment)
            return candidate;
    }
}
377

378
void FuzeRdp::initiateFileForParse()
379
{
380
    fs::path grammar_path = _path;
381

382
    if (grammar_path.extension().empty())
383
        grammar_path += FUZE_FILE_EXTENSION;
384

385
   _is_ready_for_parse = std::find(files().begin(),files().end(),grammar_path.string()) == files().end();
386

387
    if (_is_ready_for_parse) {
388
        assert(files().size() <= std::numeric_limits<inout::uri_index_t>::max());
389
        _current_file_index = files().size();
390
        _flow.addFile(grammar_path.string());
391
    }
392
    else
393
        _current_file_index = 0;
394
}
395

396
}
loom

Использование cookies