loom

FuzeRdp.cpp
394 строки · 13.3 Кб
Перенос по словам
1
/*
MIT License

Copyright (c) 2021 МГТУ им. Н.Э. Баумана, кафедра ИУ-6, Михаил Фетисов,

https://bmstu.codes/lsx/simodo/loom
*/
8

9
#include "simodo/parser/fuze/FuzeRdp.h"
10
#include "simodo/parser/fuze/FuzeSblRdp.h"
11
#include "simodo/parser/fuze/FuzeOperationCode.h"
12
#include "simodo/parser/fuze/fuze_file_extension.h"
13
#include "simodo/parser/fuze/fuze_keywords.h"
14

15
#include "simodo/inout/token/RegularInputStreamSupplier.h"
16
#include "simodo/inout/convert/functions.h"
17
#include "simodo/inout/format/fmt.h"
18

19
#include <cassert>
20
#include <algorithm>
21

22
// Select the filesystem implementation and expose it under the alias `fs`.
// The language level is compared against the standard C++17 value 201703L;
// _MSVC_LANG is consulted as well because MSVC keeps __cplusplus at 199711L
// unless /Zc:__cplusplus is given.
#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
#include <filesystem>
namespace fs = std::filesystem;
#else
// Pre-C++17 toolchains ship the library in the std::experimental namespace.
#include <experimental/filesystem>
namespace fs = std::experimental::filesystem;
#endif
29

30
namespace simodo::parser
31
{
32

33
// Keywords of the fuze grammar description language that are handled directly
// by this parser. They are local to this translation unit.
static std::u16string INCLUDE_STRING = u"include";
static std::u16string MAIN_STRING    = u"main";
static std::u16string REMOVE_STRING  = u"remove";
36

37
namespace {
38
    inout::RegularInputStreamSupplier  regular_input_stream_supplier;
39
    SyntaxDataCollector_null    null_syntax_data_collector;
40
}
41

42
/// \brief Creates a parser that uses the file-local default input stream
///        supplier and syntax data collector.
///
/// \param m     Reporter that receives diagnostics.
/// \param path  Path of the grammar file (the extension may be omitted).
/// \param flow  Formation flow that receives the parsed tree and file list.
FuzeRdp::FuzeRdp(inout::Reporter_abstract & m,
                 const std::string & path,
                 ast::FormationFlow_interface & flow)
    : FuzeRdp(m, path, flow, regular_input_stream_supplier, null_syntax_data_collector)
{
}
53

54
FuzeRdp::FuzeRdp(inout::Reporter_abstract & m,
55
                 const std::string & path,
56
                 ast::FormationFlow_interface & flow,
57
                 inout::InputStreamSupplier_interface & input_stream_supplier,
58
                 SyntaxDataCollector_interface & syntax_data_collector)
59
    : RdpBaseSugar(m, flow.tree().files())
60
    , _path(path)
61
    , _flow(flow)
62
    , _input_stream_supplier(input_stream_supplier)
63
    , _syntax_data_collector(syntax_data_collector)
64
{
65
    initiateFileForParse();
66
}
67

68
bool FuzeRdp::parse()
69
{
70
    if (!_is_ready_for_parse)
71
        return true;
72

73
    fs::path grammar_path = _path;
74

75
    if (grammar_path.extension().empty())
76
        grammar_path += FUZE_FILE_EXTENSION;
77

78
    std::shared_ptr<inout::InputStream_interface> in = _input_stream_supplier.supply(grammar_path.string());
79

80
    if (!in || !in->good()) {
81
        reporter().reportFatal(inout::fmt("Ошибка при открытии файла '%1'").arg(grammar_path.string()));
82
        return false;
83
    }
84

85
    return parse(*in);
86
}
87

88
/// \brief Parses a fuze grammar description from an already opened stream.
///
/// Sets up the lexical parameters of the fuze language and reads top-level
/// statements until the end of the stream:
///  - keyword statements, delegated to parseKeyword();
///  - global scripts of the form  ID "{" <script> "}";
///  - productions of the form     ID "=" <pattern> ";".
/// Stray ';' tokens between statements are skipped. The flow is finalized only
/// when the whole stream was read without errors.
///
/// \param stream  Input stream with the grammar text.
/// \return true when the file does not need parsing (it was already registered)
///         or the whole stream was parsed; false as soon as a statement,
///         including a global script, fails to parse.
bool FuzeRdp::parse(inout::InputStream_interface & stream)
{
    if (!_is_ready_for_parse)
        return true;

    inout::LexicalParameters lex;

    lex.markups = {
        {u"/*", u"*/", u"", inout::LexemeType::Comment},
        {u"//", u"", u"", inout::LexemeType::Comment},
        {u"\"", u"\"", u"\\", inout::LexemeType::Annotation}
    };
    lex.masks = {
        {u"N", inout::LexemeType::Number, 10}
    };
    lex.punctuation_chars = u"=|;><.{}[](),";
    lex.punctuation_words = {
        INCLUDE_STRING, MAIN_STRING, REMOVE_STRING,
        ANNOTATION_STRING, NUMBER_STRING, ID_STRING,
        u"true", u"false"
    };
    lex.may_national_letters_use = true;
    lex.may_national_letters_mix = true;

    inout::Tokenizer tzer(_current_file_index, stream, lex);

    inout::Token     t = getToken(tzer);

    while(t.type() != inout::LexemeType::Empty)
    {
        // Skip empty statements.
        while(t.type() == inout::LexemeType::Punctuation && t.lexeme() == u";")
            t = getToken(tzer);

        if (t.type() == inout::LexemeType::Empty)
            break;

        if (t.type() == inout::LexemeType::Punctuation && t.qualification() == inout::TokenQualification::Keyword)
        {
            if (!parseKeyword(tzer, t))
                return false;

            t = getToken(tzer);
        }
        else if (t.type() == inout::LexemeType::Id)
        {
            inout::Token id = t;

            t = getToken(tzer);

            if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u"{")
            {
                // ID "{" <script> "}"
                //     ^
                ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::GlobalScript), id, id);

                FuzeSblRdp script_parser(reporter(), _flow, _syntax_data_collector);

                const bool ok = script_parser.parse(tzer, t);

                /// \todo Handle a false return value of goParent()!
                ast().goParent();

                // A failed script is a failed grammar, exactly as for production
                // scripts in parsePattern(); do not keep parsing after it.
                if (!ok)
                    return false;
            }
            else if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u"=")
            {
                // ID "=" <pattern> ";"
                //     ^
                if (!parseProduction(id, tzer, t))
                    return false;

                // NOTE(review): parseProduction() already calls goParent() for every
                // node it steps into; confirm that this additional call is intended.
                /// \todo Handle a false return value of goParent()!
                ast().goParent();
            }
            else
                return reportUnexpected(t);
        }
        else
            return reportUnexpected(t);
    }

    _flow.finalize();

    return true;
}
171

172
/// \brief Parses a statement that starts with one of the keywords
///        'include', 'main' or 'remove'.
///
/// Supported forms:
///  - "include" (ANNOTATION | ID) ";"  - parses the referenced grammar file,
///    resolved relative to the directory of the current file, with a nested
///    FuzeRdp that shares the flow, stream supplier and data collector;
///  - "main" ID ";"                    - adds a Main node for the production;
///  - "remove" ID "=" <pattern> ";"    - parsed as a production with the
///    RemoveProduction operation code.
///
/// \param tzer  Tokenizer of the current file.
/// \param t     On entry the keyword token. For 'include' and 'main' it is
///              advanced to the last token read; for 'remove' it is left unchanged.
/// \return true when the statement was parsed successfully, false otherwise
///         (the problem has been reported).
bool FuzeRdp::parseKeyword(inout::Tokenizer & tzer, inout::Token & t)
{
    if (t.lexeme() == INCLUDE_STRING)
    {
        t = getToken(tzer);

        if (t.type() != inout::LexemeType::Annotation && t.type() != inout::LexemeType::Id)
            return reportUnexpected(t, inout::fmt("наименование грамматики"));

        inout::Token    grammar_name_token = t;
        std::string     grammar_name       = inout::toU8(grammar_name_token.lexeme());

        t = getToken(tzer);

        if (t.type() != inout::LexemeType::Punctuation || t.lexeme() != u";")
            return reportUnexpected(t, "';'");

        // The included grammar is looked up next to the current grammar file.
        fs::path grammar_path = _path;

        grammar_path = grammar_path.parent_path() / grammar_name;

        if (grammar_path.extension().empty())
            grammar_path += FUZE_FILE_EXTENSION;

        // NOTE(review): this check queries the real file system, while the nested
        // parser obtains the content through _input_stream_supplier; confirm this
        // is intended for suppliers that are not backed by files.
        if (!fs::exists(grammar_path)) {
            reporter().reportError(grammar_name_token.makeLocation(files()),
                                   inout::fmt("Файл грамматики '%1' не найден")
                                   .arg(grammar_path.string()));
            return false;
        }

        /// \note The operation is only needed to pass the reference on to the semantics
        ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Reference), grammar_name_token, grammar_name_token);
        inout::Token ref_token {inout::LexemeType::Annotation, grammar_path.u16string(), t.location()};
        ast().addNode(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::None), ref_token, ref_token);
        ast().goParent();

        bool ok = FuzeRdp(reporter(), grammar_path.string(), _flow, _input_stream_supplier, _syntax_data_collector).parse();

        if (!ok)
            reporter().reportError(grammar_name_token.makeLocation(files()),
                                   inout::fmt("При разборе грамматики '%1' обнаружены ошибки")
                                   .arg(grammar_name_token.lexeme()));

        return ok;
    }

    if (t.lexeme() == MAIN_STRING)
    {
        t = getToken(tzer);

        if (t.type() != inout::LexemeType::Id)
            return reportUnexpected(t, inout::fmt("идентификатор продукции"));

        inout::Token main_production = t;

        t = getToken(tzer);

        if (t.type() != inout::LexemeType::Punctuation || t.lexeme() != u";")
            return reportUnexpected(t, "';'");

        ast().addNode(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Main), main_production, main_production);
        return true;
    }

    if (t.lexeme() == REMOVE_STRING)
    {
        inout::Token production_token = getToken(tzer);

        // Report the token that is actually wrong, not the 'remove' keyword
        // that is still held in t.
        if (production_token.type() != inout::LexemeType::Id)
            return reportUnexpected(production_token, inout::fmt("идентификатор продукции"));

        inout::Token equal_token      = getToken(tzer);

        if (!parseProduction(production_token, tzer, equal_token, FuzeOperationCode::RemoveProduction))
            return false;

        // NOTE(review): parseProduction() already calls goParent() for every node
        // it steps into; confirm that this additional call is intended.
        /// \todo Handle a false return value of goParent()!
        ast().goParent();
        return true;
    }

    return reportUnexpected(t,
                            inout::fmt("ключевое слово '%1', '%2' или '%3'")
                            .arg(INCLUDE_STRING)
                            .arg(MAIN_STRING)
                            .arg(REMOVE_STRING));
}
266

267
/// \brief Parses the right-hand side of a production:
///        "=" <pattern> { ("|" | "=") <pattern> } ";".
///
/// For every alternative a node with \p operation_code is added for \p id,
/// the pattern is parsed inside it, and the tree position is returned to the
/// parent.
///
/// \param id              Token with the production name.
/// \param tzer            Tokenizer of the current file.
/// \param t               On entry must be the '=' token; on success it is the closing ';'.
/// \param operation_code  Operation code stored in the node of each alternative.
/// \return true when the production ends with ';', false on any error
///         (the problem has been reported).
bool FuzeRdp::parseProduction(const inout::Token & id, inout::Tokenizer &tzer, inout::Token &t, FuzeOperationCode operation_code)
{
    const bool starts_with_equal = t.type() == inout::LexemeType::Punctuation && t.lexeme() == u"=";

    if (!starts_with_equal)
        return reportUnexpected(t, "'='");

    // Each iteration handles one alternative; the step of the loop consumes
    // the separator ('|' or '=') that precedes the next alternative.
    for (t = getToken(tzer); t.type() != inout::LexemeType::Empty; t = getToken(tzer))
    {
        ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(operation_code), id, id);

        if (!parsePattern(tzer,t))
            return false;

        /// \todo Handle a false return value of goParent()!
        ast().goParent();

        if (t.type() != inout::LexemeType::Punctuation)
            return reportUnexpected(t, inout::fmt("'|', '=' или ';'"));

        if (t.lexeme() == u";")
            return true;

        if (t.lexeme() != u"|" && t.lexeme() != u"=")
            return reportUnexpected(t, inout::fmt("'|', '=' или ';'"));
    }

    reporter().reportError(t.makeLocation(files()), inout::fmt("Преждевременный конец файла"));
    return false;
}
300

301
/// \brief Parses one alternative of a production: a sequence of grammar
///        symbols optionally followed by a direction ('<' or '>') and/or a
///        script in braces.
///
/// Grammar symbols (identifiers, annotations and keyword tokens) are added as
/// children of a Production_Pattern node. A direction and a script are added
/// after leaving that node, as its siblings. Parsing of the alternative stops
/// at ';', '=' or '|' (left in \p t), after a direction, after a script, or at
/// the end of the input.
///
/// \param tzer  Tokenizer of the current file.
/// \param t     Current token; updated as tokens are consumed.
/// \return true on success; false when an unexpected token was met (reported)
///         or the script parser failed.
bool FuzeRdp::parsePattern(inout::Tokenizer &tzer, inout::Token &t)
{
    // Reports the current token as unexpected at this point of a pattern.
    const auto unexpected = [&]() -> bool {
        return reportUnexpected(t, inout::fmt("'символ грамматики', '<', '>', '{', '=', '|' или ';'"));
    };

    // Parses a script starting at the current '{' inside a Production_Script node.
    const auto parse_script = [&]() -> bool {
        ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Production_Script), t, t);

        FuzeSblRdp script_parser(reporter(), _flow, _syntax_data_collector);
        const bool script_ok = script_parser.parse(tzer, t);

        /// \todo Handle a false return value of goParent()!
        ast().goParent();
        return script_ok;
    };

    ast().addNode_StepInto(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Production_Pattern), t, t);

    // The step of the loop is reached only after a grammar symbol was stored;
    // every other case leaves the function.
    for (; t.type() != inout::LexemeType::Empty; t = getToken(tzer))
    {
        const bool is_keyword = t.qualification() == inout::TokenQualification::Keyword;

        if (t.type() != inout::LexemeType::Punctuation || is_keyword) {
            const bool is_symbol = t.type() == inout::LexemeType::Id
                                || t.type() == inout::LexemeType::Annotation
                                || is_keyword;

            if (!is_symbol)
                return unexpected();

            ast().addNode(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::None), t, t);
            continue;
        }

        // Plain punctuation: it either finishes the pattern or is an error.
        if (t.lexeme() == u"<" || t.lexeme() == u">") {
            /// \todo Handle a false return value of goParent()!
            ast().goParent();
            ast().addNode(FUZE_HOST_NAME, static_cast<ast::OperationCode>(FuzeOperationCode::Production_Direction), t, t);

            t = getToken(tzer);

            if (t.type() == inout::LexemeType::Punctuation && t.lexeme() == u"{")
                return parse_script();

            return true;
        }

        if (t.lexeme() == u"{") {
            /// \todo Handle a false return value of goParent()!
            ast().goParent();
            return parse_script();
        }

        if (t.lexeme() == u";" || t.lexeme() == u"=" || t.lexeme() == u"|") {
            /// \todo Handle a false return value of goParent()!
            ast().goParent();
            return true;
        }

        return unexpected();
    }

    // End of input: leave the pattern node.
    /// \todo Handle a false return value of goParent()!
    ast().goParent();
    return true;
}
361

362
/// \brief Reads the next token that is not a comment.
///
/// Every token read, including skipped comments, is passed to the syntax data
/// collector.
///
/// \param tzer  Tokenizer to read from.
/// \return The first token whose type is not Comment.
inout::Token FuzeRdp::getToken(inout::Tokenizer & tzer) const
{
    for (;;) {
        inout::Token token = tzer.getAnyToken();

        _syntax_data_collector.collectToken(token);

        if (token.type() != inout::LexemeType::Comment)
            return token;
    }
}
376

377
void FuzeRdp::initiateFileForParse()
378
{
379
    fs::path grammar_path = _path;
380

381
    if (grammar_path.extension().empty())
382
        grammar_path += FUZE_FILE_EXTENSION;
383

384
   _is_ready_for_parse = std::find(files().begin(),files().end(),grammar_path.string()) == files().end();
385

386
    if (_is_ready_for_parse) {
387
        _current_file_index = files().size();
388
        _flow.addFile(grammar_path.string());
389
    }
390
    else
391
        _current_file_index = 0;
392
}
393

394
}
loom

Использование cookies