loom

Форк
0
/
grammatize.cpp 
435 строк · 20.4 Кб
1
/*
2
MIT License
3

4
Copyright (c) 2021 МГТУ им. Н.Э. Баумана, кафедра ИУ-6, Михаил Фетисов,
5

6
https://bmstu.codes/lsx/simodo
7
*/
8

9
#include "simodo/engine/utility/grammatize.h"
10
#include "simodo/inout/convert/functions.h"
11
#include "simodo/inout/format/fmt.h"
12
#include "simodo/parser/fuze/FuzeRdp.h"
13
#include "simodo/ast/generator/FormationFlow.h"
14
#include "simodo/interpret/builtins/hosts/fuze/FuzeAnalyzer.h"
15
#include "simodo/interpret/builtins/hosts/base/BaseAnalyzer.h"
16
#include "simodo/interpret/builtins/hosts/base/BaseRunning.h"
17
#include "simodo/interpret/builtins/modules/LexicalParametersModule.h"
18
#include "simodo/interpret/builtins/modules/AstFormationModule.h"
19
#include "simodo/interpret/SemanticOperationsEnumsLoader.h"
20
#include "simodo/interpret/Interpret.h"
21
#include "simodo/engine/utility/common_functions.h"
22
#include "simodo/engine/utility/generateDotFile.h"
23
#include "simodo/loom/Loom.h"
24
#include "simodo/variable/convert/Ast.h"
25
#include "simodo/variable/json/Serialization.h"
26

27
#include <fstream>
28
#include <memory>
29
#include <algorithm>
30
#include <chrono>
31

32
// Select the <filesystem> implementation.
//
// Bug fix: the original condition was `__cplusplus >= __cpp_2017`, but
// `__cpp_2017` is not a standard macro — undefined identifiers evaluate to 0
// in preprocessor arithmetic, so the check was always true and the fallback
// branch was unreachable. The standard way to detect C++17 is
// `__cplusplus >= 201703L`.
//
// Bug fix: the TS namespace is `std::experimental::filesystem`, not
// `std::filesystem::experimental` — the original alias would not compile
// if the fallback branch were ever taken.
#if __cplusplus >= 201703L
#include <filesystem>
namespace fs = std::filesystem;
#else
#include <experimental/filesystem>
namespace fs = std::experimental::filesystem;
#endif
39

40
namespace simodo::engine
41
{
42
    /// RAII stopwatch: measures wall-clock time from construction to
    /// destruction and stores the elapsed time (in seconds) into the
    /// double bound at construction.
    ///
    /// The referenced double must outlive the LocalClock instance.
    class LocalClock
    {
        double & _seconds;  // receives the elapsed seconds in the destructor
        std::chrono::time_point<std::chrono::steady_clock> _start;

    public:
        LocalClock() = delete;
        // Non-copyable: the class holds a reference and writes to it on
        // destruction; a copy would write the same target twice.
        LocalClock(const LocalClock &) = delete;
        LocalClock & operator=(const LocalClock &) = delete;

        explicit LocalClock(double & seconds) : _seconds(seconds)
        {
            // steady_clock is monotonic — immune to wall-clock adjustments.
            _start = std::chrono::steady_clock::now();
        }

        ~LocalClock()
        {
            _seconds = std::chrono::duration<double>(std::chrono::steady_clock::now() - _start).count();
        }
    };
59

60
    /// Prints every grammar rule to `out`, one per line:
    /// index, action/associativity markers, production name and pattern.
    /// When `need_st_info` is set, also dumps the semantic tree of each
    /// rule that carries a reduce action.
    void printRules(const parser::Grammar & g, const inout::uri_set_t & files, bool need_st_info, std::ostream & out)
    {
        out << inout::fmt("Правила грамматики:").str() << std::endl;

        for(size_t index = 0; index < g.rules.size(); ++index) {
            const auto & rule = g.rules[index];

            // '*' marks rules that have a semantic (reduce) action attached.
            char action = ' ';
            if (!rule.reduce_action.branches().empty())
                action = '*';

            // '<'/'>' show explicit left/right associativity; blank otherwise.
            char direction = ' ';
            if (rule.reduce_direction == parser::RuleReduceDirection::LeftAssociative)
                direction = '<';
            else if (rule.reduce_direction != parser::RuleReduceDirection::Undefined)
                direction = '>';

            out << index << ": " << action << direction << "\t" << inout::toU8(rule.production) << "\t→ ";

            for(const inout::Lexeme & lex : rule.pattern)
                out << engine::getMnemonic(lex) << " ";

            out << std::endl;

            if (need_st_info && !rule.reduce_action.branches().empty())
                engine::printSemanticTree(rule.reduce_action, files, 0, 2);
        }
    }
79

80
    /// Dumps the parser's finite-state machine to `out`:
    /// every state with its dotted positions and lookahead sets,
    /// the grammar symbol table, and the packed parse table.
    ///
    /// \param g   grammar whose automaton is printed
    /// \param out destination stream
    void printStateTransitions(const parser::Grammar & g, std::ostream & out)
    {
        out << inout::fmt("Состояния автомата разбора:").str() << std::endl;
        for(size_t i=0; i < g.states.size(); ++i) {
            out << "State [" << i << "]:" << std::endl;

            const parser::FsmState_t & state = g.states[i];

            for(const parser::FsmStatePosition & p : state) {
                // "M:" marks a main (kernel) position of the state.
                out << (p.is_main ? "    M:" : "     ") << "\t"
                        << inout::toU8(g.rules[p.rule_no].production) << "\t→ ";

                // Rule pattern with "•" marking the current position.
                for(size_t j=0; j < g.rules[p.rule_no].pattern.size(); ++j) {
                    if (p.position == j)
                        out << "•";
                    out << engine::getMnemonic(g.rules[p.rule_no].pattern[j]) << " ";
                }
                if (p.position == g.rules[p.rule_no].pattern.size())
                    out << "•";

                // Lookahead set, if any, after a '|' separator.
                if (!p.lookahead.empty()) {
                    out << "\t|";

                    for(const inout::Lexeme & lex : p.lookahead)
                        out << " " << engine::getMnemonic(lex);
                }

                // S<n> = shift to state n, R<n> = reduce by rule n.
                out << "\t\t---- " << ((p.next_state_no > 0) ? "S" : "R") 
                    << ((p.next_state_no > 0) ? p.next_state_no : p.rule_no) 
                    << std::endl;
            }
        }

        out << inout::fmt("Символы грамматики:").str() << std::endl;
        for(size_t i=0; i < g.columns.size(); ++i)
            out << i << ": " << engine::getMnemonic(g.columns[i]) << std::endl;

        out << inout::fmt("Таблица разбора:").str();
        size_t cl         = 100000;  // sentinel meaning "no previous line printed yet"
        size_t table_size = 0;
        // Fix: bind map entries by const reference — the original `auto [..]`
        // copied every key/value pair on each iteration.
        for(const auto & [fsm_key,fsm_value] : g.parse_table) {
            size_t        line     = g.unpackFsmState(fsm_key);
            size_t        column   = g.unpackFsmColumn(fsm_key);
            parser::FsmActionType action   = g.unpackFsmAction(fsm_value);
            size_t        location = g.unpackFsmLocation(fsm_value);

            // Start a new output row whenever the FSM state changes.
            if (cl != line)
                out << std::endl << "[" << line << "]:\t";

            out << engine::getMnemonic(g.columns[column]) 
                 << "(" << inout::toU8(parser::getFsmActionChar(action)) 
                 << location << ") ";

            cl = line;
            table_size ++;
        }
        out << std::endl << inout::fmt("Количество элементов = ").str() << table_size << std::endl;
    }
138

139
    /// Generates a Graphviz DOT file visualizing the grammar's state-transition
    /// graph: each FSM state becomes an HTML-like table node listing its dotted
    /// positions (with lookahead sets), and shift transitions become labeled
    /// edges between state nodes.
    ///
    /// \param g            grammar whose states, rules and transitions are rendered
    /// \param dot_file     output path for the generated DOT file
    /// \param grammar_file grammar name, used as the digraph title
    /// \param need_silence when true, suppress the success message on `out`
    /// \param out          stream for status/error messages
    void createStateTransitionsGraph(const parser::Grammar & g, 
                                     const std::string & dot_file, 
                                     const std::string & grammar_file, 
                                     bool need_silence,
                                     std::ostream & out)
    {
        std::ofstream dot(dot_file);

        if(dot.good())
        {
            // dot << "digraph \"" << grammar_file << "\" { rankdir=\"LR\";" << endl;
            dot << "digraph \"" << grammar_file << "\" {" << std::endl;
            // First pass: emit one table node per FSM state.
            for(size_t i=0; i < g.states.size(); ++i)
            {
                dot << "\tS" << i << " [shape=none,style=filled,margin=0,fontsize=12,fontname=Helvetica,labelfloat=false,labelloc=t,labeljust=l,label=<" << std::endl
                    << "\t\t<TABLE BORDER=\"1\" CELLBORDER=\"0\" CELLSPACING=\"0\" CELLPADDING=\"2\">" << std::endl
                    << "\t\t<TR><TD COLSPAN=\"2\"><b>" << i << "</b></TD></TR>" << std::endl;

                const parser::FsmState_t & state = g.states[i];

                for(const parser::FsmStatePosition & p : state)
                {
                    // One table row per position: "* " marks main (kernel) positions.
                    dot << "\t\t\t<TR><TD align='right'>" << (p.is_main ? "* " : "") 
                        << engine::toHtml(inout::toU8(g.rules[p.rule_no].production))
                        << " →</TD><TD align='left'>";

                    // Rule pattern with "•" marking the current position.
                    for(size_t j=0; j < g.rules[p.rule_no].pattern.size(); ++j)
                    {
                        if (p.position == j)
                            dot << "•";
                        dot << engine::toHtml(engine::getMnemonic(g.rules[p.rule_no].pattern[j])) << " ";
                    }
                    if (p.position == g.rules[p.rule_no].pattern.size())
                        dot << "•";

                    // Lookahead set, if any, after a '|' separator.
                    if (!p.lookahead.empty())
                    {
                        dot << " |";

                        for(const inout::Lexeme & lex : p.lookahead)
                            dot << " " << engine::toHtml(engine::getMnemonic(lex));
                    }

                    dot << "</TD></TR>" << std::endl;
                }
                dot << "\t\t</TABLE>>];" << std::endl;
            }
            // Second pass: emit shift edges (next_state_no > 0) between states.
            for(size_t i=0; i < g.states.size(); ++i)
            {
                const parser::FsmState_t & state = g.states[i];

                for(size_t ip=0; ip < state.size(); ++ip)
                {
                    const parser::FsmStatePosition & p = state[ip];

                    if (p.next_state_no > 0)
                    {
                        // Deduplicate: if an earlier position of this state shifts
                        // on the same symbol, only one edge is emitted for the pair
                        // (state, symbol) — skip this position.
                        size_t j = 0;
                        for(; j < ip; ++j)
                            if (state[j].position < g.rules[state[j].rule_no].pattern.size() &&
                                    p.position < g.rules[p.rule_no].pattern.size() &&
                                    g.rules[state[j].rule_no].pattern[state[j].position] == g.rules[p.rule_no].pattern[p.position])
                                break;

                        if (j < ip)
                            continue;

                        // Edge label is the shifted symbol (empty for a completed
                        // position, i.e. when the dot is at the end of the pattern).
                        std::string label;
                        if (p.position < g.rules[p.rule_no].pattern.size())
                            label = engine::toHtml(engine::getMnemonic(g.rules[p.rule_no].pattern[p.position]));
                        dot <<  "\t\tS" << i << " -> " << "S" << p.next_state_no
                                << " [fontsize=12,fontname=Helvetica,label=<" << label << ">];" << std::endl;
                    }
                }
            }
            dot << "}" << std::endl;

            // Re-check the stream: a write error may have occurred mid-emission.
            if (dot.good()) {
                if (!need_silence)
                    out << inout::fmt("Создан DOT-файл '").str() << dot_file << "'" << std::endl;
            }
            else
                out << inout::fmt("Не удалось записать в файл '").str() << dot_file << "'" << std::endl;
        }
        else
            out << inout::fmt("Не удалось записать в файл '").str() << dot_file << "'" << std::endl;
    }
227

228
    /// Builds a grammar from a Fuze description (or loads a previously saved
    /// dump) and optionally prints/exports diagnostic information about it.
    ///
    /// \param grammar_file                grammar source path; its stem/parent are reused for related artifacts
    /// \param in                          input stream with the grammar description
    /// \param out                         stream for status and diagnostic output
    /// \param m                           reporter receiving parse/interpret messages
    /// \param json_file_name              if non-empty, the AST is saved as JSON here
    /// \param st_dot_file_name            if non-empty, the AST is exported as a DOT file here
    /// \param grammar_builder_method      table construction method passed to FuzeAnalyzer
    /// \param dot_file_name               if non-empty, the FSM graph is exported as a DOT file here
    /// \param need_state_transitions_info print FSM states and the parse table
    /// \param need_rules_info             print the grammar rules
    /// \param need_st_info                print semantic trees of handlers (and of rules, via printRules)
    /// \param need_time_intervals         print timing statistics
    /// \param need_silence                suppress informational messages
    /// \param need_build_grammar          save the grammar dump after building
    /// \param need_load_grammar           load a dump instead of building from `in`
    /// \param need_analyze_handles        run the "lex" handler (fills grammar.lexical)
    /// \param need_analyze_inserts        analyze semantic inserts of handlers and rules
    /// \param grammar                     [out] resulting grammar
    /// \return true if every executed stage completed without errors
    bool grammatize(const std::string & grammar_file,
                inout::InputStream_interface & in,
                std::ostream & out,
                inout::Reporter_abstract & m,
                const std::string & json_file_name,
                const std::string & st_dot_file_name,
                parser::TableBuildMethod grammar_builder_method,
                const std::string & dot_file_name,
                bool need_state_transitions_info,
                bool need_rules_info,
                bool need_st_info,
                bool need_time_intervals,
                bool need_silence,
                bool need_build_grammar,
                bool need_load_grammar,
                bool need_analyze_handles,
                bool need_analyze_inserts,
                parser::Grammar & grammar)
    {
        bool    ok = true;

        if (need_load_grammar) {
            // Fast path: restore a previously built grammar from its dump.
            if (!parser::loadGrammarDump(grammar_file,grammar))
                out << inout::fmt("Ошибка при загрузке дампа грамматики '").str() << grammar_file << "'" << std::endl;
            else if (!need_silence) {
                out << inout::fmt("Дамп грамматики успешно загружен").str() << std::endl;
                out << inout::fmt("Грамматика '%1' построена методом %2")
                        .arg(grammar_file)
                        .arg(getGrammarBuilderMethodName(grammar.build_method)).str()
                    << std::endl;
            }
        }
        else {
            // Build path: parse the description, construct tables, analyze scripts.
            // Each stage is timed via the RAII LocalClock helpers below.
            double  total_work_time = 0,
                    parse_time = 0,
                    built_time = 0,
                    script_analyze_time = 0;

            {
                LocalClock total_work_clock(total_work_time);

                ast::FormationFlow  flow;
                parser::FuzeRdp     fuze(m, grammar_file, flow);

                // Stage 1: syntax analysis of the grammar description.
                {
                    LocalClock  parse_clock(parse_time);
                    ok = fuze.parse(in);
                }

                if (ok) {
                    loom::Loom loom;

                    // Stage 2: build the grammar tables by interpreting the AST.
                    {
                        LocalClock built_clock(built_time);

                        /// \todo Replace interpret::Interpret with interpret::Interpret_interface!
                        // NOTE(review): `&inter` is passed into FuzeAnalyzer while
                        // `inter` is still under construction; presumably the analyzer
                        // only stores the pointer at this point — confirm.
                        interpret::Interpret    inter(interpret::InterpretType::Analyzer, m, loom, flow.tree().files(), flow.tree().root(),
                                                {
                                                    new interpret::builtins::FuzeAnalyzer(
                                                            &inter,
                                                            fs::path(grammar_file).stem().string(),
                                                            grammar,
                                                            grammar_builder_method),
                                                });

                        loom.stretch(&inter);
                        loom.wait();

                        grammar.files = flow.tree().files();
                        ok = !inter.errors();
                    }

                    if (ok) {
                        if (!need_silence)
                            out << inout::fmt("Грамматика '%1' построена методом %2")
                                    .arg(grammar_file)
                                    .arg(getGrammarBuilderMethodName(grammar.build_method)).str()
                                << std::endl;

                        // Stage 3: analyze the handler/insert scripts.
                        {
                            LocalClock script_analyze_clock(script_analyze_time);

                            // Run the "lex" handler to populate grammar.lexical.
                            if (need_build_grammar || need_analyze_handles)
                                for(const auto & [name, node] : grammar.handlers) 
                                    if (name == u"lex") {
                                        interpret::Interpret inter(interpret::InterpretType::Preview, m, loom, flow.tree().files(), node);
                                        interpret::builtins::BaseInterpret_abstract *
                                                            /// \note BaseRunning is used instead of BaseAnalyzer in order to fill grammar.lexical
                                                            // NOTE(review): `runner` is a raw `new`; ownership presumably
                                                            // passes to `inter` via instantiateSemantics — confirm.
                                                            runner      = new interpret::builtins::BaseRunning(&inter);
                                        std::shared_ptr<interpret::builtins::LexicalParametersModule> 
                                                            lex         = std::make_shared<interpret::builtins::LexicalParametersModule>
                                                                                (grammar.lexical);

                                        runner->importNamespace(u"lex", lex->instantiate(lex), inout::null_token_location);
                                        inter.instantiateSemantics({runner});
                                        loom.stretch(&inter);
                                        loom.wait();
                                    }
                            
                            if (need_analyze_inserts) {
                                // loom::Loom loom;
                                variable::VariableSet_t 
                                        hosts_and_operations = interpret::loadSemanticOperationsEnums(fs::path(grammar_file).parent_path().string());

                                // Analyze every non-"lex" handler; interpreters are heap-
                                // allocated and handed to the loom (dock(inter, true) —
                                // presumably the `true` transfers ownership; confirm).
                                for(const auto & [name, node] : grammar.handlers) 
                                    if (name != u"lex") {
                                        interpret::Interpret *  
                                                inter       = new interpret::Interpret(interpret::InterpretType::Analyzer, m, loom, flow.tree().files(), node);
                                        interpret::builtins::BaseAnalyzer *   
                                                astAnalyzer = new interpret::builtins::BaseAnalyzer(inter);
                                        std::shared_ptr<interpret::builtins::AstFormationModule> 
                                                ast         = std::make_shared<interpret::builtins::AstFormationModule>(hosts_and_operations);

                                        astAnalyzer->importNamespace(u"ast", ast->instantiate(ast), inout::null_token_location);
                                        inter->instantiateSemantics({astAnalyzer});
                                        loom.dock(inter, true);
                                        loom.stretch(inter);
                                    }
                            
                                // Same analysis for the reduce action of every rule.
                                for(const parser::GrammarRule & r : grammar.rules) {
                                    interpret::Interpret *  inter       = new interpret::Interpret(interpret::InterpretType::Analyzer, m, loom, flow.tree().files(), r.reduce_action);
                                    interpret::builtins::BaseAnalyzer *   
                                                            astAnalyzer = new interpret::builtins::BaseAnalyzer(inter);
                                    std::shared_ptr<interpret::builtins::AstFormationModule> 
                                                            ast         = std::make_shared<interpret::builtins::AstFormationModule>(hosts_and_operations);

                                    astAnalyzer->importNamespace(u"ast", ast->instantiate(ast), inout::null_token_location);
                                    inter->instantiateSemantics({astAnalyzer});
                                    loom.dock(inter, true);
                                    loom.stretch(inter);
                                }

                                // Wait for all stretched interpreters to finish.
                                loom.wait();
                            }
                        }

                        // Optionally persist the built grammar as a dump.
                        if (need_build_grammar) {
                            if (!parser::saveGrammarDump(grammar_file,grammar))
                                out << inout::fmt("Ошибка при сохранении дампа грамматики '%1'").arg(grammar_file).str()
                                    << std::endl;
                            else if (!need_silence)
                                out << inout::fmt("Дамп грамматики успешно сохранён").str() 
                                    << std::endl;
                        }

                        // Optional AST export as DOT.
                        if (!st_dot_file_name.empty()) {
                            if (!need_silence)
                                out << inout::fmt("Построение DOT-файла...").str() 
                                    << std::endl;
                            engine::generateDotFile(st_dot_file_name, flow.tree().root(), {});
                        }

                        // Optional AST export as JSON; failure here flips `ok`.
                        if (!json_file_name.empty()) {
                            if (!need_silence)
                                out << inout::fmt("Сохранение JSON-файла...").str() 
                                    << std::endl;

                            ok = variable::saveJson(variable::toValue(flow.tree()), json_file_name, false);

                            if (!ok)
                                out << inout::fmt("Ошибка сохранения в '%1'").arg(json_file_name).str() 
                                    << std::endl;
                        }
                    }
                    else
                        out << inout::fmt("При построении грамматики '%1' методом %2 возникли ошибки")
                                .arg(grammar_file)
                                .arg(getGrammarBuilderMethodName(grammar_builder_method)).str()
                            << std::endl;
                }
                else if (!need_silence)
                    out << inout::fmt("Синтаксический анализ описания грамматики '%1' выявил ошибки")
                            .arg(grammar_file).str()
                        << std::endl;
            }

            // Timing report for the stages measured above.
            if (need_time_intervals) {
                out << inout::fmt("Время разбора грамматики:\t%1 ms").arg(parse_time*1000).str()
                    << std::endl;
                out << inout::fmt("Время построения таблиц:\t%1 ms").arg(built_time*1000).str()
                    << std::endl;
                out << inout::fmt("Время анализа скриптов:\t\t%1 ms").arg(script_analyze_time*1000).str()
                    << std::endl;
                out << inout::fmt("Общее время работы с грамматикой:\t%1 ms").arg(total_work_time*1000).str()
                    << std::endl;
            }
        }

        // Diagnostic dumps apply to both the loaded and the freshly built grammar.
        if (need_st_info) {
            out << inout::fmt("Обработчики:").str() << std::endl;
            for(const auto & [handler_name,handler_ast] : grammar.handlers) {
                out << '\t' << inout::toU8(handler_name) << ":" << std::endl;
                engine::printSemanticTree(handler_ast, grammar.files, 0, 2);
            }
        }

        if (need_rules_info)
            printRules(grammar, grammar.files, need_st_info, out);

        if (need_state_transitions_info)
            printStateTransitions(grammar, out);

        if (!dot_file_name.empty())
            createStateTransitionsGraph(grammar, dot_file_name, grammar_file, need_silence, out);

        return ok;
    }
435
}
436

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.