loom
204 строки · 7.0 Кб
1/*
2MIT License
3
4Copyright (c) 2021 МГТУ им. Н.Э. Баумана, кафедра ИУ-6, Михаил Фетисов,
5
6https://bmstu.codes/lsx/simodo/loom
7*/
8
9#include "simodo/variable/Module_interface.h"
10#include "simodo/variable/VariableSetWrapper.h"
11#include "simodo/inout/convert/functions.h"
12#include "simodo/variable/json/Serialization.h"
13
#include <algorithm>
#include <cassert>
#include <filesystem>
#include <memory>
#include <system_error>
18
19#ifdef CROSS_WIN
20// MinGW related workaround
21#define BOOST_DLL_FORCE_ALIAS_INSTANTIATION
22#endif
23
24#include <boost/dll/alias.hpp>
25
26using namespace simodo;
27using namespace simodo::variable;
28using namespace simodo::inout;
29
30namespace fs = std::filesystem;
31
32namespace
33{
34Value setup(Module_interface * host, const VariableSetWrapper & args);
35Value symbols(Module_interface * host, const VariableSetWrapper & args);
36Value produceTokens(Module_interface * host, const VariableSetWrapper & args);
37}
38
39class MainTokenizer : public Module_interface
40{
41// ModuleFactory_interface * _factory;
42Value _substitutions_value;
43
44public:
45// MainTokenizer(ModuleFactory_interface * factory) : _factory(factory) {}
46
47Value setup(const std::string & path_to_data, const std::string & language);
48Value symbols();
49Value produceTokens(const std::u16string & text_to_parse, int position, context_index_t context);
50
51virtual version_t version() const override { return lib_version(); }
52
53virtual Object instantiate(std::shared_ptr<variable::Module_interface> module_object) override
54{
55return Object {{
56// {u"version", u"0.1"},
57{u"specialization", u"Word"},
58{u"setup", {ValueType::Function, Object {{
59{u"@", ExternalFunction {module_object, ::setup}},
60{{}, ValueType::String},
61{u"path_to_data", ValueType::String},
62{u"language", ValueType::String},
63}}}},
64{u"symbols", {ValueType::Function, Object {{
65{u"@", ExternalFunction {module_object, ::symbols}},
66{{}, ValueType::Array},
67}}}},
68{u"produceTokens", {ValueType::Function, Object {{
69{u"@", ExternalFunction {module_object, ::produceTokens}},
70{{}, ValueType::Object},
71{u"text_to_parse", ValueType::String},
72{u"position", ValueType::Int},
73{u"context", ValueType::Int},
74}}}},
75}};
76}
77
78// virtual ModuleFactory_interface * factory() override { return _factory; }
79
80// Factory method
81static std::shared_ptr<Module_interface> create() {
82return std::make_shared<MainTokenizer>();
83}
84};
85
86BOOST_DLL_ALIAS(
87MainTokenizer::create, // <-- this function is exported with...
88create_simodo_module // <-- ...this alias name
89)
90
91namespace
92{
93Value setup(Module_interface * host, const VariableSetWrapper & args)
94{
95// Эти условия должны проверяться в вызывающем коде и при необходимости выполняться преобразования
96assert(host != nullptr);
97assert(args.size() == 2);
98assert(args[0].value().type() == ValueType::String);
99assert(args[1].value().type() == ValueType::String);
100
101MainTokenizer * main = static_cast<MainTokenizer *>(host);
102return main->setup(toU8(args[0].value().getString()),
103toU8(args[1].value().getString()));
104}
105
106Value symbols(Module_interface * host, const VariableSetWrapper & )
107{
108// Эти условия должны проверяться в вызывающем коде и при необходимости выполняться преобразования
109assert(host != nullptr);
110
111MainTokenizer * main = static_cast<MainTokenizer *>(host);
112return main->symbols();
113}
114
115Value produceTokens(Module_interface * host, const VariableSetWrapper & args)
116{
117// Эти условия должны проверяться в вызывающем коде и при необходимости выполняться преобразования
118assert(host != nullptr);
119assert(args.size() == 3);
120assert(args[0].value().type() == ValueType::String);
121assert(args[1].value().type() == ValueType::Int);
122assert(args[2].value().type() == ValueType::Int);
123
124MainTokenizer * main = static_cast<MainTokenizer *>(host);
125context_index_t context = NO_TOKEN_CONTEXT_INDEX;
126if (args[2].value().getInt() >= 0 )
127context = static_cast<context_index_t>(args[2].value().getInt());
128
129return main->produceTokens(args[0].value().getString(), args[1].value().getInt(), context);
130}
131
132}
133
134Value MainTokenizer::setup(const std::string & path_to_data, const std::string & language)
135{
136fs::path path_to_substitutions = path_to_data;
137path_to_substitutions /= "substitutions/" + language + ".json";
138
139if (!fs::exists(path_to_substitutions))
140return {};
141
142loadJson(path_to_substitutions.string(), _substitutions_value);
143if (_substitutions_value.type() == ValueType::Object)
144return u"";
145
146return {}; // Не ОК
147}
148
149Value MainTokenizer::symbols()
150{
151if (_substitutions_value.type() != ValueType::Object)
152return {};
153
154std::vector<variable::Value> result;
155
156const std::shared_ptr<Object> substitutions = _substitutions_value.getObject();
157for(const Variable & sub : substitutions->variables()) {
158if (sub.type() != ValueType::Array)
159continue;
160const std::shared_ptr<Array> array = sub.value().getArray();
161for(const variable::Value & v : array->values()) {
162if (v.type() != variable::ValueType::String)
163continue;
164Object symbol_data {{
165{u"symbol", v.getString()},
166{u"type", sub.name()},
167}};
168result.push_back(symbol_data);
169}
170}
171
172return result;
173}
174
175Value MainTokenizer::produceTokens(const std::u16string & text_to_parse, int position, context_index_t )
176{
177if (_substitutions_value.type() != ValueType::Object)
178return {};
179
180Array tokens;
181const std::shared_ptr<Object> substitutions = _substitutions_value.getObject();
182for(const Variable & sub : substitutions->variables()) {
183if (sub.type() != ValueType::Array)
184continue;
185const std::shared_ptr<Array> array = sub.value().getArray();
186auto it = std::find_if(array->values().begin(), array->values().end(),
187[text_to_parse](const Value & x)
188{
189return x.type() == ValueType::String && x.getString() == text_to_parse;
190});
191if (it != array->values().end()) {
192tokens.values().push_back(Object {{
193{u"token", text_to_parse},
194{u"type", sub.name()},
195{u"position", position},
196}});
197break;
198}
199}
200
201return Object {{
202{u"tokens", tokens},
203}};
204}
205
206