FreeCAD

Форк
0
/
StringHasher.cpp 
866 строк · 26.4 Кб
1
// SPDX-License-Identifier: LGPL-2.1-or-later
2

3
/***************************************************************************************************
4
 *                                                                                                 *
5
 *   Copyright (c) 2022 Zheng, Lei (realthunder) <realthunder.dev@gmail.com>                       *
6
 *   Copyright (c) 2023 FreeCAD Project Association                                                *
7
 *                                                                                                 *
8
 *   This file is part of FreeCAD.                                                                 *
9
 *                                                                                                 *
10
 *   FreeCAD is free software: you can redistribute it and/or modify it under the terms of the     *
11
 *   GNU Lesser General Public License as published by the Free Software Foundation, either        *
12
 *   version 2.1 of the License, or (at your option) any later version.                            *
13
 *                                                                                                 *
14
 *   FreeCAD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;          *
15
 *   without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.     *
16
 *   See the GNU Lesser General Public License for more details.                                   *
17
 *                                                                                                 *
18
 *   You should have received a copy of the GNU Lesser General Public License along with           *
19
 *   FreeCAD. If not, see <https://www.gnu.org/licenses/>.                                         *
20
 *                                                                                                 *
21
 **************************************************************************************************/
22

23
#include "PreCompiled.h"
24

25
#include <QCryptographicHash>
26
#include <QHash>
27
#include <deque>
28

29
#include <Base/Console.h>
30
#include <Base/Reader.h>
31
#include <Base/Stream.h>
32
#include <Base/Writer.h>
33

34
#include <boost/algorithm/string/classification.hpp>
35
#include <boost/algorithm/string/split.hpp>
36
#include <boost/bimap.hpp>
37
#include <boost/bimap/set_of.hpp>
38
#include <boost/bimap/unordered_set_of.hpp>
39
#include <boost/iostreams/stream.hpp>
40

41
#include "MappedElement.h"
42
#include "StringHasher.h"
43
#include "StringHasherPy.h"
44
#include "StringIDPy.h"
45

46

47
FC_LOG_LEVEL_INIT("App", true, true)
48

49
namespace bio = boost::iostreams;
50
using namespace App;
51

52
///////////////////////////////////////////////////////////
53

54
struct StringIDHasher
55
{
56
    std::size_t operator()(const StringID* sid) const
57
    {
58
        if (!sid) {
59
            return 0;
60
        }
61
        return qHash(sid->data(), qHash(sid->postfix()));
62
    }
63

64
    bool operator()(const StringID* IDa, const StringID* IDb) const
65
    {
66
        if (IDa == IDb) {
67
            return true;
68
        }
69
        if (!IDa || !IDb) {
70
            return false;
71
        }
72
        return IDa->data() == IDb->data() && IDa->postfix() == IDb->postfix();
73
    }
74
};
75

76
using HashMapBase =
77
    boost::bimap<boost::bimaps::unordered_set_of<StringID*, StringIDHasher, StringIDHasher>,
78
                 boost::bimaps::set_of<long>>;
79

80
// Private table type of StringHasher: the bimap itself plus the two settings that are
// written to and read from the document together with it.
class StringHasher::HashMap: public HashMapBase
{
public:
    // When true, every entry is saved; otherwise only marked or persistent entries are.
    bool SaveAll {false};
    // Data longer than this many bytes may be stored as a hash; 0 disables hashing.
    int Threshold {0};
};
86

87
///////////////////////////////////////////////////////////
88

89
TYPESYSTEM_SOURCE_ABSTRACT(App::StringID, Base::BaseClass)
90

91
// Removes this ID from the table of the hasher it is still attached to, if any.
StringID::~StringID()
{
    if (_hasher == nullptr) {
        return;
    }
    _hasher->_hashes->right.erase(_id);
}
97

98
// Creates a new Python wrapper object around this StringID.
PyObject* StringID::getPyObject()
{
    auto* wrapper = new StringIDPy(this);
    return wrapper;
}
102

103
// Creates a new Python wrapper object around this StringID and stores `index` in the wrapper.
PyObject* StringID::getPyObjectWithIndex(int index)
{
    auto* wrapper = new StringIDPy(this);
    wrapper->_index = index;
    return wrapper;
}
109

110
std::string StringID::toString(int index) const
111
{
112
    std::ostringstream ss;
113
    ss << '#' << std::hex << value();
114
    if (index != 0) {
115
        ss << ':' << index;
116
    }
117
    return ss.str();
118
}
119

120
// Parses text of the form "#<hex id>" or "#<hex id>:<hex index>".
//
// name: text to parse; a null pointer yields id == -1.
// eof:  when true, the parse must reach the end of the input, otherwise the result is id == -1.
// size: number of bytes of `name` to read; a negative value means "use strlen(name)".
//
// Returns the parsed id/index pair, or a pair with id == -1 when the text does not start with
// '#' or when a character other than ':' follows the id.
StringID::IndexID StringID::fromString(const char* name, bool eof, int size)
{
    IndexID parsed {};
    parsed.id = 0;
    parsed.index = 0;

    if (name == nullptr) {
        parsed.id = -1;
        return parsed;
    }

    const int length = (size < 0) ? static_cast<int>(std::strlen(name)) : size;
    bio::stream<bio::array_source> input(name, length);

    char hashMark = 0;
    char indexMark = 0;
    input >> hashMark >> std::hex >> parsed.id >> indexMark >> parsed.index;

    const bool unreadInput = eof && !input.eof();
    const bool badSeparators = hashMark != '#' || (indexMark != 0 && indexMark != ':');
    if (unreadInput || badSeparators) {
        parsed.id = -1;
    }
    return parsed;
}
142

143
std::string StringID::dataToText(int index) const
144
{
145
    if (isHashed() || isBinary()) {
146
        return _data.toBase64().constData();
147
    }
148

149
    std::string res(_data.constData());
150
    if (index != 0) {
151
        res += std::to_string(index);
152
    }
153
    if (_postfix.size() != 0) {
154
        res += _postfix.constData();
155
    }
156
    return res;
157
}
158

159
void StringID::mark() const
160
{
161
    if (isMarked()) {
162
        return;
163
    }
164
    _flags.setFlag(Flag::Marked);
165
    for (auto& sid : _sids) {
166
        sid.deref().mark();
167
    }
168
}
169

170
///////////////////////////////////////////////////////////
171

172
TYPESYSTEM_SOURCE(App::StringHasher, Base::Persistence)
173

174
// Creates a hasher with an empty table.
StringHasher::StringHasher()
    : _hashes(new HashMap())
{
}
177

178
// Detaches and releases every StringID still held by the table.
StringHasher::~StringHasher()
{
    this->clear();
}
182

183
void StringHasher::setSaveAll(bool enable)
184
{
185
    if (_hashes->SaveAll == enable) {
186
        return;
187
    }
188
    _hashes->SaveAll = enable;
189
    compact();
190
}
191

192
void StringHasher::compact()
193
{
194
    if (_hashes->SaveAll) {
195
        return;
196
    }
197

198
    // Make a list of all the table entries that have only a single reference and are not marked
199
    // "persistent"
200
    std::deque<StringIDRef> pendings;
201
    for (auto& hasher : _hashes->right) {
202
        if (!hasher.second->isPersistent() && hasher.second->getRefCount() == 1) {
203
            pendings.emplace_back(hasher.second);
204
        }
205
    }
206

207
    // Recursively remove the unused StringIDs
208
    while (!pendings.empty()) {
209
        StringIDRef sid = pendings.front();
210
        pendings.pop_front();
211
        // Try to erase the map entry for this StringID
212
        if (_hashes->right.erase(sid.value()) == 0U) {
213
            continue;// If nothing was erased, there's nothing more to do
214
        }
215
        sid._sid->_hasher = nullptr;
216
        sid._sid->unref();
217
        for (auto& hasher : sid._sid->_sids) {
218
            if (hasher._sid->_hasher == this && !hasher._sid->isPersistent()
219
                && hasher._sid->getRefCount() == 2) {
220
                // If the related StringID also uses this hasher, is not marked persistent, and has
221
                // a current reference count of 2 (which will be its hasher reference and its entry
222
                // in the related SIDs list), then prep it for removal as well.
223
                pendings.push_back(hasher);
224
            }
225
        }
226
    }
227
}
228

229
bool StringHasher::getSaveAll() const
230
{
231
    return _hashes->SaveAll;
232
}
233

234
void StringHasher::setThreshold(int threshold)
235
{
236
    _hashes->Threshold = threshold;
237
}
238

239
int StringHasher::getThreshold() const
240
{
241
    return _hashes->Threshold;
242
}
243

244
long StringHasher::lastID() const
245
{
246
    if (_hashes->right.empty()) {
247
        return 0;
248
    }
249
    auto it = _hashes->right.end();
250
    --it;
251
    return it->first;
252
}
253

254
// Convenience overload for C strings.
//
// text:     the bytes to look up or insert; they are wrapped without copying for the lookup.
// len:      number of bytes; a negative value means "use strlen(text)".
// hashable: when true the Hashable option is passed on, otherwise no option is set.
StringIDRef StringHasher::getID(const char* text, int len, bool hashable)
{
    const int length = (len < 0) ? static_cast<int>(strlen(text)) : len;
    const QByteArray wrapped = QByteArray::fromRawData(text, length);
    return getID(wrapped, hashable ? Option::Hashable : Option::None);
}
261

262
// Returns the StringID for `data`, inserting a new entry when none exists yet.
//
// options:
//   Binary   - the new entry gets the Binary flag.
//   Hashable - data longer than the (positive) threshold is replaced by its SHA-1 digest and the
//              new entry gets the Hashed flag.
//   NoCopy   - the data is stored as passed in, without making a deep copy first.
StringIDRef StringHasher::getID(const QByteArray& data, Options options)
{
    const bool wantBinary = options.testFlag(Option::Binary);
    const bool wantHashable = options.testFlag(Option::Hashable);
    const bool skipCopy = options.testFlag(Option::NoCopy);

    const bool useHash =
        wantHashable && _hashes->Threshold > 0 && static_cast<int>(data.size()) > _hashes->Threshold;

    // Temporary key object used only to search the table.
    StringID probe;
    if (useHash) {
        QCryptographicHash sha1(QCryptographicHash::Sha1);
        sha1.addData(data);
        probe._data = sha1.result();
    }
    else {
        probe._data = data;
    }

    const auto existing = _hashes->left.find(&probe);
    if (existing != _hashes->left.end()) {
        return {existing->first};
    }

    if (!useHash && !skipCopy) {
        // Not hashed and copying allowed: store a deep copy of the caller's bytes.
        probe._data = QByteArray(data.constData(), data.size());
    }

    StringID::Flags flags(StringID::Flag::None);
    if (wantBinary) {
        flags.setFlag(StringID::Flag::Binary);
    }
    if (useHash) {
        flags.setFlag(StringID::Flag::Hashed);
    }

    StringIDRef created(new StringID(lastID() + 1, probe._data, flags));
    return {insert(created)};
}
300

301
// Returns the StringID for a mapped name, inserting a new entry when none exists yet.
//
// name: the mapped name; its data and postfix bytes form the key.
// sids: related StringIDs; only those belonging to this hasher are recorded on a new entry.
//
// The returned reference carries the index of the indexed name, when the data is one.
StringIDRef StringHasher::getID(const Data::MappedName& name, const QVector<StringIDRef>& sids)
{
    // Temporary key object used only to search the table.
    StringID probe;
    probe._postfix = name.postfixBytes();

    Data::IndexedName indexed;
    if (probe._postfix.size() != 0) {
        // Only check for IndexedName if there is postfix, because of the way
        // we restore the StringID. See StringHasher::saveStream/restoreStreamNew()
        indexed = Data::IndexedName(name.dataBytes());
    }
    if (indexed) {
        // For an IndexedName, _data holds only the type part of the name, without the integer
        // index. The bytes are referenced, not copied.
        const char* typeName = indexed.getType();
        probe._data = QByteArray::fromRawData(typeName, static_cast<int>(strlen(typeName)));
    }
    else {
        // Use the whole name, temporarily sharing the existing memory
        probe._data = name.dataBytes();
    }

    // Is there already a table entry for this key?
    const auto existing = _hashes->left.find(&probe);
    if (existing != _hashes->left.end()) {
        StringIDRef found(existing->first);
        if (indexed) {
            found._index = indexed.getIndex();
        }
        return found;
    }

    if (!indexed && name.isRaw()) {
        // The name refers to raw memory: take a real copy before storing it
        const QByteArray& rawBytes = name.dataBytes();
        probe._data = QByteArray(rawBytes.constData(), rawBytes.size());
    }

    // A postfix that contains no '#' is not encoded yet: register it and replace it with the
    // byte form of its reference.
    StringIDRef encodedPostfix;
    if ((probe._postfix.size() != 0) && probe._postfix.indexOf("#") < 0) {
        encodedPostfix = getID(probe._postfix);
        encodedPostfix.toBytes(probe._postfix);
    }

    // The type part of an IndexedName is registered as an entry of its own.
    StringIDRef encodedType;
    if (indexed) {
        encodedType = getID(probe._data);
    }

    // The real StringID object that is going to be inserted
    StringIDRef created(new StringID(lastID() + 1, probe._data));
    StringID& entry = *created._sid;
    if (probe._postfix.size() != 0) {
        entry._flags.setFlag(StringID::Flag::Postfixed);
        entry._postfix = probe._postfix;
    }

    // True for a non-null reference whose StringID belongs to this hasher
    const auto ownedByThis = [this](const StringIDRef& ref) {
        return ref && ref._sid->_hasher == this;
    };

    int ownedCount = 0;
    for (const auto& relatedID : sids) {
        if (ownedByThis(relatedID)) {
            ++ownedCount;
        }
    }

    const int addedCount = (encodedPostfix ? 1 : 0) + (encodedType ? 1 : 0);
    if (ownedCount == sids.size() && addedCount == 0) {
        // The simplest case: every input ID is usable and nothing is added, so copy the list
        entry._sids = sids;
    }
    else {
        // The added references go to the front of the list: postfix first, then type
        entry._sids.reserve(ownedCount + addedCount);
        if (encodedPostfix) {
            entry._flags.setFlag(StringID::Flag::PostfixEncoded);
            entry._sids.push_back(encodedPostfix);
        }
        if (encodedType) {
            entry._flags.setFlag(StringID::Flag::Indexed);
            entry._sids.push_back(encodedType);
        }
        // Followed by the input IDs that belong to this hasher
        for (const auto& relatedID : sids) {
            if (ownedByThis(relatedID)) {
                entry._sids.push_back(relatedID);
            }
        }
    }

    // Lists longer than a hardcoded threshold are sorted and de-duplicated, leaving the
    // references added at the front untouched.
    const int relatedIDSizeThreshold {10};
    if (entry._sids.size() > relatedIDSizeThreshold) {
        std::sort(entry._sids.begin() + addedCount, entry._sids.end());
        entry._sids.erase(std::unique(entry._sids.begin() + addedCount, entry._sids.end()),
                          entry._sids.end());
    }

    // A postfixed entry that is not an IndexedName may still have data of the form
    // "#<id>" or "#<id>:<index>"; fromString() is used to find out.
    if ((entry._postfix.size() != 0) && !indexed) {
        StringID::IndexID res = StringID::fromString(entry._data);
        if (res.id > 0) {
            if (res.index != 0) {
                // Keep the index on the IndexedName and cut the data right after the ':'
                indexed.setIndex(res.index);
                entry._data.resize(entry._data.lastIndexOf(':') + 1);
            }
            const int offset = entry.isPostfixEncoded() ? 1 : 0;
            // Look for the related ID with that value
            for (int i = offset; i < entry._sids.size(); ++i) {
                if (entry._sids[i].value() != res.id) {
                    continue;
                }
                if (i != offset) {
                    // Move it to the first slot after the encoded postfix by swapping it with
                    // whatever is there
                    std::swap(entry._sids[offset], entry._sids[i]);
                }
                entry._flags.setFlag(res.index != 0 ? StringID::Flag::PrefixIDIndex
                                                    : StringID::Flag::PrefixID);
                break;
            }
        }
    }

    return {insert(created), indexed.getIndex()};
}
434

435
// Looks an entry up by its integer ID.
// Returns a null reference when `id` is not positive or not in the table; otherwise a
// reference to the entry with `index` stored in it.
StringIDRef StringHasher::getID(long id, int index) const
{
    if (id > 0) {
        const auto found = _hashes->right.find(id);
        if (found != _hashes->right.end()) {
            StringIDRef ref(found->second);
            ref._index = index;
            return ref;
        }
    }
    return {};
}
448

449
void StringHasher::setPersistenceFileName(const char* filename) const
450
{
451
    if (!filename) {
452
        filename = "";
453
    }
454
    _filename = filename;
455
}
456

457
const std::string& StringHasher::getPersistenceFileName() const
458
{
459
    return _filename;
460
}
461

462
// Writes the hasher to the document XML.
//
// A <StringHasher> element with the settings is always written. When there are entries to
// save, it is followed by a <StringHasher2> element that either refers to a separate file
// (when a persistence file name is set) or contains the table inline.
void StringHasher::Save(Base::Writer& writer) const
{
    const std::size_t entries = _hashes->SaveAll ? _hashes->size() : this->count();

    writer.Stream() << writer.ind() << "<StringHasher saveall=\"" << _hashes->SaveAll
                    << "\" threshold=\"" << _hashes->Threshold << "\"";

    if (entries == 0U) {
        writer.Stream() << " count=\"0\"></StringHasher>\n";
        return;
    }

    // The first element reports a count of 0 and carries the new="1" attribute; the entries
    // themselves follow in StringHasher2.
    writer.Stream() << " count=\"0\" new=\"1\"/>\n";
    writer.Stream() << writer.ind() << "<StringHasher2 ";

    if (_filename.empty()) {
        // No file name: embed the table as character data.
        writer.Stream() << " count=\"" << entries << "\">\n";
        saveStream(writer.beginCharStream() << '\n');
        writer.endCharStream() << '\n';
        writer.Stream() << writer.ind() << "</StringHasher2>\n";
    }
    else {
        // Register a separate "<name>.txt" file; SaveDocFile() provides its content.
        writer.Stream() << " file=\"" << writer.addFile((_filename + ".txt").c_str(), this)
                        << "\"/>\n";
    }
}
489

490
// Writes the table to a separate document file: a "StringTableStart v1 <count>" header line
// followed by the entries produced by saveStream().
void StringHasher::SaveDocFile(Base::Writer& writer) const
{
    const std::size_t entries = _hashes->SaveAll ? this->size() : this->count();
    writer.Stream() << "StringTableStart v1 " << entries << '\n';
    saveStream(writer.Stream());
}
496

497
void StringHasher::saveStream(std::ostream& stream) const
498
{
499
    Base::TextOutputStream textStreamWrapper(stream);
500
    boost::io::ios_flags_saver ifs(stream);
501
    stream << std::hex;
502

503
    long anchor = 0;
504
    const StringID* last = nullptr;
505
    long lastID = 0;
506
    bool relative = false;
507

508
    for (auto& hasher : _hashes->right) {
509
        auto& d = *hasher.second;
510
        long id = d._id;
511
        if (!_hashes->SaveAll && !d.isMarked() && !d.isPersistent()) {
512
            continue;
513
        }
514

515
        // We use relative coding to save space. But in order to have some
516
        // minimum protection against corruption, write an absolute value every
517
        // once a while.
518
        relative = (id - anchor) < 1000;
519
        if (relative) {
520
            stream << '-' << id - lastID;
521
        }
522
        else {
523
            anchor = id;
524
            stream << id;
525
        }
526
        lastID = id;
527

528
        int offset = d.isPostfixEncoded() ? 1 : 0;
529

530
        StringID::IndexID prefixID {};
531
        prefixID.id = 0;
532
        prefixID.index = 0;
533
        if (d.isPrefixID()) {
534
            assert(d._sids.size() > offset);
535
            prefixID.id = d._sids[offset].value();
536
        }
537
        else if (d.isPrefixIDIndex()) {
538
            prefixID = StringID::fromString(d._data);
539
            assert(d._sids.size() > offset && d._sids[offset].value() == prefixID.id);
540
        }
541

542
        auto flags = d._flags;
543
        flags.setFlag(StringID::Flag::Marked, false);
544
        stream << '.' << flags.toUnderlyingType();
545

546
        int position = 0;
547
        if (!relative) {
548
            for (; position < d._sids.size(); ++position) {
549
                stream << '.' << d._sids[position].value();
550
            }
551
        }
552
        else {
553
            if (last) {
554
                for (; position < d._sids.size() && position < last->_sids.size(); ++position) {
555
                    long m = last->_sids[position].value();
556
                    long n = d._sids[position].value();
557
                    if (n < m) {
558
                        stream << ".-" << m - n;
559
                    }
560
                    else {
561
                        stream << '.' << n - m;
562
                    }
563
                }
564
            }
565
            for (; position < d._sids.size(); ++position) {
566
                stream << '.' << id - d._sids[position].value();
567
            }
568
        }
569

570
        last = &d;
571

572
        // Having postfix means it is a geometry element name, which
573
        // guarantees to be a single line without space. So it is safe to
574
        // store in raw stream.
575
        if (d.isPostfixed()) {
576
            if (!d.isPrefixIDIndex() && !d.isIndexed() && !d.isPrefixID()) {
577
                stream << ' ' << d._data.constData();
578
            }
579

580
            if (!d.isPostfixEncoded()) {
581
                stream << ' ' << d._postfix.constData();
582
            }
583
            stream << '\n';
584
        }
585
        else {
586
            // Reaching here means the string may contain space and newlines
587
            // We rely on OutputStream (i.e. textStreamWrapper) to save the string.
588
            stream << ' ';
589
            textStreamWrapper << d._data.constData();
590
        }
591
    }
592
}
593

594
// Restores the table from a separate document file.
//
// A file that starts with the "StringTableStart" marker is read with restoreStreamNew()
// (a version other than "v1" only produces a warning); anything else is treated as the legacy
// format, where the entry count follows the first token, and is read with restoreStream().
void StringHasher::RestoreDocFile(Base::Reader& reader)
{
    std::string marker;
    std::string ver;
    reader >> marker;
    std::size_t count = 0;
    // Use clear() rather than _hashes->clear(): entries still in the table must be detached
    // from this hasher and have the table's reference released, otherwise they are leaked.
    clear();
    if (marker == "StringTableStart") {
        reader >> ver >> count;
        if (ver != "v1") {
            FC_WARN("Unknown string table format");
        }
        restoreStreamNew(reader, count);
        return;
    }
    reader >> count;
    restoreStream(reader, count);
}
612

613
void StringHasher::restoreStreamNew(std::istream& stream, std::size_t count)
614
{
615
    Base::TextInputStream asciiStream (stream);
616
    _hashes->clear();
617
    std::string content;
618
    boost::io::ios_flags_saver ifs(stream);
619
    stream >> std::hex;
620
    std::vector<std::string> tokens;
621
    long lastid = 0;
622
    const StringID* last = nullptr;
623

624
    std::string tmp;
625

626
    for (uint32_t i = 0; i < count; ++i) {
627
        if (!(stream >> tmp)) {
628
            FC_THROWM(Base::RuntimeError, "Invalid string table");
629
        }
630

631
        tokens.clear();
632
        boost::split(tokens, tmp, boost::is_any_of("."));
633
        if (tokens.size() < 2) {
634
            FC_THROWM(Base::RuntimeError, "Invalid string table");
635
        }
636

637
        long id = 0;
638
        bool relative = false;
639
        if (tokens[0][0] == '-') {
640
            relative = true;
641
            id = lastid + strtol(tokens[0].c_str() + 1, nullptr, 16);
642
        }
643
        else {
644
            id = strtol(tokens[0].c_str(), nullptr, 16);
645
        }
646

647
        lastid = id;
648

649
        unsigned long flag = strtol(tokens[1].c_str(), nullptr, 16);
650
        StringIDRef sid(new StringID(id, QByteArray(), static_cast<StringID::Flag>(flag)));
651

652
        StringID& d = *sid._sid;
653
        d._sids.reserve(tokens.size() - 2);
654

655
        int j = 2;
656
        if (relative && last) {
657
            for (; j < (int)tokens.size() && j - 2 < last->_sids.size(); ++j) {
658
                long m = last->_sids[j - 2].value();
659
                long n;
660
                if (tokens[j][0] == '-') {
661
                    n = -strtol(&tokens[j][1], nullptr, 16);
662
                }
663
                else {
664
                    n = strtol(&tokens[j][0], nullptr, 16);
665
                }
666
                StringIDRef sid = getID(m + n);
667
                if (!sid) {
668
                    FC_THROWM(Base::RuntimeError, "Invalid string id reference");
669
                }
670
                d._sids.push_back(sid);
671
            }
672
        }
673
        for (; j < (int)tokens.size(); ++j) {
674
            long n = strtol(tokens[j].data(), nullptr, 16);
675
            StringIDRef sid = getID(relative ? id - n : n);
676
            if (!sid) {
677
                FC_THROWM(Base::RuntimeError, "Invalid string id reference");
678
            }
679
            d._sids.push_back(sid);
680
        }
681

682
        if (!d.isPostfixed()) {
683
            asciiStream >> content;
684
            if (d.isHashed() || d.isBinary()) {
685
                d._data = QByteArray::fromBase64(content.c_str());
686
            }
687
            else {
688
                d._data = content.c_str();
689
            }
690
        }
691
        else {
692
            int offset = 0;
693
            if (d.isPostfixEncoded()) {
694
                offset = 1;
695
                if (d._sids.empty()) {
696
                    FC_THROWM(Base::RuntimeError, "Missing string postfix");
697
                }
698
                d._postfix = d._sids[0]._sid->_data;
699
            }
700
            if (d.isIndexed()) {
701
                if (d._sids.size() <= offset) {
702
                    FC_THROWM(Base::RuntimeError, "Missing string prefix");
703
                }
704
                d._data = d._sids[offset]._sid->_data;
705
            }
706
            else if (d.isPrefixID() || d.isPrefixIDIndex()) {
707
                if (d._sids.size() <= offset) {
708
                    FC_THROWM(Base::RuntimeError, "Missing string prefix id");
709
                }
710
                d._data = d._sids[offset]._sid->toString(0).c_str();
711
                if (d.isPrefixIDIndex())
712
                    d._data += ":";
713
            }
714
            else {
715
                stream >> content;
716
                d._data = content.c_str();
717
            }
718
            if (!d.isPostfixEncoded()) {
719
                stream >> content;
720
                d._postfix = content.c_str();
721
            }
722
        }
723

724
        last = insert(sid);
725
    }
726
}
727

728
// Adds a StringID that is not attached to any hasher yet to the table.
//
// The StringID is attached to this hasher and the table takes a reference to it. When the
// table ends up holding a different StringID for this insertion, both steps are undone.
// Returns the StringID the table holds.
StringID* StringHasher::insert(const StringIDRef& sid)
{
    assert(sid && sid._sid->_hasher == nullptr);

    auto& candidate = *sid._sid;
    candidate._hasher = this;
    candidate.ref();

    // IDs are normally handed out in ascending order, so hint at the end of the ordered view
    const HashMap::right_map::value_type newEntry(sid.value(), &candidate);
    auto position = _hashes->right.insert(_hashes->right.end(), newEntry);

    StringID* stored = position->second;
    if (stored != &candidate) {
        candidate._hasher = nullptr;
        candidate.unref();
    }
    return stored;
}
742

743
void StringHasher::restoreStream(std::istream& stream, std::size_t count)
744
{
745
    _hashes->clear();
746
    std::string content;
747
    for (uint32_t i = 0; i < count; ++i) {
748
        int32_t id = 0;
749
        uint8_t type = 0;
750
        stream >> id >> type >> content;
751
        StringIDRef sid = new StringID(id, QByteArray(), static_cast<StringID::Flag>(type));
752
        if (sid.isHashed() || sid.isBinary()) {
753
            sid._sid->_data = QByteArray::fromBase64(content.c_str());
754
        }
755
        else {
756
            sid._sid->_data = QByteArray(content.c_str());
757
        }
758
        insert(sid);
759
    }
760
}
761

762
void StringHasher::clear()
763
{
764
    for (auto& hasher : _hashes->right) {
765
        hasher.second->_hasher = nullptr;
766
        hasher.second->unref();
767
    }
768
    _hashes->clear();
769
}
770

771
size_t StringHasher::size() const
772
{
773
    return _hashes->size();
774
}
775

776
size_t StringHasher::count() const
777
{
778
    size_t count = 0;
779
    for (auto& hasher : _hashes->right) {
780
        if (hasher.second->isMarked() || hasher.second->isPersistent() ) {
781
            ++count;
782
        }
783
    }
784
    return count;
785
}
786

787
// Restores the hasher from the document XML written by Save(), including the older layouts:
//   - <StringHasher ... new="1"/> followed by <StringHasher2> (separate file or inline table)
//   - <StringHasher> with an inline legacy table (file version > 1)
//   - <StringHasher> with one <Item> element per entry
void StringHasher::Restore(Base::XMLReader& reader)
{
    clear();
    reader.readElement("StringHasher");
    _hashes->SaveAll = reader.getAttributeAsInteger("saveall") != 0L;
    _hashes->Threshold = static_cast<int>(reader.getAttributeAsInteger("threshold"));

    // With new="1" the actual table lives in the StringHasher2 element that follows
    const bool newTag = reader.hasAttribute("new") && reader.getAttributeAsInteger("new") > 0;
    if (newTag) {
        reader.readElement("StringHasher2");
    }

    // The table is stored in a separate file: register it and let RestoreDocFile() read it
    if (reader.hasAttribute("file")) {
        const char* file = reader.getAttribute("file");
        if (*file != '\0') {
            reader.addFile(file, this);
        }
        return;
    }

    const std::size_t count = reader.getAttributeAsUnsigned("count");
    if (newTag) {
        // A damaged table is reported, but does not abort loading the document
        try {
            restoreStreamNew(reader.beginCharStream(), count);
        }
        catch (const Base::Exception& e) {
            e.ReportException();
            FC_ERR("Failed to restore string table: full-document recompute strongly recommended.");
        }
        reader.readEndElement("StringHasher2");
        return;
    }

    if ((count != 0U) && reader.FileVersion > 1) {
        restoreStream(reader.beginCharStream(), count);
    }
    else {
        // One <Item> element per entry
        for (std::size_t i = 0; i < count; ++i) {
            reader.readElement("Item");
            StringIDRef sid;
            const long id = reader.getAttributeAsInteger("id");

            // "hash" takes precedence over "data"; both hold base64 text and both produce an
            // entry with the Hashed flag. Without either, the plain "text" attribute is used.
            const char* encodedAttribute = nullptr;
            if (reader.hasAttribute("hash")) {
                encodedAttribute = "hash";
            }
            else if (reader.hasAttribute("data")) {
                encodedAttribute = "data";
            }

            if (encodedAttribute != nullptr) {
                const char* value = reader.getAttribute(encodedAttribute);
                sid = new StringID(id, QByteArray::fromBase64(value), StringID::Flag::Hashed);
            }
            else {
                sid = new StringID(id, QByteArray(reader.getAttribute("text")));
            }
            insert(sid);
        }
    }
    reader.readEndElement("StringHasher");
}
841

842
unsigned int StringHasher::getMemSize() const
843
{
844
    return (_hashes->SaveAll ? size() : count()) * 10;
845
}
846

847
// Creates a new Python wrapper object around this hasher.
PyObject* StringHasher::getPyObject()
{
    auto* wrapper = new StringHasherPy(this);
    return wrapper;
}
851

852
std::map<long, StringIDRef> StringHasher::getIDMap() const
853
{
854
    std::map<long, StringIDRef> ret;
855
    for (auto& hasher : _hashes->right) {
856
        ret.emplace_hint(ret.end(), hasher.first, StringIDRef(hasher.second));
857
    }
858
    return ret;
859
}
860

861
void StringHasher::clearMarks() const
862
{
863
    for (auto& hasher : _hashes->right) {
864
        hasher.second->_flags.setFlag(StringID::Flag::Marked, false);
865
    }
866
}
867

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.