FreeCAD

Форк
0
/
StringHasher.cpp 
875 строк · 26.5 Кб
1
// SPDX-License-Identifier: LGPL-2.1-or-later
2

3
/***************************************************************************************************
4
 *                                                                                                 *
5
 *   Copyright (c) 2022 Zheng, Lei (realthunder) <realthunder.dev@gmail.com>                       *
6
 *   Copyright (c) 2023 FreeCAD Project Association                                                *
7
 *                                                                                                 *
8
 *   This file is part of FreeCAD.                                                                 *
9
 *                                                                                                 *
10
 *   FreeCAD is free software: you can redistribute it and/or modify it under the terms of the     *
11
 *   GNU Lesser General Public License as published by the Free Software Foundation, either        *
12
 *   version 2.1 of the License, or (at your option) any later version.                            *
13
 *                                                                                                 *
14
 *   FreeCAD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;          *
15
 *   without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.     *
16
 *   See the GNU Lesser General Public License for more details.                                   *
17
 *                                                                                                 *
18
 *   You should have received a copy of the GNU Lesser General Public License along with           *
19
 *   FreeCAD. If not, see <https://www.gnu.org/licenses/>.                                         *
20
 *                                                                                                 *
21
 **************************************************************************************************/
22

23
#include "PreCompiled.h"
24

25
#include <QCryptographicHash>
26
#include <QHash>
27
#include <deque>
28

29
#include <Base/Console.h>
30
#include <Base/Reader.h>
31
#include <Base/Stream.h>
32
#include <Base/Writer.h>
33

34
#include <boost/algorithm/string/classification.hpp>
35
#include <boost/algorithm/string/split.hpp>
36
#include <boost/bimap.hpp>
37
#include <boost/bimap/set_of.hpp>
38
#include <boost/bimap/unordered_set_of.hpp>
39
#include <boost/iostreams/stream.hpp>
40

41
#include "MappedElement.h"
42
#include "StringHasher.h"
43
#include "StringHasherPy.h"
44
#include "StringIDPy.h"
45

46

47
FC_LOG_LEVEL_INIT("App", true, true)
48

49
namespace bio = boost::iostreams;
50
using namespace App;
51

52
///////////////////////////////////////////////////////////
53

54
struct StringIDHasher
55
{
56
    std::size_t operator()(const StringID* sid) const
57
    {
58
        if (!sid) {
59
            return 0;
60
        }
61
        return qHash(sid->data(), qHash(sid->postfix()));
62
    }
63

64
    bool operator()(const StringID* IDa, const StringID* IDb) const
65
    {
66
        if (IDa == IDb) {
67
            return true;
68
        }
69
        if (!IDa || !IDb) {
70
            return false;
71
        }
72
        return IDa->data() == IDb->data() && IDa->postfix() == IDb->postfix();
73
    }
74
};
75

76
using HashMapBase =
77
    boost::bimap<boost::bimaps::unordered_set_of<StringID*, StringIDHasher, StringIDHasher>,
78
                 boost::bimaps::set_of<long>>;
79

80
// The hasher's private table: the bimap itself plus two settings kept alongside it.
class StringHasher::HashMap: public HashMapBase
{
public:
    // When true, compact() is a no-op and the save routines write every entry.
    bool SaveAll {false};
    // Data longer than this many bytes is stored as a SHA1 digest when the caller allows
    // hashing; a value <= 0 disables hashing (see getID(const QByteArray&, Options)).
    int Threshold {0};
};
86

87
///////////////////////////////////////////////////////////
88

89
// Type-system registration of App::StringID with Base::BaseClass as its parent.
TYPESYSTEM_SOURCE_ABSTRACT(App::StringID, Base::BaseClass)
90

91
// On destruction, remove this ID's entry from the owning hasher's table (if it still has
// an owning hasher).
StringID::~StringID()
{
    if (!_hasher) {
        return;
    }
    _hasher->_hashes->right.erase(_id);
}
97

98
// Create a new Python wrapper object for this StringID.
PyObject* StringID::getPyObject()
{
    auto* wrapper = new StringIDPy(this);
    return wrapper;
}
102

103
// Create a new Python wrapper object for this StringID and store `index` in the wrapper.
PyObject* StringID::getPyObjectWithIndex(int index)
{
    auto* wrapper = new StringIDPy(this);
    wrapper->_index = index;
    return wrapper;
}
109

110
std::string StringID::toString(int index) const
111
{
112
    std::ostringstream ss;
113
    ss << '#' << std::hex << value();
114
    if (index != 0) {
115
        ss << ':' << index;
116
    }
117
    return ss.str();
118
}
119

120
// Parse text of the form "#<hex id>[:<hex index>]" (the inverse of toString()).
//
// name: text to parse; a null pointer yields id == -1.
// eof:  when true, the parse must have reached the end of the input, otherwise id == -1.
// size: number of characters to read from name; a negative value means "use strlen".
//
// Returns the parsed id/index pair; id is set to -1 when the leading '#' is missing or
// when the character after the id is something other than ':'.
StringID::IndexID StringID::fromString(const char* name, bool eof, int size)
{
    IndexID parsed {};
    parsed.id = 0;
    parsed.index = 0;
    if (name == nullptr) {
        parsed.id = -1;
        return parsed;
    }

    const int length = size < 0 ? static_cast<int>(std::strlen(name)) : size;
    bio::stream<bio::array_source> input(name, length);

    char hashMark = 0;
    char separator = 0;
    input >> hashMark >> std::hex >> parsed.id >> separator >> parsed.index;

    const bool trailingInput = eof && !input.eof();
    const bool missingHashMark = hashMark != '#';
    const bool wrongSeparator = separator != 0 && separator != ':';
    if (trailingInput || missingHashMark || wrongSeparator) {
        parsed.id = -1;
    }
    return parsed;
}
142

143
std::string StringID::dataToText(int index) const
144
{
145
    if (isHashed() || isBinary()) {
146
        return _data.toBase64().constData();
147
    }
148

149
    std::string res(_data.constData());
150
    if (index != 0) {
151
        res += std::to_string(index);
152
    }
153
    if (_postfix.size() != 0) {
154
        res += _postfix.constData();
155
    }
156
    return res;
157
}
158

159
void StringID::mark() const
160
{
161
    if (isMarked()) {
162
        return;
163
    }
164
    _flags.setFlag(Flag::Marked);
165
    for (auto& sid : _sids) {
166
        sid.deref().mark();
167
    }
168
}
169

170
///////////////////////////////////////////////////////////
171

172
// Type-system registration of App::StringHasher with Base::Persistence as its parent.
TYPESYSTEM_SOURCE(App::StringHasher, Base::Persistence)
173

174
// Construct a hasher with an empty table (SaveAll off, Threshold 0).
StringHasher::StringHasher()
    : _hashes(new HashMap)
{
}
177

178
// Detach and release every StringID still held in the table before the hasher goes away.
StringHasher::~StringHasher()
{
    clear();
}
182

183
void StringHasher::setSaveAll(bool enable)
184
{
185
    if (_hashes->SaveAll == enable) {
186
        return;
187
    }
188
    _hashes->SaveAll = enable;
189
    compact();
190
}
191

192
void StringHasher::compact()
193
{
194
    if (_hashes->SaveAll) {
195
        return;
196
    }
197

198
    // Make a list of all the table entries that have only a single reference and are not marked
199
    // "persistent"
200
    std::deque<StringIDRef> pendings;
201
    for (auto& hasher : _hashes->right) {
202
        if (!hasher.second->isPersistent() && hasher.second->getRefCount() == 1) {
203
            pendings.emplace_back(hasher.second);
204
        }
205
    }
206

207
    // Recursively remove the unused StringIDs
208
    while (!pendings.empty()) {
209
        StringIDRef sid = pendings.front();
210
        pendings.pop_front();
211
        // Try to erase the map entry for this StringID
212
        if (_hashes->right.erase(sid.value()) == 0U) {
213
            continue;// If nothing was erased, there's nothing more to do
214
        }
215
        sid._sid->_hasher = nullptr;
216
        sid._sid->unref();
217
        for (auto& hasher : sid._sid->_sids) {
218
            if (hasher._sid->_hasher == this && !hasher._sid->isPersistent()
219
                && hasher._sid->getRefCount() == 2) {
220
                // If the related StringID also uses this hasher, is not marked persistent, and has
221
                // a current reference count of 2 (which will be its hasher reference and its entry
222
                // in the related SIDs list), then prep it for removal as well.
223
                pendings.push_back(hasher);
224
            }
225
        }
226
    }
227
}
228

229
bool StringHasher::getSaveAll() const
230
{
231
    return _hashes->SaveAll;
232
}
233

234
void StringHasher::setThreshold(int threshold)
235
{
236
    _hashes->Threshold = threshold;
237
}
238

239
int StringHasher::getThreshold() const
240
{
241
    return _hashes->Threshold;
242
}
243

244
long StringHasher::lastID() const
245
{
246
    if (_hashes->right.empty()) {
247
        return 0;
248
    }
249
    auto it = _hashes->right.end();
250
    --it;
251
    return it->first;
252
}
253

254
// Convenience overload for C strings.
//
// text:     the characters to look up / register (must not be null when len < 0).
// len:      number of characters; a negative value means "use strlen(text)".
// hashable: when true, the Hashable option is passed on; otherwise no options are set.
StringIDRef StringHasher::getID(const char* text, int len, bool hashable)
{
    const int length = len < 0 ? static_cast<int>(strlen(text)) : len;
    // fromRawData wraps the caller's buffer without copying it.
    return getID(QByteArray::fromRawData(text, length),
                 hashable ? Option::Hashable : Option::None);
}
261

262
// Look up the StringID for `data`, creating and inserting a new one if none exists.
//
// Options read here: Binary (flag the new ID as binary), Hashable (allow replacing long
// data by its SHA1 digest), NoCopy (store `data` as given instead of deep-copying it).
// Data is hashed only when Hashable is set, Threshold is positive, and the data is
// longer than Threshold.
StringIDRef StringHasher::getID(const QByteArray& data, Options options)
{
    const bool binary = options.testFlag(Option::Binary);
    const bool hashable = options.testFlag(Option::Hashable);
    const bool nocopy = options.testFlag(Option::NoCopy);

    const bool hashed = hashable && _hashes->Threshold > 0
        && static_cast<int>(data.size()) > _hashes->Threshold;

    // Temporary StringID used only as the lookup key.
    StringID probe;
    if (hashed) {
        QCryptographicHash digest(QCryptographicHash::Sha1);
        digest.addData(data);
        probe._data = digest.result();
    }
    else {
        probe._data = data;
    }

    auto found = _hashes->left.find(&probe);
    if (found != _hashes->left.end()) {
        return {found->first};
    }

    // Not in the table yet. Unless hashed or NoCopy was requested, take a deep copy so
    // the stored bytes do not depend on the caller's buffer.
    if (!hashed && !nocopy) {
        probe._data = QByteArray(data.constData(), data.size());
    }

    StringID::Flags flags(StringID::Flag::None);
    if (binary) {
        flags.setFlag(StringID::Flag::Binary);
    }
    if (hashed) {
        flags.setFlag(StringID::Flag::Hashed);
    }

    StringIDRef created(new StringID(lastID() + 1, probe._data, flags));
    return {insert(created)};
}
300

301
// Look up (or create) the StringID for a mapped element name.
//
// name: the mapped name; its data bytes and postfix bytes are stored separately.
// sids: related StringIDs; only those owned by this hasher are recorded on a new ID.
//
// Returns a reference to the existing or newly inserted StringID. The index carried by
// the returned reference comes from the IndexedName parse or from a "#id:index" prefix.
StringIDRef StringHasher::getID(const Data::MappedName& name, const QVector<StringIDRef>& sids)
{
    // Temporary StringID used only as the lookup key.
    StringID probe;
    probe._postfix = name.postfixBytes();

    Data::IndexedName indexed;
    if (probe._postfix.size() != 0) {
        // Only try to interpret the data as an IndexedName when there is a postfix,
        // because of the way the StringID is restored. See
        // StringHasher::saveStream/restoreStreamNew()
        indexed = Data::IndexedName(name.dataBytes());
    }
    if (indexed) {
        // For an IndexedName only the type part is stored in _data; the integer index
        // travels with the reference instead.
        probe._data =
            QByteArray::fromRawData(indexed.getType(), static_cast<int>(strlen(indexed.getType())));
    }
    else {
        // Use the whole name, for now re-using the memory the name already holds.
        probe._data = name.dataBytes();
    }

    // Already in the table?
    auto found = _hashes->left.find(&probe);
    if (found != _hashes->left.end()) {
        auto existing = StringIDRef(found->first);
        if (indexed) {
            existing._index = indexed.getIndex();
        }
        return existing;
    }

    if (!indexed && name.isRaw()) {
        // The name only wraps raw memory: take a real copy before storing it.
        probe._data = QByteArray(name.dataBytes().constData(), name.dataBytes().size());
    }

    // A postfix that does not contain '#' has not been encoded yet: register it as its
    // own StringID and replace the postfix bytes by that ID's byte form.
    StringIDRef postfixRef;
    if ((probe._postfix.size() != 0) && probe._postfix.indexOf("#") < 0) {
        postfixRef = getID(probe._postfix);
        postfixRef.toBytes(probe._postfix);
    }

    // For an IndexedName, the type part is registered as its own StringID as well.
    StringIDRef indexRef;
    if (indexed) {
        indexRef = getID(probe._data);
    }

    // The StringID object that will actually be inserted.
    StringIDRef newStringIDRef(new StringID(lastID() + 1, probe._data));
    StringID& fresh = *newStringIDRef._sid;
    if (probe._postfix.size() != 0) {
        fresh._flags.setFlag(StringID::Flag::Postfixed);
        fresh._postfix = probe._postfix;
    }

    // How many of the caller's related IDs belong to this hasher?
    int ownedCount = 0;
    for (const auto& candidate : sids) {
        if (candidate && candidate._sid->_hasher == this) {
            ++ownedCount;
        }
    }

    const int addedCount = (postfixRef ? 1 : 0) + (indexRef ? 1 : 0);
    if (ownedCount == sids.size() && !postfixRef && !indexRef) {
        // Everything belongs to this hasher and nothing was added: copy the list as is.
        fresh._sids = sids;
    }
    else {
        // The added IDs go first (postfix, then index), followed by the caller's IDs
        // that belong to this hasher.
        fresh._sids.reserve(ownedCount + addedCount);
        if (postfixRef) {
            fresh._flags.setFlag(StringID::Flag::PostfixEncoded);
            fresh._sids.push_back(postfixRef);
        }
        if (indexRef) {
            fresh._flags.setFlag(StringID::Flag::Indexed);
            fresh._sids.push_back(indexRef);
        }
        for (const auto& candidate : sids) {
            if (candidate && candidate._sid->_hasher == this) {
                fresh._sids.push_back(candidate);
            }
        }
    }

    // Above a (hard-coded) size of 10, sort and de-duplicate the related IDs, leaving
    // the IDs added at the front untouched.
    const int relatedIDSizeThreshold {10};
    if (fresh._sids.size() > relatedIDSizeThreshold) {
        std::sort(fresh._sids.begin() + addedCount, fresh._sids.end());
        fresh._sids.erase(std::unique(fresh._sids.begin() + addedCount, fresh._sids.end()),
                          fresh._sids.end());
    }

    // Postfixed but not an IndexedName: the data itself may be a "#id" or "#id:index"
    // reference to another StringID.
    if ((fresh._postfix.size() != 0) && !indexed) {
        StringID::IndexID prefix = StringID::fromString(fresh._data);
        if (prefix.id > 0) {
            if (prefix.index != 0) {
                // Keep the index on the reference and cut the data back to "#id:".
                indexed.setIndex(prefix.index);
                fresh._data.resize(fresh._data.lastIndexOf(':') + 1);
            }
            const int offset = fresh.isPostfixEncoded() ? 1 : 0;
            // Find the related ID the prefix refers to.
            for (int pos = offset; pos < fresh._sids.size(); ++pos) {
                if (fresh._sids[pos].value() == prefix.id) {
                    if (pos != offset) {
                        // Move it to the first slot after the postfix ID by swapping it
                        // with whatever is there.
                        std::swap(fresh._sids[offset], fresh._sids[pos]);
                    }
                    if (prefix.index != 0) {
                        fresh._flags.setFlag(StringID::Flag::PrefixIDIndex);
                    }
                    else {
                        fresh._flags.setFlag(StringID::Flag::PrefixID);
                    }
                    break;
                }
            }
        }
    }

    return {insert(newStringIDRef), indexed.getIndex()};
}
434

435
// Look up a StringID by its numeric ID. Returns an empty reference when id is not
// positive or is not in the table; otherwise a reference carrying the given index.
StringIDRef StringHasher::getID(long id, int index) const
{
    if (id > 0) {
        auto found = _hashes->right.find(id);
        if (found != _hashes->right.end()) {
            StringIDRef ref(found->second);
            ref._index = index;
            return ref;
        }
    }
    return {};
}
448

449
void StringHasher::setPersistenceFileName(const char* filename) const
450
{
451
    if (!filename) {
452
        filename = "";
453
    }
454
    _filename = filename;
455
}
456

457
const std::string& StringHasher::getPersistenceFileName() const
458
{
459
    return _filename;
460
}
461

462
// Write the hasher to the XML document.
//
// A <StringHasher> element with the saveall/threshold settings is always written. When
// there is something to save, it is followed by a <StringHasher2> element that either
// names a separate file (when a persistence file name is set) or contains the table
// inline as character data.
void StringHasher::Save(Base::Writer& writer) const
{
    // Number of entries saveStream() will emit: all of them with SaveAll, otherwise
    // only the marked or persistent ones.
    size_t count = 0;
    if (_hashes->SaveAll) {
        count = _hashes->size();
    }
    else {
        for (auto& entry : _hashes->right) {
            if (entry.second->isMarked() || entry.second->isPersistent()) {
                ++count;
            }
        }
    }

    writer.Stream() << writer.ind() << "<StringHasher saveall=\"" << _hashes->SaveAll
                    << "\" threshold=\"" << _hashes->Threshold << "\"";

    if (count == 0U) {
        writer.Stream() << " count=\"0\"></StringHasher>\n";
        return;
    }

    // new="1" tells Restore() that a <StringHasher2> element follows.
    writer.Stream() << " count=\"0\" new=\"1\"/>\n";

    writer.Stream() << writer.ind() << "<StringHasher2 ";
    if (!_filename.empty()) {
        // Table goes to a separate file, written later through SaveDocFile().
        writer.Stream() << " file=\"" << writer.addFile((_filename + ".txt").c_str(), this)
                        << "\"/>\n";
        return;
    }

    // Table is embedded in the XML as character data.
    writer.Stream() << " count=\"" << count << "\">\n";
    saveStream(writer.beginCharStream() << '\n');
    writer.endCharStream() << '\n';
    writer.Stream() << writer.ind() << "</StringHasher2>\n";
}
500

501
// Write the table to its separate file: a "StringTableStart v1 <count>" header line
// followed by the entries produced by saveStream().
//
// The count in the header must equal the number of entries saveStream() emits, because
// RestoreDocFile() reads exactly that many entries. saveStream() writes every entry
// when SaveAll is set and otherwise only entries that are marked or persistent, so the
// count is computed with the same filter here (as Save() does) rather than from
// reference counts.
void StringHasher::SaveDocFile(Base::Writer& writer) const
{
    std::size_t count = 0;
    if (_hashes->SaveAll) {
        count = _hashes->size();
    }
    else {
        for (const auto& entry : _hashes->right) {
            if (entry.second->isMarked() || entry.second->isPersistent()) {
                ++count;
            }
        }
    }

    writer.Stream() << "StringTableStart v1 " << count << '\n';
    saveStream(writer.Stream());
}
507

508
void StringHasher::saveStream(std::ostream& stream) const
509
{
510
    boost::io::ios_flags_saver ifs(stream);
511
    stream << std::hex;
512

513
    long anchor = 0;
514
    const StringID* last = nullptr;
515
    long lastID = 0;
516
    bool relative = false;
517

518
    for (auto& hasher : _hashes->right) {
519
        auto& d = *hasher.second;
520
        long id = d._id;
521
        if (!_hashes->SaveAll && !d.isMarked() && !d.isPersistent()) {
522
            continue;
523
        }
524

525
        // We use relative coding to save space. But in order to have some
526
        // minimum protection against corruption, write an absolute value every
527
        // once a while.
528
        relative = (id - anchor) < 1000;
529
        if (relative) {
530
            stream << '-' << id - lastID;
531
        }
532
        else {
533
            anchor = id;
534
            stream << id;
535
        }
536
        lastID = id;
537

538
        int offset = d.isPostfixEncoded() ? 1 : 0;
539

540
        StringID::IndexID prefixID {};
541
        prefixID.id = 0;
542
        prefixID.index = 0;
543
        if (d.isPrefixID()) {
544
            assert(d._sids.size() > offset);
545
            prefixID.id = d._sids[offset].value();
546
        }
547
        else if (d.isPrefixIDIndex()) {
548
            prefixID = StringID::fromString(d._data);
549
            assert(d._sids.size() > offset && d._sids[offset].value() == prefixID.id);
550
        }
551

552
        auto flags = d._flags;
553
        flags.setFlag(StringID::Flag::Marked, false);
554
        stream << '.' << flags.toUnderlyingType();
555

556
        int position = 0;
557
        if (!relative) {
558
            for (; position < d._sids.size(); ++position) {
559
                stream << '.' << d._sids[position].value();
560
            }
561
        }
562
        else {
563
            if (last) {
564
                for (; position < d._sids.size() && position < last->_sids.size(); ++position) {
565
                    long m = last->_sids[position].value();
566
                    long n = d._sids[position].value();
567
                    if (n < m) {
568
                        stream << ".-" << m - n;
569
                    }
570
                    else {
571
                        stream << '.' << n - m;
572
                    }
573
                }
574
            }
575
            for (; position < d._sids.size(); ++position) {
576
                stream << '.' << id - d._sids[position].value();
577
            }
578
        }
579

580
        last = &d;
581

582
        // Having postfix means it is a geometry element name, which
583
        // guarantees to be a single line without space. So it is safe to
584
        // store in raw stream.
585
        if (d.isPostfixed()) {
586
            if (!d.isPrefixIDIndex() && !d.isIndexed() && !d.isPrefixID()) {
587
                stream << ' ' << d._data.constData();
588
            }
589

590
            if (!d.isPostfixEncoded()) {
591
                stream << ' ' << d._postfix.constData();
592
            }
593
            stream << '\n';
594
        }
595
        else {
596
            // Reaching here means the string may contain space and newlines
597
            stream << ' ';
598
            stream << std::dec << d._data.constData() << std::hex;
599
        }
600
    }
601
}
602

603
// Restore the table from its separate file.
//
// A file starting with "StringTableStart <version> <count>" is read with
// restoreStreamNew(); an unknown version only produces a warning. Otherwise the first
// token is taken as the entry count of the older format and restoreStream() is used.
void StringHasher::RestoreDocFile(Base::Reader& reader)
{
    std::string marker;
    reader >> marker;

    // Use clear() rather than _hashes->clear(): it also resets each entry's _hasher
    // pointer and releases the reference taken by insert(), so entries that are still
    // in the table are not leaked.
    clear();

    if (marker == "StringTableStart") {
        std::string ver;
        std::size_t count = 0;
        reader >> ver >> count;
        if (ver != "v1") {
            FC_WARN("Unknown string table format");
        }
        restoreStreamNew(reader, count);
        return;
    }

    // Older format: the first token is the count. A negative or unparsable value is
    // treated as 0 instead of being converted to a huge unsigned number.
    const long legacyCount = strtol(marker.c_str(), nullptr, 10);
    restoreStream(reader, legacyCount > 0 ? static_cast<std::size_t>(legacyCount) : 0U);
}
621

622
void StringHasher::restoreStreamNew(std::istream& stream, std::size_t count)
623
{
624
    Base::TextInputStream asciiStream (stream);
625
    _hashes->clear();
626
    std::string content;
627
    boost::io::ios_flags_saver ifs(stream);
628
    stream >> std::hex;
629
    std::vector<std::string> tokens;
630
    long lastid = 0;
631
    const StringID* last = nullptr;
632

633
    std::string tmp;
634

635
    for (uint32_t i = 0; i < count; ++i) {
636
        if (!(stream >> tmp)) {
637
            FC_THROWM(Base::RuntimeError, "Invalid string table");
638
        }
639

640
        tokens.clear();
641
        boost::split(tokens, tmp, boost::is_any_of("."));
642
        if (tokens.size() < 2) {
643
            FC_THROWM(Base::RuntimeError, "Invalid string table");
644
        }
645

646
        long id = 0;
647
        bool relative = false;
648
        if (tokens[0][0] == '-') {
649
            relative = true;
650
            id = lastid + strtol(tokens[0].c_str() + 1, nullptr, 16);
651
        }
652
        else {
653
            id = strtol(tokens[0].c_str(), nullptr, 16);
654
        }
655

656
        lastid = id;
657

658
        unsigned long flag = strtol(tokens[1].c_str(), nullptr, 16);
659
        StringIDRef sid(new StringID(id, QByteArray(), static_cast<StringID::Flag>(flag)));
660

661
        StringID& d = *sid._sid;
662
        d._sids.reserve(tokens.size() - 2);
663

664
        int j = 2;
665
        if (relative && last) {
666
            for (; j < (int)tokens.size() && j - 2 < last->_sids.size(); ++j) {
667
                long m = last->_sids[j - 2].value();
668
                long n;
669
                if (tokens[j][0] == '-') {
670
                    n = -strtol(&tokens[j][1], nullptr, 16);
671
                }
672
                else {
673
                    n = strtol(&tokens[j][0], nullptr, 16);
674
                }
675
                StringIDRef sid = getID(m + n);
676
                if (!sid) {
677
                    FC_THROWM(Base::RuntimeError, "Invalid string id reference");
678
                }
679
                d._sids.push_back(sid);
680
            }
681
        }
682
        for (; j < (int)tokens.size(); ++j) {
683
            long n = strtol(tokens[j].data(), nullptr, 16);
684
            StringIDRef sid = getID(relative ? id - n : n);
685
            if (!sid) {
686
                FC_THROWM(Base::RuntimeError, "Invalid string id reference");
687
            }
688
            d._sids.push_back(sid);
689
        }
690

691
        if (!d.isPostfixed()) {
692
            asciiStream >> content;
693
            if (d.isHashed() || d.isBinary()) {
694
                d._data = QByteArray::fromBase64(content.c_str());
695
            }
696
            else {
697
                d._data = content.c_str();
698
            }
699
        }
700
        else {
701
            int offset = 0;
702
            if (d.isPostfixEncoded()) {
703
                offset = 1;
704
                if (d._sids.empty()) {
705
                    FC_THROWM(Base::RuntimeError, "Missing string postfix");
706
                }
707
                d._postfix = d._sids[0]._sid->_data;
708
            }
709
            if (d.isIndexed()) {
710
                if (d._sids.size() <= offset) {
711
                    FC_THROWM(Base::RuntimeError, "Missing string prefix");
712
                }
713
                d._data = d._sids[offset]._sid->_data;
714
            }
715
            else if (d.isPrefixID() || d.isPrefixIDIndex()) {
716
                if (d._sids.size() <= offset) {
717
                    FC_THROWM(Base::RuntimeError, "Missing string prefix id");
718
                }
719
                d._data = d._sids[offset]._sid->toString(0).c_str();
720
                if (d.isPrefixIDIndex())
721
                    d._data += ":";
722
            }
723
            else {
724
                stream >> content;
725
                d._data = content.c_str();
726
            }
727
            if (!d.isPostfixEncoded()) {
728
                stream >> content;
729
                d._postfix = content.c_str();
730
            }
731
        }
732

733
        last = insert(sid);
734
    }
735
}
736

737
// Add a StringID to the table. The StringID must not belong to any hasher yet.
//
// The StringID is attached to this hasher and one reference is taken on behalf of the
// table. If the table ends up holding a different object for that slot (the insertion
// did not take place), both steps are undone.
//
// Returns the StringID the table holds at the resulting position.
StringID* StringHasher::insert(const StringIDRef& sid)
{
    assert(sid && sid._sid->_hasher == nullptr);

    StringID& candidate = *sid._sid;
    candidate._hasher = this;
    candidate.ref();

    // IDs are normally handed out in increasing order, so hint at the end.
    auto position = _hashes->right.insert(
        _hashes->right.end(),
        HashMap::right_map::value_type(sid.value(), &candidate));

    if (position->second != &candidate) {
        candidate._hasher = nullptr;
        candidate.unref();
    }
    return position->second;
}
751

752
void StringHasher::restoreStream(std::istream& stream, std::size_t count)
753
{
754
    _hashes->clear();
755
    std::string content;
756
    for (uint32_t i = 0; i < count; ++i) {
757
        int32_t id = 0;
758
        uint8_t type = 0;
759
        stream >> id >> type >> content;
760
        StringIDRef sid = new StringID(id, QByteArray(), static_cast<StringID::Flag>(type));
761
        if (sid.isHashed() || sid.isBinary()) {
762
            sid._sid->_data = QByteArray::fromBase64(content.c_str());
763
        }
764
        else {
765
            sid._sid->_data = QByteArray(content.c_str());
766
        }
767
        insert(sid);
768
    }
769
}
770

771
void StringHasher::clear()
772
{
773
    for (auto& hasher : _hashes->right) {
774
        hasher.second->_hasher = nullptr;
775
        hasher.second->unref();
776
    }
777
    _hashes->clear();
778
}
779

780
size_t StringHasher::size() const
781
{
782
    return _hashes->size();
783
}
784

785
size_t StringHasher::count() const
786
{
787
    size_t count = 0;
788
    for (auto& hasher : _hashes->right) {
789
        if (hasher.second->getRefCount() > 1) {
790
            ++count;
791
        }
792
    }
793
    return count;
794
}
795

796
// Restore the hasher from the XML document. The current table is cleared first.
//
// Handles, in this order: a table stored in a separate file ("file" attribute, read
// later through RestoreDocFile()); the current inline format (<StringHasher2>); the
// older inline stream format; and the oldest format with one <Item> element per entry.
void StringHasher::Restore(Base::XMLReader& reader)
{
    clear();
    reader.readElement("StringHasher");
    _hashes->SaveAll = reader.getAttributeAsInteger("saveall") != 0L;
    _hashes->Threshold = static_cast<int>(reader.getAttributeAsInteger("threshold"));

    // new="1" announces that the real content lives in a following <StringHasher2>.
    const bool newTag = reader.hasAttribute("new") && reader.getAttributeAsInteger("new") > 0;
    if (newTag) {
        reader.readElement("StringHasher2");
    }

    if (reader.hasAttribute("file")) {
        const char* file = reader.getAttribute("file");
        if (*file != '\0') {
            reader.addFile(file, this);
        }
        return;
    }

    const std::size_t count = reader.getAttributeAsUnsigned("count");
    if (newTag) {
        // A broken table is reported but does not abort loading the document.
        try {
            restoreStreamNew(reader.beginCharStream(), count);
        }
        catch (const Base::Exception& e) {
            e.ReportException();
            FC_ERR("Failed to restore string table: full-document recompute strongly recommended.");
        }
        reader.readEndElement("StringHasher2");
        return;
    }

    if ((count != 0U) && reader.FileVersion > 1) {
        restoreStream(reader.beginCharStream(), count);
    }
    else {
        // Oldest format: one <Item> per entry, with the payload in a "hash", "data" or
        // "text" attribute ("hash" and "data" are base64-encoded).
        for (std::size_t i = 0; i < count; ++i) {
            reader.readElement("Item");
            StringIDRef sid;
            const long id = reader.getAttributeAsInteger("id");
            const bool hashed = reader.hasAttribute("hash");
            if (hashed || reader.hasAttribute("data")) {
                const char* value =
                    hashed ? reader.getAttribute("hash") : reader.getAttribute("data");
                sid = new StringID(id, QByteArray::fromBase64(value), StringID::Flag::Hashed);
            }
            else {
                sid = new StringID(id, QByteArray(reader.getAttribute("text")));
            }
            insert(sid);
        }
    }
    reader.readEndElement("StringHasher");
}
850

851
unsigned int StringHasher::getMemSize() const
852
{
853
    return (_hashes->SaveAll ? size() : count()) * 10;
854
}
855

856
// Create a new Python wrapper object for this hasher.
PyObject* StringHasher::getPyObject()
{
    auto* wrapper = new StringHasherPy(this);
    return wrapper;
}
860

861
std::map<long, StringIDRef> StringHasher::getIDMap() const
862
{
863
    std::map<long, StringIDRef> ret;
864
    for (auto& hasher : _hashes->right) {
865
        ret.emplace_hint(ret.end(), hasher.first, StringIDRef(hasher.second));
866
    }
867
    return ret;
868
}
869

870
void StringHasher::clearMarks() const
871
{
872
    for (auto& hasher : _hashes->right) {
873
        hasher.second->_flags.setFlag(StringID::Flag::Marked, false);
874
    }
875
}
876

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.