llvm-project

Форк
0
752 строки · 24.9 Кб
1
//===-- Serialization.cpp - Binary serialization of index data ------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8

9
#include "Serialization.h"
10
#include "Headers.h"
11
#include "RIFF.h"
12
#include "index/MemIndex.h"
13
#include "index/SymbolLocation.h"
14
#include "index/SymbolOrigin.h"
15
#include "index/dex/Dex.h"
16
#include "support/Logger.h"
17
#include "support/Trace.h"
18
#include "clang/Tooling/CompilationDatabase.h"
19
#include "llvm/ADT/StringRef.h"
20
#include "llvm/Support/Compiler.h"
21
#include "llvm/Support/Compression.h"
22
#include "llvm/Support/Endian.h"
23
#include "llvm/Support/Error.h"
24
#include "llvm/Support/raw_ostream.h"
25
#include <cstdint>
26
#include <vector>
27

28
namespace clang {
29
namespace clangd {
30
namespace {
31

32
// IO PRIMITIVES
33
// We use little-endian 32 bit ints, sometimes with variable-length encoding.
34
//
35
// Variable-length int encoding (varint) uses the bottom 7 bits of each byte
36
// to encode the number, and the top bit to indicate whether more bytes follow.
37
// e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop].
38
// This represents 0x1a | 0x2f<<7 = 6042.
39
// A 32-bit integer takes 1-5 bytes to encode; small numbers are more compact.
40

41
// Reads binary data from a StringRef, and keeps track of position.
42
class Reader {
43
  const char *Begin, *End;
44
  bool Err = false;
45

46
public:
47
  Reader(llvm::StringRef Data) : Begin(Data.begin()), End(Data.end()) {}
48
  // The "error" bit is set by reading past EOF or reading invalid data.
49
  // When in an error state, reads may return zero values: callers should check.
50
  bool err() const { return Err; }
51
  // Did we read all the data, or encounter an error?
52
  bool eof() const { return Begin == End || Err; }
53
  // All the data we didn't read yet.
54
  llvm::StringRef rest() const { return llvm::StringRef(Begin, End - Begin); }
55

56
  uint8_t consume8() {
57
    if (LLVM_UNLIKELY(Begin == End)) {
58
      Err = true;
59
      return 0;
60
    }
61
    return *Begin++;
62
  }
63

64
  uint32_t consume32() {
65
    if (LLVM_UNLIKELY(Begin + 4 > End)) {
66
      Err = true;
67
      return 0;
68
    }
69
    auto Ret = llvm::support::endian::read32le(Begin);
70
    Begin += 4;
71
    return Ret;
72
  }
73

74
  llvm::StringRef consume(int N) {
75
    if (LLVM_UNLIKELY(Begin + N > End)) {
76
      Err = true;
77
      return llvm::StringRef();
78
    }
79
    llvm::StringRef Ret(Begin, N);
80
    Begin += N;
81
    return Ret;
82
  }
83

84
  uint32_t consumeVar() {
85
    constexpr static uint8_t More = 1 << 7;
86

87
    // Use a 32 bit unsigned here to prevent promotion to signed int (unless int
88
    // is wider than 32 bits).
89
    uint32_t B = consume8();
90
    if (LLVM_LIKELY(!(B & More)))
91
      return B;
92
    uint32_t Val = B & ~More;
93
    for (int Shift = 7; B & More && Shift < 32; Shift += 7) {
94
      B = consume8();
95
      // 5th byte of a varint can only have lowest 4 bits set.
96
      assert((Shift != 28 || B == (B & 0x0f)) && "Invalid varint encoding");
97
      Val |= (B & ~More) << Shift;
98
    }
99
    return Val;
100
  }
101

102
  llvm::StringRef consumeString(llvm::ArrayRef<llvm::StringRef> Strings) {
103
    auto StringIndex = consumeVar();
104
    if (LLVM_UNLIKELY(StringIndex >= Strings.size())) {
105
      Err = true;
106
      return llvm::StringRef();
107
    }
108
    return Strings[StringIndex];
109
  }
110

111
  SymbolID consumeID() {
112
    llvm::StringRef Raw = consume(SymbolID::RawSize); // short if truncated.
113
    return LLVM_UNLIKELY(err()) ? SymbolID() : SymbolID::fromRaw(Raw);
114
  }
115

116
  // Read a varint (as consumeVar) and resize the container accordingly.
117
  // If the size is invalid, return false and mark an error.
118
  // (The caller should abort in this case).
119
  template <typename T> [[nodiscard]] bool consumeSize(T &Container) {
120
    auto Size = consumeVar();
121
    // Conservatively assume each element is at least one byte.
122
    if (Size > (size_t)(End - Begin)) {
123
      Err = true;
124
      return false;
125
    }
126
    Container.resize(Size);
127
    return true;
128
  }
129
};
130

131
void write32(uint32_t I, llvm::raw_ostream &OS) {
132
  char Buf[4];
133
  llvm::support::endian::write32le(Buf, I);
134
  OS.write(Buf, sizeof(Buf));
135
}
136

137
void writeVar(uint32_t I, llvm::raw_ostream &OS) {
138
  constexpr static uint8_t More = 1 << 7;
139
  if (LLVM_LIKELY(I < 1 << 7)) {
140
    OS.write(I);
141
    return;
142
  }
143
  for (;;) {
144
    OS.write(I | More);
145
    I >>= 7;
146
    if (I < 1 << 7) {
147
      OS.write(I);
148
      return;
149
    }
150
  }
151
}
152

153
// STRING TABLE ENCODING
154
// Index data has many string fields, and many strings are identical.
155
// We store each string once, and refer to them by index.
156
//
157
// The string table's format is:
158
//   - UncompressedSize : uint32 (or 0 for no compression)
159
//   - CompressedData   : byte[CompressedSize]
160
//
161
// CompressedData is a zlib-compressed byte[UncompressedSize].
162
// It contains a sequence of null-terminated strings, e.g. "foo\0bar\0".
163
// These are sorted to improve compression.
164

165
// Maps each string to a canonical representation.
166
// Strings remain owned externally (e.g. by SymbolSlab).
167
class StringTableOut {
168
  llvm::DenseSet<llvm::StringRef> Unique;
169
  std::vector<llvm::StringRef> Sorted;
170
  // Since strings are interned, look up can be by pointer.
171
  llvm::DenseMap<std::pair<const char *, size_t>, unsigned> Index;
172

173
public:
174
  StringTableOut() {
175
    // Ensure there's at least one string in the table.
176
    // Table size zero is reserved to indicate no compression.
177
    Unique.insert("");
178
  }
179
  // Add a string to the table. Overwrites S if an identical string exists.
180
  void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; };
181
  // Finalize the table and write it to OS. No more strings may be added.
182
  void finalize(llvm::raw_ostream &OS) {
183
    Sorted = {Unique.begin(), Unique.end()};
184
    llvm::sort(Sorted);
185
    for (unsigned I = 0; I < Sorted.size(); ++I)
186
      Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I);
187

188
    std::string RawTable;
189
    for (llvm::StringRef S : Sorted) {
190
      RawTable.append(std::string(S));
191
      RawTable.push_back(0);
192
    }
193
    if (llvm::compression::zlib::isAvailable()) {
194
      llvm::SmallVector<uint8_t, 0> Compressed;
195
      llvm::compression::zlib::compress(llvm::arrayRefFromStringRef(RawTable),
196
                                        Compressed);
197
      write32(RawTable.size(), OS);
198
      OS << llvm::toStringRef(Compressed);
199
    } else {
200
      write32(0, OS); // No compression.
201
      OS << RawTable;
202
    }
203
  }
204
  // Get the ID of an string, which must be interned. Table must be finalized.
205
  unsigned index(llvm::StringRef S) const {
206
    assert(!Sorted.empty() && "table not finalized");
207
    assert(Index.count({S.data(), S.size()}) && "string not interned");
208
    return Index.find({S.data(), S.size()})->second;
209
  }
210
};
211

212
struct StringTableIn {
213
  llvm::BumpPtrAllocator Arena;
214
  std::vector<llvm::StringRef> Strings;
215
};
216

217
llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {
218
  Reader R(Data);
219
  size_t UncompressedSize = R.consume32();
220
  if (R.err())
221
    return error("Truncated string table");
222

223
  llvm::StringRef Uncompressed;
224
  llvm::SmallVector<uint8_t, 0> UncompressedStorage;
225
  if (UncompressedSize == 0) // No compression
226
    Uncompressed = R.rest();
227
  else if (llvm::compression::zlib::isAvailable()) {
228
    // Don't allocate a massive buffer if UncompressedSize was corrupted
229
    // This is effective for sharded index, but not big monolithic ones, as
230
    // once compressed size reaches 4MB nothing can be ruled out.
231
    // Theoretical max ratio from https://zlib.net/zlib_tech.html
232
    constexpr int MaxCompressionRatio = 1032;
233
    if (UncompressedSize / MaxCompressionRatio > R.rest().size())
234
      return error("Bad stri table: uncompress {0} -> {1} bytes is implausible",
235
                   R.rest().size(), UncompressedSize);
236

237
    if (llvm::Error E = llvm::compression::zlib::decompress(
238
            llvm::arrayRefFromStringRef(R.rest()), UncompressedStorage,
239
            UncompressedSize))
240
      return std::move(E);
241
    Uncompressed = toStringRef(UncompressedStorage);
242
  } else
243
    return error("Compressed string table, but zlib is unavailable");
244

245
  StringTableIn Table;
246
  llvm::StringSaver Saver(Table.Arena);
247
  R = Reader(Uncompressed);
248
  for (Reader R(Uncompressed); !R.eof();) {
249
    auto Len = R.rest().find(0);
250
    if (Len == llvm::StringRef::npos)
251
      return error("Bad string table: not null terminated");
252
    Table.Strings.push_back(Saver.save(R.consume(Len)));
253
    R.consume8();
254
  }
255
  if (R.err())
256
    return error("Truncated string table");
257
  return std::move(Table);
258
}
259

260
// SYMBOL ENCODING
261
// Each field of clangd::Symbol is encoded in turn (see implementation).
262
//  - StringRef fields encode as varint (index into the string table)
263
//  - enums encode as the underlying type
264
//  - most numbers encode as varint
265

266
void writeLocation(const SymbolLocation &Loc, const StringTableOut &Strings,
267
                   llvm::raw_ostream &OS) {
268
  writeVar(Strings.index(Loc.FileURI), OS);
269
  for (const auto &Endpoint : {Loc.Start, Loc.End}) {
270
    writeVar(Endpoint.line(), OS);
271
    writeVar(Endpoint.column(), OS);
272
  }
273
}
274

275
SymbolLocation readLocation(Reader &Data,
276
                            llvm::ArrayRef<llvm::StringRef> Strings) {
277
  SymbolLocation Loc;
278
  Loc.FileURI = Data.consumeString(Strings).data();
279
  for (auto *Endpoint : {&Loc.Start, &Loc.End}) {
280
    Endpoint->setLine(Data.consumeVar());
281
    Endpoint->setColumn(Data.consumeVar());
282
  }
283
  return Loc;
284
}
285

286
IncludeGraphNode readIncludeGraphNode(Reader &Data,
287
                                      llvm::ArrayRef<llvm::StringRef> Strings) {
288
  IncludeGraphNode IGN;
289
  IGN.Flags = static_cast<IncludeGraphNode::SourceFlag>(Data.consume8());
290
  IGN.URI = Data.consumeString(Strings);
291
  llvm::StringRef Digest = Data.consume(IGN.Digest.size());
292
  std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin());
293
  if (!Data.consumeSize(IGN.DirectIncludes))
294
    return IGN;
295
  for (llvm::StringRef &Include : IGN.DirectIncludes)
296
    Include = Data.consumeString(Strings);
297
  return IGN;
298
}
299

300
void writeIncludeGraphNode(const IncludeGraphNode &IGN,
301
                           const StringTableOut &Strings,
302
                           llvm::raw_ostream &OS) {
303
  OS.write(static_cast<uint8_t>(IGN.Flags));
304
  writeVar(Strings.index(IGN.URI), OS);
305
  llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
306
                       IGN.Digest.size());
307
  OS << Hash;
308
  writeVar(IGN.DirectIncludes.size(), OS);
309
  for (llvm::StringRef Include : IGN.DirectIncludes)
310
    writeVar(Strings.index(Include), OS);
311
}
312

313
void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
314
                 llvm::raw_ostream &OS) {
315
  OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists,
316
                      // symbol IDs should probably be in a string table.
317
  OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind));
318
  OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang));
319
  writeVar(Strings.index(Sym.Name), OS);
320
  writeVar(Strings.index(Sym.Scope), OS);
321
  writeVar(Strings.index(Sym.TemplateSpecializationArgs), OS);
322
  writeLocation(Sym.Definition, Strings, OS);
323
  writeLocation(Sym.CanonicalDeclaration, Strings, OS);
324
  writeVar(Sym.References, OS);
325
  OS.write(static_cast<uint8_t>(Sym.Flags));
326
  writeVar(Strings.index(Sym.Signature), OS);
327
  writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS);
328
  writeVar(Strings.index(Sym.Documentation), OS);
329
  writeVar(Strings.index(Sym.ReturnType), OS);
330
  writeVar(Strings.index(Sym.Type), OS);
331

332
  auto WriteInclude = [&](const Symbol::IncludeHeaderWithReferences &Include) {
333
    writeVar(Strings.index(Include.IncludeHeader), OS);
334
    writeVar((Include.References << 2) | Include.SupportedDirectives, OS);
335
  };
336
  writeVar(Sym.IncludeHeaders.size(), OS);
337
  for (const auto &Include : Sym.IncludeHeaders)
338
    WriteInclude(Include);
339
}
340

341
Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings,
342
                  SymbolOrigin Origin) {
343
  Symbol Sym;
344
  Sym.ID = Data.consumeID();
345
  Sym.SymInfo.Kind = static_cast<index::SymbolKind>(Data.consume8());
346
  Sym.SymInfo.Lang = static_cast<index::SymbolLanguage>(Data.consume8());
347
  Sym.Name = Data.consumeString(Strings);
348
  Sym.Scope = Data.consumeString(Strings);
349
  Sym.TemplateSpecializationArgs = Data.consumeString(Strings);
350
  Sym.Definition = readLocation(Data, Strings);
351
  Sym.CanonicalDeclaration = readLocation(Data, Strings);
352
  Sym.References = Data.consumeVar();
353
  Sym.Flags = static_cast<Symbol::SymbolFlag>(Data.consume8());
354
  Sym.Origin = Origin;
355
  Sym.Signature = Data.consumeString(Strings);
356
  Sym.CompletionSnippetSuffix = Data.consumeString(Strings);
357
  Sym.Documentation = Data.consumeString(Strings);
358
  Sym.ReturnType = Data.consumeString(Strings);
359
  Sym.Type = Data.consumeString(Strings);
360
  if (!Data.consumeSize(Sym.IncludeHeaders))
361
    return Sym;
362
  for (auto &I : Sym.IncludeHeaders) {
363
    I.IncludeHeader = Data.consumeString(Strings);
364
    uint32_t RefsWithDirectives = Data.consumeVar();
365
    I.References = RefsWithDirectives >> 2;
366
    I.SupportedDirectives = RefsWithDirectives & 0x3;
367
  }
368
  return Sym;
369
}
370

371
// REFS ENCODING
372
// A refs section has data grouped by Symbol. Each symbol has:
373
//  - SymbolID: 8 bytes
374
//  - NumRefs: varint
375
//  - Ref[NumRefs]
376
// Fields of Ref are encoded in turn, see implementation.
377

378
void writeRefs(const SymbolID &ID, llvm::ArrayRef<Ref> Refs,
379
               const StringTableOut &Strings, llvm::raw_ostream &OS) {
380
  OS << ID.raw();
381
  writeVar(Refs.size(), OS);
382
  for (const auto &Ref : Refs) {
383
    OS.write(static_cast<unsigned char>(Ref.Kind));
384
    writeLocation(Ref.Location, Strings, OS);
385
    OS << Ref.Container.raw();
386
  }
387
}
388

389
std::pair<SymbolID, std::vector<Ref>>
390
readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
391
  std::pair<SymbolID, std::vector<Ref>> Result;
392
  Result.first = Data.consumeID();
393
  if (!Data.consumeSize(Result.second))
394
    return Result;
395
  for (auto &Ref : Result.second) {
396
    Ref.Kind = static_cast<RefKind>(Data.consume8());
397
    Ref.Location = readLocation(Data, Strings);
398
    Ref.Container = Data.consumeID();
399
  }
400
  return Result;
401
}
402

403
// RELATIONS ENCODING
404
// A relations section is a flat list of relations. Each relation has:
405
//  - SymbolID (subject): 8 bytes
406
//  - relation kind (predicate): 1 byte
407
//  - SymbolID (object): 8 bytes
408
// In the future, we might prefer a packed representation if the need arises.
409

410
void writeRelation(const Relation &R, llvm::raw_ostream &OS) {
411
  OS << R.Subject.raw();
412
  OS.write(static_cast<uint8_t>(R.Predicate));
413
  OS << R.Object.raw();
414
}
415

416
Relation readRelation(Reader &Data) {
417
  SymbolID Subject = Data.consumeID();
418
  RelationKind Predicate = static_cast<RelationKind>(Data.consume8());
419
  SymbolID Object = Data.consumeID();
420
  return {Subject, Predicate, Object};
421
}
422

423
struct InternedCompileCommand {
424
  llvm::StringRef Directory;
425
  std::vector<llvm::StringRef> CommandLine;
426
};
427

428
void writeCompileCommand(const InternedCompileCommand &Cmd,
429
                         const StringTableOut &Strings,
430
                         llvm::raw_ostream &CmdOS) {
431
  writeVar(Strings.index(Cmd.Directory), CmdOS);
432
  writeVar(Cmd.CommandLine.size(), CmdOS);
433
  for (llvm::StringRef C : Cmd.CommandLine)
434
    writeVar(Strings.index(C), CmdOS);
435
}
436

437
InternedCompileCommand
438
readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) {
439
  InternedCompileCommand Cmd;
440
  Cmd.Directory = CmdReader.consumeString(Strings);
441
  if (!CmdReader.consumeSize(Cmd.CommandLine))
442
    return Cmd;
443
  for (llvm::StringRef &C : Cmd.CommandLine)
444
    C = CmdReader.consumeString(Strings);
445
  return Cmd;
446
}
447

448
// FILE ENCODING
449
// A file is a RIFF chunk with type 'CdIx'.
450
// It contains the sections:
451
//   - meta: version number
452
//   - srcs: information related to include graph
453
//   - stri: string table
454
//   - symb: symbols
455
//   - refs: references to symbols
456

457
// The current versioning scheme is simple - non-current versions are rejected.
458
// If you make a breaking change, bump this version number to invalidate stored
459
// data. Later we may want to support some backward compatibility.
460
constexpr static uint32_t Version = 19;
461

462
llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data,
463
                                     SymbolOrigin Origin) {
464
  auto RIFF = riff::readFile(Data);
465
  if (!RIFF)
466
    return RIFF.takeError();
467
  if (RIFF->Type != riff::fourCC("CdIx"))
468
    return error("wrong RIFF filetype: {0}", riff::fourCCStr(RIFF->Type));
469
  llvm::StringMap<llvm::StringRef> Chunks;
470
  for (const auto &Chunk : RIFF->Chunks)
471
    Chunks.try_emplace(llvm::StringRef(Chunk.ID.data(), Chunk.ID.size()),
472
                       Chunk.Data);
473

474
  if (!Chunks.count("meta"))
475
    return error("missing meta chunk");
476
  Reader Meta(Chunks.lookup("meta"));
477
  auto SeenVersion = Meta.consume32();
478
  if (SeenVersion != Version)
479
    return error("wrong version: want {0}, got {1}", Version, SeenVersion);
480

481
  // meta chunk is checked above, as we prefer the "version mismatch" error.
482
  for (llvm::StringRef RequiredChunk : {"stri"})
483
    if (!Chunks.count(RequiredChunk))
484
      return error("missing required chunk {0}", RequiredChunk);
485

486
  auto Strings = readStringTable(Chunks.lookup("stri"));
487
  if (!Strings)
488
    return Strings.takeError();
489

490
  IndexFileIn Result;
491
  if (Chunks.count("srcs")) {
492
    Reader SrcsReader(Chunks.lookup("srcs"));
493
    Result.Sources.emplace();
494
    while (!SrcsReader.eof()) {
495
      auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings);
496
      auto Entry = Result.Sources->try_emplace(IGN.URI).first;
497
      Entry->getValue() = std::move(IGN);
498
      // We change all the strings inside the structure to point at the keys in
499
      // the map, since it is the only copy of the string that's going to live.
500
      Entry->getValue().URI = Entry->getKey();
501
      for (auto &Include : Entry->getValue().DirectIncludes)
502
        Include = Result.Sources->try_emplace(Include).first->getKey();
503
    }
504
    if (SrcsReader.err())
505
      return error("malformed or truncated include uri");
506
  }
507

508
  if (Chunks.count("symb")) {
509
    Reader SymbolReader(Chunks.lookup("symb"));
510
    SymbolSlab::Builder Symbols;
511
    while (!SymbolReader.eof())
512
      Symbols.insert(readSymbol(SymbolReader, Strings->Strings, Origin));
513
    if (SymbolReader.err())
514
      return error("malformed or truncated symbol");
515
    Result.Symbols = std::move(Symbols).build();
516
  }
517
  if (Chunks.count("refs")) {
518
    Reader RefsReader(Chunks.lookup("refs"));
519
    RefSlab::Builder Refs;
520
    while (!RefsReader.eof()) {
521
      auto RefsBundle = readRefs(RefsReader, Strings->Strings);
522
      for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert?
523
        Refs.insert(RefsBundle.first, Ref);
524
    }
525
    if (RefsReader.err())
526
      return error("malformed or truncated refs");
527
    Result.Refs = std::move(Refs).build();
528
  }
529
  if (Chunks.count("rela")) {
530
    Reader RelationsReader(Chunks.lookup("rela"));
531
    RelationSlab::Builder Relations;
532
    while (!RelationsReader.eof())
533
      Relations.insert(readRelation(RelationsReader));
534
    if (RelationsReader.err())
535
      return error("malformed or truncated relations");
536
    Result.Relations = std::move(Relations).build();
537
  }
538
  if (Chunks.count("cmdl")) {
539
    Reader CmdReader(Chunks.lookup("cmdl"));
540
    InternedCompileCommand Cmd =
541
        readCompileCommand(CmdReader, Strings->Strings);
542
    if (CmdReader.err())
543
      return error("malformed or truncated commandline section");
544
    Result.Cmd.emplace();
545
    Result.Cmd->Directory = std::string(Cmd.Directory);
546
    Result.Cmd->CommandLine.reserve(Cmd.CommandLine.size());
547
    for (llvm::StringRef C : Cmd.CommandLine)
548
      Result.Cmd->CommandLine.emplace_back(C);
549
  }
550
  return std::move(Result);
551
}
552

553
template <class Callback>
554
void visitStrings(IncludeGraphNode &IGN, const Callback &CB) {
555
  CB(IGN.URI);
556
  for (llvm::StringRef &Include : IGN.DirectIncludes)
557
    CB(Include);
558
}
559

560
void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) {
561
  assert(Data.Symbols && "An index file without symbols makes no sense!");
562
  riff::File RIFF;
563
  RIFF.Type = riff::fourCC("CdIx");
564

565
  llvm::SmallString<4> Meta;
566
  {
567
    llvm::raw_svector_ostream MetaOS(Meta);
568
    write32(Version, MetaOS);
569
  }
570
  RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
571

572
  StringTableOut Strings;
573
  std::vector<Symbol> Symbols;
574
  for (const auto &Sym : *Data.Symbols) {
575
    Symbols.emplace_back(Sym);
576
    visitStrings(Symbols.back(),
577
                 [&](llvm::StringRef &S) { Strings.intern(S); });
578
  }
579
  std::vector<IncludeGraphNode> Sources;
580
  if (Data.Sources)
581
    for (const auto &Source : *Data.Sources) {
582
      Sources.push_back(Source.getValue());
583
      visitStrings(Sources.back(),
584
                   [&](llvm::StringRef &S) { Strings.intern(S); });
585
    }
586

587
  std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
588
  if (Data.Refs) {
589
    for (const auto &Sym : *Data.Refs) {
590
      Refs.emplace_back(Sym);
591
      for (auto &Ref : Refs.back().second) {
592
        llvm::StringRef File = Ref.Location.FileURI;
593
        Strings.intern(File);
594
        Ref.Location.FileURI = File.data();
595
      }
596
    }
597
  }
598

599
  std::vector<Relation> Relations;
600
  if (Data.Relations) {
601
    for (const auto &Relation : *Data.Relations) {
602
      Relations.emplace_back(Relation);
603
      // No strings to be interned in relations.
604
    }
605
  }
606

607
  InternedCompileCommand InternedCmd;
608
  if (Data.Cmd) {
609
    InternedCmd.CommandLine.reserve(Data.Cmd->CommandLine.size());
610
    InternedCmd.Directory = Data.Cmd->Directory;
611
    Strings.intern(InternedCmd.Directory);
612
    for (llvm::StringRef C : Data.Cmd->CommandLine) {
613
      InternedCmd.CommandLine.emplace_back(C);
614
      Strings.intern(InternedCmd.CommandLine.back());
615
    }
616
  }
617

618
  std::string StringSection;
619
  {
620
    llvm::raw_string_ostream StringOS(StringSection);
621
    Strings.finalize(StringOS);
622
  }
623
  RIFF.Chunks.push_back({riff::fourCC("stri"), StringSection});
624

625
  std::string SymbolSection;
626
  {
627
    llvm::raw_string_ostream SymbolOS(SymbolSection);
628
    for (const auto &Sym : Symbols)
629
      writeSymbol(Sym, Strings, SymbolOS);
630
  }
631
  RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});
632

633
  std::string RefsSection;
634
  if (Data.Refs) {
635
    {
636
      llvm::raw_string_ostream RefsOS(RefsSection);
637
      for (const auto &Sym : Refs)
638
        writeRefs(Sym.first, Sym.second, Strings, RefsOS);
639
    }
640
    RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
641
  }
642

643
  std::string RelationSection;
644
  if (Data.Relations) {
645
    {
646
      llvm::raw_string_ostream RelationOS{RelationSection};
647
      for (const auto &Relation : Relations)
648
        writeRelation(Relation, RelationOS);
649
    }
650
    RIFF.Chunks.push_back({riff::fourCC("rela"), RelationSection});
651
  }
652

653
  std::string SrcsSection;
654
  {
655
    {
656
      llvm::raw_string_ostream SrcsOS(SrcsSection);
657
      for (const auto &SF : Sources)
658
        writeIncludeGraphNode(SF, Strings, SrcsOS);
659
    }
660
    RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection});
661
  }
662

663
  std::string CmdlSection;
664
  if (Data.Cmd) {
665
    {
666
      llvm::raw_string_ostream CmdOS(CmdlSection);
667
      writeCompileCommand(InternedCmd, Strings, CmdOS);
668
    }
669
    RIFF.Chunks.push_back({riff::fourCC("cmdl"), CmdlSection});
670
  }
671

672
  OS << RIFF;
673
}
674

675
} // namespace
676

677
// Defined in YAMLSerialization.cpp.
678
void writeYAML(const IndexFileOut &, llvm::raw_ostream &);
679
llvm::Expected<IndexFileIn> readYAML(llvm::StringRef, SymbolOrigin Origin);
680

681
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {
682
  switch (O.Format) {
683
  case IndexFileFormat::RIFF:
684
    writeRIFF(O, OS);
685
    break;
686
  case IndexFileFormat::YAML:
687
    writeYAML(O, OS);
688
    break;
689
  }
690
  return OS;
691
}
692

693
llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data,
694
                                          SymbolOrigin Origin) {
695
  if (Data.starts_with("RIFF")) {
696
    return readRIFF(Data, Origin);
697
  }
698
  if (auto YAMLContents = readYAML(Data, Origin)) {
699
    return std::move(*YAMLContents);
700
  } else {
701
    return error("Not a RIFF file and failed to parse as YAML: {0}",
702
                 YAMLContents.takeError());
703
  }
704
}
705

706
std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
707
                                       SymbolOrigin Origin, bool UseDex) {
708
  trace::Span OverallTracer("LoadIndex");
709
  auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
710
  if (!Buffer) {
711
    elog("Can't open {0}: {1}", SymbolFilename, Buffer.getError().message());
712
    return nullptr;
713
  }
714

715
  SymbolSlab Symbols;
716
  RefSlab Refs;
717
  RelationSlab Relations;
718
  {
719
    trace::Span Tracer("ParseIndex");
720
    if (auto I = readIndexFile(Buffer->get()->getBuffer(), Origin)) {
721
      if (I->Symbols)
722
        Symbols = std::move(*I->Symbols);
723
      if (I->Refs)
724
        Refs = std::move(*I->Refs);
725
      if (I->Relations)
726
        Relations = std::move(*I->Relations);
727
    } else {
728
      elog("Bad index file: {0}", I.takeError());
729
      return nullptr;
730
    }
731
  }
732

733
  size_t NumSym = Symbols.size();
734
  size_t NumRefs = Refs.numRefs();
735
  size_t NumRelations = Relations.size();
736

737
  trace::Span Tracer("BuildIndex");
738
  auto Index = UseDex ? dex::Dex::build(std::move(Symbols), std::move(Refs),
739
                                        std::move(Relations))
740
                      : MemIndex::build(std::move(Symbols), std::move(Refs),
741
                                        std::move(Relations));
742
  vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n"
743
       "  - number of symbols: {3}\n"
744
       "  - number of refs: {4}\n"
745
       "  - number of relations: {5}",
746
       UseDex ? "Dex" : "MemIndex", SymbolFilename,
747
       Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations);
748
  return Index;
749
}
750

751
} // namespace clangd
752
} // namespace clang
753

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.