// llvm-project
// 752 lines · 24.9 KB
1//===-- Serialization.cpp - Binary serialization of index data ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Serialization.h"
10#include "Headers.h"
11#include "RIFF.h"
12#include "index/MemIndex.h"
13#include "index/SymbolLocation.h"
14#include "index/SymbolOrigin.h"
15#include "index/dex/Dex.h"
16#include "support/Logger.h"
17#include "support/Trace.h"
18#include "clang/Tooling/CompilationDatabase.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Support/Compiler.h"
21#include "llvm/Support/Compression.h"
22#include "llvm/Support/Endian.h"
23#include "llvm/Support/Error.h"
24#include "llvm/Support/raw_ostream.h"
25#include <cstdint>
26#include <vector>
27
28namespace clang {
29namespace clangd {
30namespace {
31
32// IO PRIMITIVES
33// We use little-endian 32 bit ints, sometimes with variable-length encoding.
34//
35// Variable-length int encoding (varint) uses the bottom 7 bits of each byte
36// to encode the number, and the top bit to indicate whether more bytes follow.
37// e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop].
38// This represents 0x1a | 0x2f<<7 = 6042.
39// A 32-bit integer takes 1-5 bytes to encode; small numbers are more compact.
40
41// Reads binary data from a StringRef, and keeps track of position.
42class Reader {
43const char *Begin, *End;
44bool Err = false;
45
46public:
47Reader(llvm::StringRef Data) : Begin(Data.begin()), End(Data.end()) {}
48// The "error" bit is set by reading past EOF or reading invalid data.
49// When in an error state, reads may return zero values: callers should check.
50bool err() const { return Err; }
51// Did we read all the data, or encounter an error?
52bool eof() const { return Begin == End || Err; }
53// All the data we didn't read yet.
54llvm::StringRef rest() const { return llvm::StringRef(Begin, End - Begin); }
55
56uint8_t consume8() {
57if (LLVM_UNLIKELY(Begin == End)) {
58Err = true;
59return 0;
60}
61return *Begin++;
62}
63
64uint32_t consume32() {
65if (LLVM_UNLIKELY(Begin + 4 > End)) {
66Err = true;
67return 0;
68}
69auto Ret = llvm::support::endian::read32le(Begin);
70Begin += 4;
71return Ret;
72}
73
74llvm::StringRef consume(int N) {
75if (LLVM_UNLIKELY(Begin + N > End)) {
76Err = true;
77return llvm::StringRef();
78}
79llvm::StringRef Ret(Begin, N);
80Begin += N;
81return Ret;
82}
83
84uint32_t consumeVar() {
85constexpr static uint8_t More = 1 << 7;
86
87// Use a 32 bit unsigned here to prevent promotion to signed int (unless int
88// is wider than 32 bits).
89uint32_t B = consume8();
90if (LLVM_LIKELY(!(B & More)))
91return B;
92uint32_t Val = B & ~More;
93for (int Shift = 7; B & More && Shift < 32; Shift += 7) {
94B = consume8();
95// 5th byte of a varint can only have lowest 4 bits set.
96assert((Shift != 28 || B == (B & 0x0f)) && "Invalid varint encoding");
97Val |= (B & ~More) << Shift;
98}
99return Val;
100}
101
102llvm::StringRef consumeString(llvm::ArrayRef<llvm::StringRef> Strings) {
103auto StringIndex = consumeVar();
104if (LLVM_UNLIKELY(StringIndex >= Strings.size())) {
105Err = true;
106return llvm::StringRef();
107}
108return Strings[StringIndex];
109}
110
111SymbolID consumeID() {
112llvm::StringRef Raw = consume(SymbolID::RawSize); // short if truncated.
113return LLVM_UNLIKELY(err()) ? SymbolID() : SymbolID::fromRaw(Raw);
114}
115
116// Read a varint (as consumeVar) and resize the container accordingly.
117// If the size is invalid, return false and mark an error.
118// (The caller should abort in this case).
119template <typename T> [[nodiscard]] bool consumeSize(T &Container) {
120auto Size = consumeVar();
121// Conservatively assume each element is at least one byte.
122if (Size > (size_t)(End - Begin)) {
123Err = true;
124return false;
125}
126Container.resize(Size);
127return true;
128}
129};
130
131void write32(uint32_t I, llvm::raw_ostream &OS) {
132char Buf[4];
133llvm::support::endian::write32le(Buf, I);
134OS.write(Buf, sizeof(Buf));
135}
136
137void writeVar(uint32_t I, llvm::raw_ostream &OS) {
138constexpr static uint8_t More = 1 << 7;
139if (LLVM_LIKELY(I < 1 << 7)) {
140OS.write(I);
141return;
142}
143for (;;) {
144OS.write(I | More);
145I >>= 7;
146if (I < 1 << 7) {
147OS.write(I);
148return;
149}
150}
151}
152
153// STRING TABLE ENCODING
154// Index data has many string fields, and many strings are identical.
155// We store each string once, and refer to them by index.
156//
157// The string table's format is:
158// - UncompressedSize : uint32 (or 0 for no compression)
159// - CompressedData : byte[CompressedSize]
160//
161// CompressedData is a zlib-compressed byte[UncompressedSize].
162// It contains a sequence of null-terminated strings, e.g. "foo\0bar\0".
163// These are sorted to improve compression.
164
165// Maps each string to a canonical representation.
166// Strings remain owned externally (e.g. by SymbolSlab).
167class StringTableOut {
168llvm::DenseSet<llvm::StringRef> Unique;
169std::vector<llvm::StringRef> Sorted;
170// Since strings are interned, look up can be by pointer.
171llvm::DenseMap<std::pair<const char *, size_t>, unsigned> Index;
172
173public:
174StringTableOut() {
175// Ensure there's at least one string in the table.
176// Table size zero is reserved to indicate no compression.
177Unique.insert("");
178}
179// Add a string to the table. Overwrites S if an identical string exists.
180void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; };
181// Finalize the table and write it to OS. No more strings may be added.
182void finalize(llvm::raw_ostream &OS) {
183Sorted = {Unique.begin(), Unique.end()};
184llvm::sort(Sorted);
185for (unsigned I = 0; I < Sorted.size(); ++I)
186Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I);
187
188std::string RawTable;
189for (llvm::StringRef S : Sorted) {
190RawTable.append(std::string(S));
191RawTable.push_back(0);
192}
193if (llvm::compression::zlib::isAvailable()) {
194llvm::SmallVector<uint8_t, 0> Compressed;
195llvm::compression::zlib::compress(llvm::arrayRefFromStringRef(RawTable),
196Compressed);
197write32(RawTable.size(), OS);
198OS << llvm::toStringRef(Compressed);
199} else {
200write32(0, OS); // No compression.
201OS << RawTable;
202}
203}
204// Get the ID of an string, which must be interned. Table must be finalized.
205unsigned index(llvm::StringRef S) const {
206assert(!Sorted.empty() && "table not finalized");
207assert(Index.count({S.data(), S.size()}) && "string not interned");
208return Index.find({S.data(), S.size()})->second;
209}
210};
211
212struct StringTableIn {
213llvm::BumpPtrAllocator Arena;
214std::vector<llvm::StringRef> Strings;
215};
216
217llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {
218Reader R(Data);
219size_t UncompressedSize = R.consume32();
220if (R.err())
221return error("Truncated string table");
222
223llvm::StringRef Uncompressed;
224llvm::SmallVector<uint8_t, 0> UncompressedStorage;
225if (UncompressedSize == 0) // No compression
226Uncompressed = R.rest();
227else if (llvm::compression::zlib::isAvailable()) {
228// Don't allocate a massive buffer if UncompressedSize was corrupted
229// This is effective for sharded index, but not big monolithic ones, as
230// once compressed size reaches 4MB nothing can be ruled out.
231// Theoretical max ratio from https://zlib.net/zlib_tech.html
232constexpr int MaxCompressionRatio = 1032;
233if (UncompressedSize / MaxCompressionRatio > R.rest().size())
234return error("Bad stri table: uncompress {0} -> {1} bytes is implausible",
235R.rest().size(), UncompressedSize);
236
237if (llvm::Error E = llvm::compression::zlib::decompress(
238llvm::arrayRefFromStringRef(R.rest()), UncompressedStorage,
239UncompressedSize))
240return std::move(E);
241Uncompressed = toStringRef(UncompressedStorage);
242} else
243return error("Compressed string table, but zlib is unavailable");
244
245StringTableIn Table;
246llvm::StringSaver Saver(Table.Arena);
247R = Reader(Uncompressed);
248for (Reader R(Uncompressed); !R.eof();) {
249auto Len = R.rest().find(0);
250if (Len == llvm::StringRef::npos)
251return error("Bad string table: not null terminated");
252Table.Strings.push_back(Saver.save(R.consume(Len)));
253R.consume8();
254}
255if (R.err())
256return error("Truncated string table");
257return std::move(Table);
258}
259
260// SYMBOL ENCODING
261// Each field of clangd::Symbol is encoded in turn (see implementation).
262// - StringRef fields encode as varint (index into the string table)
263// - enums encode as the underlying type
264// - most numbers encode as varint
265
266void writeLocation(const SymbolLocation &Loc, const StringTableOut &Strings,
267llvm::raw_ostream &OS) {
268writeVar(Strings.index(Loc.FileURI), OS);
269for (const auto &Endpoint : {Loc.Start, Loc.End}) {
270writeVar(Endpoint.line(), OS);
271writeVar(Endpoint.column(), OS);
272}
273}
274
275SymbolLocation readLocation(Reader &Data,
276llvm::ArrayRef<llvm::StringRef> Strings) {
277SymbolLocation Loc;
278Loc.FileURI = Data.consumeString(Strings).data();
279for (auto *Endpoint : {&Loc.Start, &Loc.End}) {
280Endpoint->setLine(Data.consumeVar());
281Endpoint->setColumn(Data.consumeVar());
282}
283return Loc;
284}
285
286IncludeGraphNode readIncludeGraphNode(Reader &Data,
287llvm::ArrayRef<llvm::StringRef> Strings) {
288IncludeGraphNode IGN;
289IGN.Flags = static_cast<IncludeGraphNode::SourceFlag>(Data.consume8());
290IGN.URI = Data.consumeString(Strings);
291llvm::StringRef Digest = Data.consume(IGN.Digest.size());
292std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin());
293if (!Data.consumeSize(IGN.DirectIncludes))
294return IGN;
295for (llvm::StringRef &Include : IGN.DirectIncludes)
296Include = Data.consumeString(Strings);
297return IGN;
298}
299
300void writeIncludeGraphNode(const IncludeGraphNode &IGN,
301const StringTableOut &Strings,
302llvm::raw_ostream &OS) {
303OS.write(static_cast<uint8_t>(IGN.Flags));
304writeVar(Strings.index(IGN.URI), OS);
305llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
306IGN.Digest.size());
307OS << Hash;
308writeVar(IGN.DirectIncludes.size(), OS);
309for (llvm::StringRef Include : IGN.DirectIncludes)
310writeVar(Strings.index(Include), OS);
311}
312
313void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
314llvm::raw_ostream &OS) {
315OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists,
316// symbol IDs should probably be in a string table.
317OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind));
318OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang));
319writeVar(Strings.index(Sym.Name), OS);
320writeVar(Strings.index(Sym.Scope), OS);
321writeVar(Strings.index(Sym.TemplateSpecializationArgs), OS);
322writeLocation(Sym.Definition, Strings, OS);
323writeLocation(Sym.CanonicalDeclaration, Strings, OS);
324writeVar(Sym.References, OS);
325OS.write(static_cast<uint8_t>(Sym.Flags));
326writeVar(Strings.index(Sym.Signature), OS);
327writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS);
328writeVar(Strings.index(Sym.Documentation), OS);
329writeVar(Strings.index(Sym.ReturnType), OS);
330writeVar(Strings.index(Sym.Type), OS);
331
332auto WriteInclude = [&](const Symbol::IncludeHeaderWithReferences &Include) {
333writeVar(Strings.index(Include.IncludeHeader), OS);
334writeVar((Include.References << 2) | Include.SupportedDirectives, OS);
335};
336writeVar(Sym.IncludeHeaders.size(), OS);
337for (const auto &Include : Sym.IncludeHeaders)
338WriteInclude(Include);
339}
340
341Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings,
342SymbolOrigin Origin) {
343Symbol Sym;
344Sym.ID = Data.consumeID();
345Sym.SymInfo.Kind = static_cast<index::SymbolKind>(Data.consume8());
346Sym.SymInfo.Lang = static_cast<index::SymbolLanguage>(Data.consume8());
347Sym.Name = Data.consumeString(Strings);
348Sym.Scope = Data.consumeString(Strings);
349Sym.TemplateSpecializationArgs = Data.consumeString(Strings);
350Sym.Definition = readLocation(Data, Strings);
351Sym.CanonicalDeclaration = readLocation(Data, Strings);
352Sym.References = Data.consumeVar();
353Sym.Flags = static_cast<Symbol::SymbolFlag>(Data.consume8());
354Sym.Origin = Origin;
355Sym.Signature = Data.consumeString(Strings);
356Sym.CompletionSnippetSuffix = Data.consumeString(Strings);
357Sym.Documentation = Data.consumeString(Strings);
358Sym.ReturnType = Data.consumeString(Strings);
359Sym.Type = Data.consumeString(Strings);
360if (!Data.consumeSize(Sym.IncludeHeaders))
361return Sym;
362for (auto &I : Sym.IncludeHeaders) {
363I.IncludeHeader = Data.consumeString(Strings);
364uint32_t RefsWithDirectives = Data.consumeVar();
365I.References = RefsWithDirectives >> 2;
366I.SupportedDirectives = RefsWithDirectives & 0x3;
367}
368return Sym;
369}
370
371// REFS ENCODING
372// A refs section has data grouped by Symbol. Each symbol has:
373// - SymbolID: 8 bytes
374// - NumRefs: varint
375// - Ref[NumRefs]
376// Fields of Ref are encoded in turn, see implementation.
377
378void writeRefs(const SymbolID &ID, llvm::ArrayRef<Ref> Refs,
379const StringTableOut &Strings, llvm::raw_ostream &OS) {
380OS << ID.raw();
381writeVar(Refs.size(), OS);
382for (const auto &Ref : Refs) {
383OS.write(static_cast<unsigned char>(Ref.Kind));
384writeLocation(Ref.Location, Strings, OS);
385OS << Ref.Container.raw();
386}
387}
388
389std::pair<SymbolID, std::vector<Ref>>
390readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
391std::pair<SymbolID, std::vector<Ref>> Result;
392Result.first = Data.consumeID();
393if (!Data.consumeSize(Result.second))
394return Result;
395for (auto &Ref : Result.second) {
396Ref.Kind = static_cast<RefKind>(Data.consume8());
397Ref.Location = readLocation(Data, Strings);
398Ref.Container = Data.consumeID();
399}
400return Result;
401}
402
403// RELATIONS ENCODING
404// A relations section is a flat list of relations. Each relation has:
405// - SymbolID (subject): 8 bytes
406// - relation kind (predicate): 1 byte
407// - SymbolID (object): 8 bytes
408// In the future, we might prefer a packed representation if the need arises.
409
410void writeRelation(const Relation &R, llvm::raw_ostream &OS) {
411OS << R.Subject.raw();
412OS.write(static_cast<uint8_t>(R.Predicate));
413OS << R.Object.raw();
414}
415
416Relation readRelation(Reader &Data) {
417SymbolID Subject = Data.consumeID();
418RelationKind Predicate = static_cast<RelationKind>(Data.consume8());
419SymbolID Object = Data.consumeID();
420return {Subject, Predicate, Object};
421}
422
423struct InternedCompileCommand {
424llvm::StringRef Directory;
425std::vector<llvm::StringRef> CommandLine;
426};
427
428void writeCompileCommand(const InternedCompileCommand &Cmd,
429const StringTableOut &Strings,
430llvm::raw_ostream &CmdOS) {
431writeVar(Strings.index(Cmd.Directory), CmdOS);
432writeVar(Cmd.CommandLine.size(), CmdOS);
433for (llvm::StringRef C : Cmd.CommandLine)
434writeVar(Strings.index(C), CmdOS);
435}
436
437InternedCompileCommand
438readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) {
439InternedCompileCommand Cmd;
440Cmd.Directory = CmdReader.consumeString(Strings);
441if (!CmdReader.consumeSize(Cmd.CommandLine))
442return Cmd;
443for (llvm::StringRef &C : Cmd.CommandLine)
444C = CmdReader.consumeString(Strings);
445return Cmd;
446}
447
// FILE ENCODING
// A file is a RIFF chunk with type 'CdIx'.
// It contains the sections:
//   - meta: version number
//   - srcs: information related to include graph
//   - stri: string table
//   - symb: symbols
//   - refs: references to symbols
//   - rela: relations between symbols
//   - cmdl: compile command (directory and command line)
456
457// The current versioning scheme is simple - non-current versions are rejected.
458// If you make a breaking change, bump this version number to invalidate stored
459// data. Later we may want to support some backward compatibility.
static constexpr uint32_t Version = 19;
461
462llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data,
463SymbolOrigin Origin) {
464auto RIFF = riff::readFile(Data);
465if (!RIFF)
466return RIFF.takeError();
467if (RIFF->Type != riff::fourCC("CdIx"))
468return error("wrong RIFF filetype: {0}", riff::fourCCStr(RIFF->Type));
469llvm::StringMap<llvm::StringRef> Chunks;
470for (const auto &Chunk : RIFF->Chunks)
471Chunks.try_emplace(llvm::StringRef(Chunk.ID.data(), Chunk.ID.size()),
472Chunk.Data);
473
474if (!Chunks.count("meta"))
475return error("missing meta chunk");
476Reader Meta(Chunks.lookup("meta"));
477auto SeenVersion = Meta.consume32();
478if (SeenVersion != Version)
479return error("wrong version: want {0}, got {1}", Version, SeenVersion);
480
481// meta chunk is checked above, as we prefer the "version mismatch" error.
482for (llvm::StringRef RequiredChunk : {"stri"})
483if (!Chunks.count(RequiredChunk))
484return error("missing required chunk {0}", RequiredChunk);
485
486auto Strings = readStringTable(Chunks.lookup("stri"));
487if (!Strings)
488return Strings.takeError();
489
490IndexFileIn Result;
491if (Chunks.count("srcs")) {
492Reader SrcsReader(Chunks.lookup("srcs"));
493Result.Sources.emplace();
494while (!SrcsReader.eof()) {
495auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings);
496auto Entry = Result.Sources->try_emplace(IGN.URI).first;
497Entry->getValue() = std::move(IGN);
498// We change all the strings inside the structure to point at the keys in
499// the map, since it is the only copy of the string that's going to live.
500Entry->getValue().URI = Entry->getKey();
501for (auto &Include : Entry->getValue().DirectIncludes)
502Include = Result.Sources->try_emplace(Include).first->getKey();
503}
504if (SrcsReader.err())
505return error("malformed or truncated include uri");
506}
507
508if (Chunks.count("symb")) {
509Reader SymbolReader(Chunks.lookup("symb"));
510SymbolSlab::Builder Symbols;
511while (!SymbolReader.eof())
512Symbols.insert(readSymbol(SymbolReader, Strings->Strings, Origin));
513if (SymbolReader.err())
514return error("malformed or truncated symbol");
515Result.Symbols = std::move(Symbols).build();
516}
517if (Chunks.count("refs")) {
518Reader RefsReader(Chunks.lookup("refs"));
519RefSlab::Builder Refs;
520while (!RefsReader.eof()) {
521auto RefsBundle = readRefs(RefsReader, Strings->Strings);
522for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert?
523Refs.insert(RefsBundle.first, Ref);
524}
525if (RefsReader.err())
526return error("malformed or truncated refs");
527Result.Refs = std::move(Refs).build();
528}
529if (Chunks.count("rela")) {
530Reader RelationsReader(Chunks.lookup("rela"));
531RelationSlab::Builder Relations;
532while (!RelationsReader.eof())
533Relations.insert(readRelation(RelationsReader));
534if (RelationsReader.err())
535return error("malformed or truncated relations");
536Result.Relations = std::move(Relations).build();
537}
538if (Chunks.count("cmdl")) {
539Reader CmdReader(Chunks.lookup("cmdl"));
540InternedCompileCommand Cmd =
541readCompileCommand(CmdReader, Strings->Strings);
542if (CmdReader.err())
543return error("malformed or truncated commandline section");
544Result.Cmd.emplace();
545Result.Cmd->Directory = std::string(Cmd.Directory);
546Result.Cmd->CommandLine.reserve(Cmd.CommandLine.size());
547for (llvm::StringRef C : Cmd.CommandLine)
548Result.Cmd->CommandLine.emplace_back(C);
549}
550return std::move(Result);
551}
552
553template <class Callback>
554void visitStrings(IncludeGraphNode &IGN, const Callback &CB) {
555CB(IGN.URI);
556for (llvm::StringRef &Include : IGN.DirectIncludes)
557CB(Include);
558}
559
560void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) {
561assert(Data.Symbols && "An index file without symbols makes no sense!");
562riff::File RIFF;
563RIFF.Type = riff::fourCC("CdIx");
564
565llvm::SmallString<4> Meta;
566{
567llvm::raw_svector_ostream MetaOS(Meta);
568write32(Version, MetaOS);
569}
570RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
571
572StringTableOut Strings;
573std::vector<Symbol> Symbols;
574for (const auto &Sym : *Data.Symbols) {
575Symbols.emplace_back(Sym);
576visitStrings(Symbols.back(),
577[&](llvm::StringRef &S) { Strings.intern(S); });
578}
579std::vector<IncludeGraphNode> Sources;
580if (Data.Sources)
581for (const auto &Source : *Data.Sources) {
582Sources.push_back(Source.getValue());
583visitStrings(Sources.back(),
584[&](llvm::StringRef &S) { Strings.intern(S); });
585}
586
587std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
588if (Data.Refs) {
589for (const auto &Sym : *Data.Refs) {
590Refs.emplace_back(Sym);
591for (auto &Ref : Refs.back().second) {
592llvm::StringRef File = Ref.Location.FileURI;
593Strings.intern(File);
594Ref.Location.FileURI = File.data();
595}
596}
597}
598
599std::vector<Relation> Relations;
600if (Data.Relations) {
601for (const auto &Relation : *Data.Relations) {
602Relations.emplace_back(Relation);
603// No strings to be interned in relations.
604}
605}
606
607InternedCompileCommand InternedCmd;
608if (Data.Cmd) {
609InternedCmd.CommandLine.reserve(Data.Cmd->CommandLine.size());
610InternedCmd.Directory = Data.Cmd->Directory;
611Strings.intern(InternedCmd.Directory);
612for (llvm::StringRef C : Data.Cmd->CommandLine) {
613InternedCmd.CommandLine.emplace_back(C);
614Strings.intern(InternedCmd.CommandLine.back());
615}
616}
617
618std::string StringSection;
619{
620llvm::raw_string_ostream StringOS(StringSection);
621Strings.finalize(StringOS);
622}
623RIFF.Chunks.push_back({riff::fourCC("stri"), StringSection});
624
625std::string SymbolSection;
626{
627llvm::raw_string_ostream SymbolOS(SymbolSection);
628for (const auto &Sym : Symbols)
629writeSymbol(Sym, Strings, SymbolOS);
630}
631RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});
632
633std::string RefsSection;
634if (Data.Refs) {
635{
636llvm::raw_string_ostream RefsOS(RefsSection);
637for (const auto &Sym : Refs)
638writeRefs(Sym.first, Sym.second, Strings, RefsOS);
639}
640RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
641}
642
643std::string RelationSection;
644if (Data.Relations) {
645{
646llvm::raw_string_ostream RelationOS{RelationSection};
647for (const auto &Relation : Relations)
648writeRelation(Relation, RelationOS);
649}
650RIFF.Chunks.push_back({riff::fourCC("rela"), RelationSection});
651}
652
653std::string SrcsSection;
654{
655{
656llvm::raw_string_ostream SrcsOS(SrcsSection);
657for (const auto &SF : Sources)
658writeIncludeGraphNode(SF, Strings, SrcsOS);
659}
660RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection});
661}
662
663std::string CmdlSection;
664if (Data.Cmd) {
665{
666llvm::raw_string_ostream CmdOS(CmdlSection);
667writeCompileCommand(InternedCmd, Strings, CmdOS);
668}
669RIFF.Chunks.push_back({riff::fourCC("cmdl"), CmdlSection});
670}
671
672OS << RIFF;
673}
674
675} // namespace
676
677// Defined in YAMLSerialization.cpp.
678void writeYAML(const IndexFileOut &, llvm::raw_ostream &);
679llvm::Expected<IndexFileIn> readYAML(llvm::StringRef, SymbolOrigin Origin);
680
681llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {
682switch (O.Format) {
683case IndexFileFormat::RIFF:
684writeRIFF(O, OS);
685break;
686case IndexFileFormat::YAML:
687writeYAML(O, OS);
688break;
689}
690return OS;
691}
692
693llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data,
694SymbolOrigin Origin) {
695if (Data.starts_with("RIFF")) {
696return readRIFF(Data, Origin);
697}
698if (auto YAMLContents = readYAML(Data, Origin)) {
699return std::move(*YAMLContents);
700} else {
701return error("Not a RIFF file and failed to parse as YAML: {0}",
702YAMLContents.takeError());
703}
704}
705
706std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
707SymbolOrigin Origin, bool UseDex) {
708trace::Span OverallTracer("LoadIndex");
709auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
710if (!Buffer) {
711elog("Can't open {0}: {1}", SymbolFilename, Buffer.getError().message());
712return nullptr;
713}
714
715SymbolSlab Symbols;
716RefSlab Refs;
717RelationSlab Relations;
718{
719trace::Span Tracer("ParseIndex");
720if (auto I = readIndexFile(Buffer->get()->getBuffer(), Origin)) {
721if (I->Symbols)
722Symbols = std::move(*I->Symbols);
723if (I->Refs)
724Refs = std::move(*I->Refs);
725if (I->Relations)
726Relations = std::move(*I->Relations);
727} else {
728elog("Bad index file: {0}", I.takeError());
729return nullptr;
730}
731}
732
733size_t NumSym = Symbols.size();
734size_t NumRefs = Refs.numRefs();
735size_t NumRelations = Relations.size();
736
737trace::Span Tracer("BuildIndex");
738auto Index = UseDex ? dex::Dex::build(std::move(Symbols), std::move(Refs),
739std::move(Relations))
740: MemIndex::build(std::move(Symbols), std::move(Refs),
741std::move(Relations));
742vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n"
743" - number of symbols: {3}\n"
744" - number of refs: {4}\n"
745" - number of relations: {5}",
746UseDex ? "Dex" : "MemIndex", SymbolFilename,
747Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations);
748return Index;
749}
750
751} // namespace clangd
752} // namespace clang
753